1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Per core/cpu state 4 * 5 * Used to coordinate shared registers between HT threads or 6 * among events on a single PMU. 7 */ 8 9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11 #include <linux/stddef.h> 12 #include <linux/types.h> 13 #include <linux/init.h> 14 #include <linux/slab.h> 15 #include <linux/export.h> 16 #include <linux/nmi.h> 17 #include <linux/kvm_host.h> 18 19 #include <asm/cpufeature.h> 20 #include <asm/cpuid/api.h> 21 #include <asm/debugreg.h> 22 #include <asm/hardirq.h> 23 #include <asm/intel-family.h> 24 #include <asm/intel_pt.h> 25 #include <asm/apic.h> 26 #include <asm/cpu_device_id.h> 27 #include <asm/msr.h> 28 29 #include "../perf_event.h" 30 31 /* 32 * Intel PerfMon, used on Core and later. 33 */ 34 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = 35 { 36 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, 37 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 38 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, 39 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, 40 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 41 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 42 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 43 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ 44 }; 45 46 static struct event_constraint intel_core_event_constraints[] __read_mostly = 47 { 48 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 49 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 50 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ 51 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ 52 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 53 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ 54 EVENT_CONSTRAINT_END 55 }; 56 57 static struct event_constraint intel_core2_event_constraints[] __read_mostly = 58 { 59 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 60 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 61 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 62 INTEL_EVENT_CONSTRAINT(0x10, 
0x1), /* FP_COMP_OPS_EXE */ 63 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 64 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 65 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ 66 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ 67 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ 68 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 69 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ 70 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ 71 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ 72 EVENT_CONSTRAINT_END 73 }; 74 75 static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = 76 { 77 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 78 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 79 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 80 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 81 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 82 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 83 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ 84 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ 85 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ 86 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 87 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 88 EVENT_CONSTRAINT_END 89 }; 90 91 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 92 { 93 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 94 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 95 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), 96 EVENT_EXTRA_END 97 }; 98 99 static struct event_constraint intel_westmere_event_constraints[] __read_mostly = 100 { 101 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 102 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 103 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 104 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 105 
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 106 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 107 INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ 108 EVENT_CONSTRAINT_END 109 }; 110 111 static struct event_constraint intel_snb_event_constraints[] __read_mostly = 112 { 113 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 114 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 115 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 116 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ 117 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ 118 INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 119 INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 120 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 123 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ 124 INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 125 126 /* 127 * When HT is off these events can only run on the bottom 4 counters 128 * When HT is on, they are impacted by the HT bug and require EXCL access 129 */ 130 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 131 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 132 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 133 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 134 135 EVENT_CONSTRAINT_END 136 }; 137 138 static struct event_constraint intel_ivb_event_constraints[] __read_mostly = 139 { 140 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 141 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 142 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* 
CPU_CLK_UNHALTED.REF */ 143 INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ 144 INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */ 145 INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ 146 INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ 147 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ 148 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ 149 INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ 150 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 151 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 152 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 153 154 /* 155 * When HT is off these events can only run on the bottom 4 counters 156 * When HT is on, they are impacted by the HT bug and require EXCL access 157 */ 158 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 159 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 160 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 161 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 162 163 EVENT_CONSTRAINT_END 164 }; 165 166 static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 167 { 168 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 169 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 170 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), 171 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), 172 EVENT_EXTRA_END 173 }; 174 175 static struct event_constraint intel_v1_event_constraints[] __read_mostly = 176 { 177 EVENT_CONSTRAINT_END 178 }; 179 180 static struct event_constraint intel_gen_event_constraints[] __read_mostly = 181 { 182 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 183 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 184 
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 185 EVENT_CONSTRAINT_END 186 }; 187 188 static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly = 189 { 190 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 191 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 192 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 193 FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ 194 FIXED_EVENT_CONSTRAINT(0x0500, 4), 195 FIXED_EVENT_CONSTRAINT(0x0600, 5), 196 FIXED_EVENT_CONSTRAINT(0x0700, 6), 197 FIXED_EVENT_CONSTRAINT(0x0800, 7), 198 FIXED_EVENT_CONSTRAINT(0x0900, 8), 199 FIXED_EVENT_CONSTRAINT(0x0a00, 9), 200 FIXED_EVENT_CONSTRAINT(0x0b00, 10), 201 FIXED_EVENT_CONSTRAINT(0x0c00, 11), 202 FIXED_EVENT_CONSTRAINT(0x0d00, 12), 203 FIXED_EVENT_CONSTRAINT(0x0e00, 13), 204 FIXED_EVENT_CONSTRAINT(0x0f00, 14), 205 FIXED_EVENT_CONSTRAINT(0x1000, 15), 206 EVENT_CONSTRAINT_END 207 }; 208 209 static struct event_constraint intel_slm_event_constraints[] __read_mostly = 210 { 211 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 212 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 213 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ 214 EVENT_CONSTRAINT_END 215 }; 216 217 static struct event_constraint intel_grt_event_constraints[] __read_mostly = { 218 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 219 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 220 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 221 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 222 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 223 FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 224 EVENT_CONSTRAINT_END 225 }; 226 227 static struct event_constraint intel_skt_event_constraints[] __read_mostly = { 228 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 229 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* 
pseudo INST_RETIRED.ANY */ 230 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 231 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 232 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 233 FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 234 FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */ 235 FIXED_EVENT_CONSTRAINT(0x0500, 4), /* pseudo TOPDOWN_BAD_SPECULATION.ALL */ 236 FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */ 237 FIXED_EVENT_CONSTRAINT(0x0600, 5), /* pseudo TOPDOWN_FE_BOUND.ALL */ 238 FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */ 239 FIXED_EVENT_CONSTRAINT(0x0700, 6), /* pseudo TOPDOWN_RETIRING.ALL */ 240 EVENT_CONSTRAINT_END 241 }; 242 243 static struct event_constraint intel_arw_event_constraints[] __read_mostly = { 244 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 245 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 246 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 247 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 248 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 249 FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 250 FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */ 251 FIXED_EVENT_CONSTRAINT(0x0500, 4), /* pseudo TOPDOWN_BAD_SPECULATION.ALL */ 252 FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */ 253 FIXED_EVENT_CONSTRAINT(0x0600, 5), /* pseudo TOPDOWN_FE_BOUND.ALL */ 254 FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */ 255 FIXED_EVENT_CONSTRAINT(0x0700, 6), /* pseudo TOPDOWN_RETIRING.ALL */ 256 INTEL_UEVENT_CONSTRAINT(0x01b7, 0x1), 257 INTEL_UEVENT_CONSTRAINT(0x02b7, 0x2), 258 INTEL_UEVENT_CONSTRAINT(0x04b7, 0x4), 259 INTEL_UEVENT_CONSTRAINT(0x08b7, 0x8), 260 INTEL_UEVENT_CONSTRAINT(0x0175, 0x1), 261 INTEL_UEVENT_CONSTRAINT(0x0275, 0x2), 262 
INTEL_UEVENT_CONSTRAINT(0x21d3, 0x1), 263 INTEL_UEVENT_CONSTRAINT(0x22d3, 0x1), 264 EVENT_CONSTRAINT_END 265 }; 266 267 static struct event_constraint intel_skl_event_constraints[] = { 268 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 269 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 270 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 271 INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ 272 273 /* 274 * when HT is off, these can only run on the bottom 4 counters 275 */ 276 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ 277 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ 278 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ 279 INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ 280 INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ 281 282 EVENT_CONSTRAINT_END 283 }; 284 285 static struct extra_reg intel_knl_extra_regs[] __read_mostly = { 286 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), 287 INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), 288 EVENT_EXTRA_END 289 }; 290 291 static struct extra_reg intel_snb_extra_regs[] __read_mostly = { 292 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 293 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), 294 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), 295 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 296 EVENT_EXTRA_END 297 }; 298 299 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { 300 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 301 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), 302 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), 303 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 304 EVENT_EXTRA_END 305 }; 306 307 static struct extra_reg intel_skl_extra_regs[] __read_mostly = { 308 
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), 309 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), 310 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 311 /* 312 * Note the low 8 bits eventsel code is not a continuous field, containing 313 * some #GPing bits. These are masked out. 314 */ 315 INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), 316 EVENT_EXTRA_END 317 }; 318 319 static struct event_constraint intel_icl_event_constraints[] = { 320 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 321 FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */ 322 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 323 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 324 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 325 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 326 FIXED_EVENT_CONSTRAINT(0x0400, 3), /* pseudo TOPDOWN.SLOTS */ 327 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), 328 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), 329 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), 330 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), 331 INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), 332 INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), 333 INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ 334 INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), 335 INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), 336 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ 337 INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ 338 INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ 339 INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ 340 INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), 341 INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), 342 INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), 343 INTEL_EVENT_CONSTRAINT(0xef, 0xf), 344 
INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), 345 EVENT_CONSTRAINT_END 346 }; 347 348 static struct extra_reg intel_icl_extra_regs[] __read_mostly = { 349 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0), 350 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1), 351 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 352 INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), 353 EVENT_EXTRA_END 354 }; 355 356 static struct extra_reg intel_glc_extra_regs[] __read_mostly = { 357 INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), 358 INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), 359 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 360 INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), 361 INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), 362 INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), 363 EVENT_EXTRA_END 364 }; 365 366 static struct event_constraint intel_glc_event_constraints[] = { 367 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 368 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 369 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 370 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 371 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 372 FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 373 FIXED_EVENT_CONSTRAINT(0x0400, 3), /* pseudo TOPDOWN.SLOTS */ 374 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), 375 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), 376 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), 377 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), 378 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), 379 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), 380 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), 381 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), 382 383 
INTEL_EVENT_CONSTRAINT(0x2e, 0xff), 384 INTEL_EVENT_CONSTRAINT(0x3c, 0xff), 385 /* 386 * Generally event codes < 0x90 are restricted to counters 0-3. 387 * The 0x2E and 0x3C are exception, which has no restriction. 388 */ 389 INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), 390 391 INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), 392 INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), 393 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), 394 INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), 395 INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf), 396 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0xf), 397 INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), 398 INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), 399 INTEL_UEVENT_CONSTRAINT(0x01cd, 0xfe), 400 INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1), 401 INTEL_EVENT_CONSTRAINT(0xce, 0x1), 402 INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), 403 /* 404 * Generally event codes >= 0x90 are likely to have no restrictions. 405 * The exception are defined as above. 406 */ 407 INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff), 408 409 EVENT_CONSTRAINT_END 410 }; 411 412 static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { 413 INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), 414 INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), 415 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 416 INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), 417 INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), 418 INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), 419 INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), 420 EVENT_EXTRA_END 421 }; 422 423 static struct event_constraint intel_lnc_event_constraints[] = { 424 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 425 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 426 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 427 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 428 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 429 
FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 430 FIXED_EVENT_CONSTRAINT(0x0400, 3), /* pseudo TOPDOWN.SLOTS */ 431 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), 432 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), 433 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), 434 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), 435 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), 436 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), 437 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), 438 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), 439 440 INTEL_EVENT_CONSTRAINT(0x20, 0xf), 441 442 INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), 443 INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), 444 445 INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), 446 INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), 447 448 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), 449 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), 450 INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), 451 INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), 452 INTEL_UEVENT_CONSTRAINT(0x10a4, 0x8), 453 INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), 454 INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), 455 INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), 456 457 INTEL_UEVENT_CONSTRAINT(0x87d0, 0x3ff), 458 INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), 459 460 EVENT_CONSTRAINT_END 461 }; 462 463 static struct extra_reg intel_lnc_extra_regs[] __read_mostly = { 464 INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0), 465 INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1), 466 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 467 INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), 468 INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), 469 INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), 470 INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), 471 EVENT_EXTRA_END 472 }; 473 474 static struct event_constraint intel_pnc_event_constraints[] = { 475 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY 
*/ 476 FIXED_EVENT_CONSTRAINT(0x0100, 0), /* pseudo INST_RETIRED.ANY */ 477 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 478 FIXED_EVENT_CONSTRAINT(0x0200, 1), /* pseudo CPU_CLK_UNHALTED.THREAD */ 479 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF_TSC */ 480 FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 481 FIXED_EVENT_CONSTRAINT(0x0400, 3), /* pseudo TOPDOWN.SLOTS */ 482 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), 483 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), 484 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), 485 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), 486 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), 487 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), 488 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), 489 METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), 490 491 INTEL_EVENT_CONSTRAINT(0x20, 0xf), 492 INTEL_EVENT_CONSTRAINT(0x79, 0xf), 493 494 INTEL_UEVENT_CONSTRAINT(0x0275, 0xf), 495 INTEL_UEVENT_CONSTRAINT(0x0176, 0xf), 496 INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), 497 INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), 498 INTEL_UEVENT_CONSTRAINT(0x01cd, 0xfc), 499 INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), 500 501 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), 502 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), 503 INTEL_EVENT_CONSTRAINT(0xd4, 0xf), 504 INTEL_EVENT_CONSTRAINT(0xd6, 0xf), 505 INTEL_EVENT_CONSTRAINT(0xdf, 0xf), 506 INTEL_EVENT_CONSTRAINT(0xce, 0x1), 507 508 INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), 509 INTEL_UEVENT_CONSTRAINT(0x0847, 0xf), 510 INTEL_UEVENT_CONSTRAINT(0x0446, 0xf), 511 INTEL_UEVENT_CONSTRAINT(0x0846, 0xf), 512 INTEL_UEVENT_CONSTRAINT(0x0148, 0xf), 513 514 EVENT_CONSTRAINT_END 515 }; 516 517 static struct extra_reg intel_pnc_extra_regs[] __read_mostly = { 518 /* must define OMR_X first, see intel_alt_er() */ 519 INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OMR_0, 0x40ffffff0000ffffull, OMR_0), 520 INTEL_UEVENT_EXTRA_REG(0x022a, MSR_OMR_1, 
0x40ffffff0000ffffull, OMR_1), 521 INTEL_UEVENT_EXTRA_REG(0x042a, MSR_OMR_2, 0x40ffffff0000ffffull, OMR_2), 522 INTEL_UEVENT_EXTRA_REG(0x082a, MSR_OMR_3, 0x40ffffff0000ffffull, OMR_3), 523 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 524 INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), 525 INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), 526 INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), 527 INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), 528 EVENT_EXTRA_END 529 }; 530 531 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); 532 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); 533 EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); 534 535 static struct attribute *nhm_mem_events_attrs[] = { 536 EVENT_PTR(mem_ld_nhm), 537 NULL, 538 }; 539 540 /* 541 * topdown events for Intel Core CPUs. 542 * 543 * The events are all in slots, which is a free slot in a 4 wide 544 * pipeline. Some events are already reported in slots, for cycle 545 * events we multiply by the pipeline width (4). 546 * 547 * With Hyper Threading on, topdown metrics are either summed or averaged 548 * between the threads of a core: (count_t0 + count_t1). 
549 * 550 * For the average case the metric is always scaled to pipeline width, 551 * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) 552 */ 553 554 EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, 555 "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ 556 "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ 557 EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); 558 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, 559 "event=0xe,umask=0x1"); /* uops_issued.any */ 560 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, 561 "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ 562 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, 563 "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ 564 EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, 565 "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ 566 "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ 567 EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, 568 "4", "2"); 569 570 EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4"); 571 EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80"); 572 EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81"); 573 EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82"); 574 EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83"); 575 EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84"); 576 EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85"); 577 EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86"); 578 EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87"); 579 580 static struct attribute *snb_events_attrs[] = { 581 EVENT_PTR(td_slots_issued), 582 EVENT_PTR(td_slots_retired), 583 EVENT_PTR(td_fetch_bubbles), 584 EVENT_PTR(td_total_slots), 585 EVENT_PTR(td_total_slots_scale), 586 
EVENT_PTR(td_recovery_bubbles), 587 EVENT_PTR(td_recovery_bubbles_scale), 588 NULL, 589 }; 590 591 static struct attribute *snb_mem_events_attrs[] = { 592 EVENT_PTR(mem_ld_snb), 593 EVENT_PTR(mem_st_snb), 594 NULL, 595 }; 596 597 static struct event_constraint intel_hsw_event_constraints[] = { 598 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 599 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 600 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 601 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ 602 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 603 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 604 /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 605 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), 606 /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 607 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), 608 /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ 609 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), 610 611 /* 612 * When HT is off these events can only run on the bottom 4 counters 613 * When HT is on, they are impacted by the HT bug and require EXCL access 614 */ 615 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 616 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 617 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 618 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 619 620 EVENT_CONSTRAINT_END 621 }; 622 623 static struct event_constraint intel_bdw_event_constraints[] = { 624 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 625 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 626 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 627 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ 628 INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ 629 /* 630 * when HT is off, these can only run on the bottom 4 counters 631 */ 632 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), 
/* MEM_INST_RETIRED.* */ 633 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ 634 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ 635 INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ 636 EVENT_CONSTRAINT_END 637 }; 638 639 /* Look up the raw event code for a generic hardware event; hw_event indexes intel_perfmon_event_map[] directly and is not range-checked here. */ static u64 intel_pmu_event_map(int hw_event) 640 { 641 return intel_perfmon_event_map[hw_event]; 642 } 643 644 /* Cache event codes indexed [cache][op][result]; slots without an initializer are implicitly zero. NOTE(review): 0 and -1 look like distinct sentinels - confirm their meaning against the code that consumes this table. */ static __initconst const u64 glc_hw_cache_event_ids 645 [PERF_COUNT_HW_CACHE_MAX] 646 [PERF_COUNT_HW_CACHE_OP_MAX] 647 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 648 { 649 [ C(L1D ) ] = { 650 [ C(OP_READ) ] = { 651 [ C(RESULT_ACCESS) ] = 0x81d0, 652 [ C(RESULT_MISS) ] = 0xe124, 653 }, 654 [ C(OP_WRITE) ] = { 655 [ C(RESULT_ACCESS) ] = 0x82d0, 656 }, 657 }, 658 [ C(L1I ) ] = { 659 [ C(OP_READ) ] = { 660 [ C(RESULT_MISS) ] = 0xe424, 661 }, 662 [ C(OP_WRITE) ] = { 663 [ C(RESULT_ACCESS) ] = -1, 664 [ C(RESULT_MISS) ] = -1, 665 }, 666 }, 667 [ C(LL ) ] = { 668 [ C(OP_READ) ] = { 669 [ C(RESULT_ACCESS) ] = 0x12a, 670 [ C(RESULT_MISS) ] = 0x12a, 671 }, 672 [ C(OP_WRITE) ] = { 673 [ C(RESULT_ACCESS) ] = 0x12a, 674 [ C(RESULT_MISS) ] = 0x12a, 675 }, 676 }, 677 [ C(DTLB) ] = { 678 [ C(OP_READ) ] = { 679 [ C(RESULT_ACCESS) ] = 0x81d0, 680 [ C(RESULT_MISS) ] = 0xe12, 681 }, 682 [ C(OP_WRITE) ] = { 683 [ C(RESULT_ACCESS) ] = 0x82d0, 684 [ C(RESULT_MISS) ] = 0xe13, 685 }, 686 }, 687 [ C(ITLB) ] = { 688 [ C(OP_READ) ] = { 689 [ C(RESULT_ACCESS) ] = -1, 690 [ C(RESULT_MISS) ] = 0xe11, 691 }, 692 [ C(OP_WRITE) ] = { 693 [ C(RESULT_ACCESS) ] = -1, 694 [ C(RESULT_MISS) ] = -1, 695 }, 696 [ C(OP_PREFETCH) ] = { 697 [ C(RESULT_ACCESS) ] = -1, 698 [ C(RESULT_MISS) ] = -1, 699 }, 700 }, 701 [ C(BPU ) ] = { 702 [ C(OP_READ) ] = { 703 [ C(RESULT_ACCESS) ] = 0x4c4, 704 [ C(RESULT_MISS) ] = 0x4c5, 705 }, 706 [ C(OP_WRITE) ] = { 707 [ C(RESULT_ACCESS) ] = -1, 708 [ C(RESULT_MISS) ] = -1, 709 }, 710 [ C(OP_PREFETCH) ] = { 711 [ C(RESULT_ACCESS) ] = -1, 712 [ C(RESULT_MISS) ] = -1, 713 }, 714 }, 715 [ C(NODE) ] = { 716 [ C(OP_READ) 
] = { 717 [ C(RESULT_ACCESS) ] = 0x12a, 718 [ C(RESULT_MISS) ] = 0x12a, 719 }, 720 }, 721 }; 722 723 /* ADL P-core (Golden cove) specific event code: same entries as glc_hw_cache_event_ids, but without a NODE row. */ 724 static __initconst const u64 adl_glc_hw_cache_event_ids 725 [PERF_COUNT_HW_CACHE_MAX] 726 [PERF_COUNT_HW_CACHE_OP_MAX] 727 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 728 { 729 [ C(L1D ) ] = { 730 [ C(OP_READ) ] = { 731 [ C(RESULT_ACCESS) ] = 0x81d0, 732 [ C(RESULT_MISS) ] = 0xe124, 733 }, 734 [ C(OP_WRITE) ] = { 735 [ C(RESULT_ACCESS) ] = 0x82d0, 736 }, 737 }, 738 [ C(L1I ) ] = { 739 [ C(OP_READ) ] = { 740 [ C(RESULT_MISS) ] = 0xe424, 741 }, 742 [ C(OP_WRITE) ] = { 743 [ C(RESULT_ACCESS) ] = -1, 744 [ C(RESULT_MISS) ] = -1, 745 }, 746 }, 747 [ C(LL ) ] = { 748 [ C(OP_READ) ] = { 749 [ C(RESULT_ACCESS) ] = 0x12a, 750 [ C(RESULT_MISS) ] = 0x12a, 751 }, 752 [ C(OP_WRITE) ] = { 753 [ C(RESULT_ACCESS) ] = 0x12a, 754 [ C(RESULT_MISS) ] = 0x12a, 755 }, 756 }, 757 [ C(DTLB) ] = { 758 [ C(OP_READ) ] = { 759 [ C(RESULT_ACCESS) ] = 0x81d0, 760 [ C(RESULT_MISS) ] = 0xe12, 761 }, 762 [ C(OP_WRITE) ] = { 763 [ C(RESULT_ACCESS) ] = 0x82d0, 764 [ C(RESULT_MISS) ] = 0xe13, 765 }, 766 }, 767 [ C(ITLB) ] = { 768 [ C(OP_READ) ] = { 769 [ C(RESULT_ACCESS) ] = -1, 770 [ C(RESULT_MISS) ] = 0xe11, 771 }, 772 [ C(OP_WRITE) ] = { 773 [ C(RESULT_ACCESS) ] = -1, 774 [ C(RESULT_MISS) ] = -1, 775 }, 776 [ C(OP_PREFETCH) ] = { 777 [ C(RESULT_ACCESS) ] = -1, 778 [ C(RESULT_MISS) ] = -1, 779 }, 780 }, 781 [ C(BPU ) ] = { 782 [ C(OP_READ) ] = { 783 [ C(RESULT_ACCESS) ] = 0x4c4, 784 [ C(RESULT_MISS) ] = 0x4c5, 785 }, 786 [ C(OP_WRITE) ] = { 787 [ C(RESULT_ACCESS) ] = -1, 788 [ C(RESULT_MISS) ] = -1, 789 }, 790 [ C(OP_PREFETCH) ] = { 791 [ C(RESULT_ACCESS) ] = -1, 792 [ C(RESULT_MISS) ] = -1, 793 }, 794 }, 795 }; 796 797 /* Same indexing as glc_hw_cache_event_ids; only the LL and NODE rows are populated (the rows whose event code there is 0x12a). */ static __initconst const u64 glc_hw_cache_extra_regs 798 [PERF_COUNT_HW_CACHE_MAX] 799 [PERF_COUNT_HW_CACHE_OP_MAX] 800 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 801 { 802 [ C(LL ) ] = { 803 [ C(OP_READ) ] = { 804 [ C(RESULT_ACCESS) ] = 
0x10001, /* OCR.DEMAND_DATA_RD.ANY_RESPONSE */ 805 [ C(RESULT_MISS) ] = 0x3fbfc00001, /* OCR.DEMAND_DATA_RD.L3_MISS */ 806 }, 807 [ C(OP_WRITE) ] = { 808 [ C(RESULT_ACCESS) ] = 0x3f3ffc0002, /* OCR.DEMAND_RFO.ANY_RESPONSE */ 809 [ C(RESULT_MISS) ] = 0x3f3fc00002, /* OCR.DEMAND_RFO.L3_MISS */ 810 }, 811 }, 812 [ C(NODE) ] = { 813 [ C(OP_READ) ] = { 814 [ C(RESULT_ACCESS) ] = 0x104000001, /* OCR.DEMAND_DATA_RD.LOCAL_DRAM */ 815 [ C(RESULT_MISS) ] = 0x730000001, /* OCR.DEMAND_DATA_RD.REMOTE_DRAM */ 816 }, 817 }, 818 }; 819 820 /* ADL P-core (Golden cove) specific extra regs value; unlike glc_hw_cache_extra_regs there is no NODE row. */ 821 static __initconst const u64 adl_glc_hw_cache_extra_regs 822 [PERF_COUNT_HW_CACHE_MAX] 823 [PERF_COUNT_HW_CACHE_OP_MAX] 824 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 825 { 826 [ C(LL ) ] = { 827 [ C(OP_READ) ] = { 828 [ C(RESULT_ACCESS) ] = 0x10001, /* OCR.DEMAND_DATA_RD.ANY_RESPONSE */ 829 [ C(RESULT_MISS) ] = 0x3fbfc00001, /* OCR.DEMAND_DATA_RD.L3_MISS */ 830 }, 831 [ C(OP_WRITE) ] = { 832 [ C(RESULT_ACCESS) ] = 0x10002, /* OCR.DEMAND_RFO.ANY_RESPONSE */ 833 [ C(RESULT_MISS) ] = 0x3fbfc00002, /* OCR.DEMAND_RFO.L3_MISS */ 834 }, 835 }, 836 }; 837 838 /* Same LL-only layout as adl_glc_hw_cache_extra_regs; only the two L3_MISS values differ. */ static __initconst const u64 lnc_hw_cache_extra_regs 839 [PERF_COUNT_HW_CACHE_MAX] 840 [PERF_COUNT_HW_CACHE_OP_MAX] 841 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 842 { 843 [ C(LL ) ] = { 844 [ C(OP_READ) ] = { 845 [ C(RESULT_ACCESS) ] = 0x10001, /* OCR.DEMAND_DATA_RD.ANY_RESPONSE */ 846 [ C(RESULT_MISS) ] = 0x9E7FA000001, /* OCR.DEMAND_DATA_RD.L3_MISS */ 847 }, 848 [ C(OP_WRITE) ] = { 849 [ C(RESULT_ACCESS) ] = 0x10002, /* OCR.DEMAND_RFO.ANY_RESPONSE */ 850 [ C(RESULT_MISS) ] = 0x9E7FA000002, /* OCR.DEMAND_RFO.L3_MISS */ 851 }, 852 }, 853 }; 854 855 /* ARL specific lioncove hw_cache_extra_regs[] variant. 
*/ 856 static __initconst const u64 arl_lnc_hw_cache_extra_regs 857 [PERF_COUNT_HW_CACHE_MAX] 858 [PERF_COUNT_HW_CACHE_OP_MAX] 859 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 860 { 861 [ C(LL ) ] = { 862 [ C(OP_READ) ] = { 863 [ C(RESULT_ACCESS) ] = 0x10001, /* OCR.DEMAND_DATA_RD.ANY_RESPONSE */ 864 [ C(RESULT_MISS) ] = 0xFE7F8000001, /* OCR.DEMAND_DATA_RD.L3_MISS */ 865 }, 866 [ C(OP_WRITE) ] = { 867 [ C(RESULT_ACCESS) ] = 0x10002, /* OCR.DEMAND_RFO.ANY_RESPONSE */ 868 [ C(RESULT_MISS) ] = 0xFE7F8000002, /* OCR.DEMAND_RFO.L3_MISS */ 869 }, 870 }, 871 }; 872 873 /* Cache event codes indexed [cache][op][result]. Same entries as glc_hw_cache_event_ids except that the NODE read slots are -1 here. */ static __initconst const u64 pnc_hw_cache_event_ids 874 [PERF_COUNT_HW_CACHE_MAX] 875 [PERF_COUNT_HW_CACHE_OP_MAX] 876 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 877 { 878 [ C(L1D ) ] = { 879 [ C(OP_READ) ] = { 880 [ C(RESULT_ACCESS) ] = 0x81d0, 881 [ C(RESULT_MISS) ] = 0xe124, 882 }, 883 [ C(OP_WRITE) ] = { 884 [ C(RESULT_ACCESS) ] = 0x82d0, 885 }, 886 }, 887 [ C(L1I ) ] = { 888 [ C(OP_READ) ] = { 889 [ C(RESULT_MISS) ] = 0xe424, 890 }, 891 [ C(OP_WRITE) ] = { 892 [ C(RESULT_ACCESS) ] = -1, 893 [ C(RESULT_MISS) ] = -1, 894 }, 895 }, 896 [ C(LL ) ] = { 897 [ C(OP_READ) ] = { 898 [ C(RESULT_ACCESS) ] = 0x12a, 899 [ C(RESULT_MISS) ] = 0x12a, 900 }, 901 [ C(OP_WRITE) ] = { 902 [ C(RESULT_ACCESS) ] = 0x12a, 903 [ C(RESULT_MISS) ] = 0x12a, 904 }, 905 }, 906 [ C(DTLB) ] = { 907 [ C(OP_READ) ] = { 908 [ C(RESULT_ACCESS) ] = 0x81d0, 909 [ C(RESULT_MISS) ] = 0xe12, 910 }, 911 [ C(OP_WRITE) ] = { 912 [ C(RESULT_ACCESS) ] = 0x82d0, 913 [ C(RESULT_MISS) ] = 0xe13, 914 }, 915 }, 916 [ C(ITLB) ] = { 917 [ C(OP_READ) ] = { 918 [ C(RESULT_ACCESS) ] = -1, 919 [ C(RESULT_MISS) ] = 0xe11, 920 }, 921 [ C(OP_WRITE) ] = { 922 [ C(RESULT_ACCESS) ] = -1, 923 [ C(RESULT_MISS) ] = -1, 924 }, 925 [ C(OP_PREFETCH) ] = { 926 [ C(RESULT_ACCESS) ] = -1, 927 [ C(RESULT_MISS) ] = -1, 928 }, 929 }, 930 [ C(BPU ) ] = { 931 [ C(OP_READ) ] = { 932 [ C(RESULT_ACCESS) ] = 0x4c4, 933 [ C(RESULT_MISS) ] = 0x4c5, 934 }, 935 [ C(OP_WRITE) ] = { 936 [ 
C(RESULT_ACCESS) ] = -1, 937 [ C(RESULT_MISS) ] = -1, 938 }, 939 [ C(OP_PREFETCH) ] = { 940 [ C(RESULT_ACCESS) ] = -1, 941 [ C(RESULT_MISS) ] = -1, 942 }, 943 }, 944 [ C(NODE) ] = { 945 [ C(OP_READ) ] = { 946 [ C(RESULT_ACCESS) ] = -1, 947 [ C(RESULT_MISS) ] = -1, 948 }, 949 }, 950 }; 951 952 /* Only the LL row is populated; the values below are labelled as OMR (not OCR) encodings. */ static __initconst const u64 pnc_hw_cache_extra_regs 953 [PERF_COUNT_HW_CACHE_MAX] 954 [PERF_COUNT_HW_CACHE_OP_MAX] 955 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 956 { 957 [ C(LL ) ] = { 958 [ C(OP_READ) ] = { 959 [ C(RESULT_ACCESS) ] = 0x4000000000000001, /* OMR.DEMAND_DATA_RD.ANY_RESPONSE */ 960 [ C(RESULT_MISS) ] = 0xFFFFF000000001, /* OMR.DEMAND_DATA_RD.L3_MISS */ 961 }, 962 [ C(OP_WRITE) ] = { 963 [ C(RESULT_ACCESS) ] = 0x4000000000000002, /* OMR.DEMAND_RFO.ANY_RESPONSE */ 964 [ C(RESULT_MISS) ] = 0xFFFFF000000002, /* OMR.DEMAND_RFO.L3_MISS */ 965 }, 966 }, 967 }; 968 969 /* Identical to pnc_hw_cache_extra_regs apart from the two L3_MISS values. */ static __initconst const u64 cyc_hw_cache_extra_regs 970 [PERF_COUNT_HW_CACHE_MAX] 971 [PERF_COUNT_HW_CACHE_OP_MAX] 972 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 973 { 974 [ C(LL ) ] = { 975 [ C(OP_READ) ] = { 976 [ C(RESULT_ACCESS) ] = 0x4000000000000001, /* OMR.DEMAND_DATA_RD.ANY_RESPONSE */ 977 [ C(RESULT_MISS) ] = 0xFF03F000000001, /* OMR.DEMAND_DATA_RD.L3_MISS */ 978 }, 979 [ C(OP_WRITE) ] = { 980 [ C(RESULT_ACCESS) ] = 0x4000000000000002, /* OMR.DEMAND_RFO.ANY_RESPONSE */ 981 [ C(RESULT_MISS) ] = 0xFF03F000000002, /* OMR.DEMAND_RFO.L3_MISS */ 982 }, 983 }, 984 }; 985 986 /* 987 * Notes on the events: 988 * - data reads do not include code reads (comparable to earlier tables) 989 * - data counts include speculative execution (except L1 write, dtlb, bpu) 990 * - remote node access includes remote memory, remote cache, remote mmio. 991 * - prefetches are not included in the counts. 
992 * - icache miss does not include decoded icache 993 */ 994 995 /* Bit positions that are combined into the skl_hw_cache_extra_regs values further down. */ #define SKL_DEMAND_DATA_RD BIT_ULL(0) 996 #define SKL_DEMAND_RFO BIT_ULL(1) 997 #define SKL_ANY_RESPONSE BIT_ULL(16) 998 #define SKL_SUPPLIER_NONE BIT_ULL(17) 999 #define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26) 1000 #define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27) 1001 #define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28) 1002 #define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29) 1003 #define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \ 1004 SKL_L3_MISS_REMOTE_HOP0_DRAM| \ 1005 SKL_L3_MISS_REMOTE_HOP1_DRAM| \ 1006 SKL_L3_MISS_REMOTE_HOP2P_DRAM) 1007 #define SKL_SPL_HIT BIT_ULL(30) 1008 #define SKL_SNOOP_NONE BIT_ULL(31) 1009 #define SKL_SNOOP_NOT_NEEDED BIT_ULL(32) 1010 #define SKL_SNOOP_MISS BIT_ULL(33) 1011 #define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34) 1012 #define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35) 1013 #define SKL_SNOOP_HITM BIT_ULL(36) 1014 #define SKL_SNOOP_NON_DRAM BIT_ULL(37) 1015 #define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \ 1016 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ 1017 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ 1018 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM) 1019 #define SKL_DEMAND_READ SKL_DEMAND_DATA_RD 1020 #define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \ 1021 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ 1022 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ 1023 SKL_SNOOP_HITM|SKL_SPL_HIT) 1024 #define SKL_DEMAND_WRITE SKL_DEMAND_RFO 1025 #define SKL_LLC_ACCESS SKL_ANY_RESPONSE 1026 #define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \ 1027 SKL_L3_MISS_REMOTE_HOP1_DRAM| \ 1028 SKL_L3_MISS_REMOTE_HOP2P_DRAM) 1029 1030 /* Cache event codes indexed [cache][op][result]; the LL and NODE read/write slots all use 0x1b7 (OFFCORE_RESPONSE). */ static __initconst const u64 skl_hw_cache_event_ids 1031 [PERF_COUNT_HW_CACHE_MAX] 1032 [PERF_COUNT_HW_CACHE_OP_MAX] 1033 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1034 { 1035 [ C(L1D ) ] = { 1036 [ C(OP_READ) ] = { 1037 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ 1038 [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ 1039 }, 1040 [ C(OP_WRITE) ] = { 1041 [ C(RESULT_ACCESS) ] = 0x82d0, /* 
MEM_INST_RETIRED.ALL_STORES */ 1042 [ C(RESULT_MISS) ] = 0x0, 1043 }, 1044 [ C(OP_PREFETCH) ] = { 1045 [ C(RESULT_ACCESS) ] = 0x0, 1046 [ C(RESULT_MISS) ] = 0x0, 1047 }, 1048 }, 1049 [ C(L1I ) ] = { 1050 [ C(OP_READ) ] = { 1051 [ C(RESULT_ACCESS) ] = 0x0, 1052 [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */ 1053 }, 1054 [ C(OP_WRITE) ] = { 1055 [ C(RESULT_ACCESS) ] = -1, 1056 [ C(RESULT_MISS) ] = -1, 1057 }, 1058 [ C(OP_PREFETCH) ] = { 1059 [ C(RESULT_ACCESS) ] = 0x0, 1060 [ C(RESULT_MISS) ] = 0x0, 1061 }, 1062 }, 1063 [ C(LL ) ] = { 1064 [ C(OP_READ) ] = { 1065 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1066 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1067 }, 1068 [ C(OP_WRITE) ] = { 1069 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1070 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1071 }, 1072 [ C(OP_PREFETCH) ] = { 1073 [ C(RESULT_ACCESS) ] = 0x0, 1074 [ C(RESULT_MISS) ] = 0x0, 1075 }, 1076 }, 1077 [ C(DTLB) ] = { 1078 [ C(OP_READ) ] = { 1079 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ 1080 [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ 1081 }, 1082 [ C(OP_WRITE) ] = { 1083 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ 1084 [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ 1085 }, 1086 [ C(OP_PREFETCH) ] = { 1087 [ C(RESULT_ACCESS) ] = 0x0, 1088 [ C(RESULT_MISS) ] = 0x0, 1089 }, 1090 }, 1091 [ C(ITLB) ] = { 1092 [ C(OP_READ) ] = { 1093 [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */ 1094 [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */ 1095 }, 1096 [ C(OP_WRITE) ] = { 1097 [ C(RESULT_ACCESS) ] = -1, 1098 [ C(RESULT_MISS) ] = -1, 1099 }, 1100 [ C(OP_PREFETCH) ] = { 1101 [ C(RESULT_ACCESS) ] = -1, 1102 [ C(RESULT_MISS) ] = -1, 1103 }, 1104 }, 1105 [ C(BPU ) ] = { 1106 [ C(OP_READ) ] = { 1107 [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ 1108 [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 1109 }, 1110 [ 
C(OP_WRITE) ] = { 1111 [ C(RESULT_ACCESS) ] = -1, 1112 [ C(RESULT_MISS) ] = -1, 1113 }, 1114 [ C(OP_PREFETCH) ] = { 1115 [ C(RESULT_ACCESS) ] = -1, 1116 [ C(RESULT_MISS) ] = -1, 1117 }, 1118 }, 1119 [ C(NODE) ] = { 1120 [ C(OP_READ) ] = { 1121 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1122 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1123 }, 1124 [ C(OP_WRITE) ] = { 1125 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1126 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1127 }, 1128 [ C(OP_PREFETCH) ] = { 1129 [ C(RESULT_ACCESS) ] = 0x0, 1130 [ C(RESULT_MISS) ] = 0x0, 1131 }, 1132 }, 1133 }; 1134 1135 /* Values composed from the SKL_* bits for the LL and NODE read/write slots, i.e. the slots that use 0x1b7 in skl_hw_cache_event_ids; prefetch slots are 0. */ static __initconst const u64 skl_hw_cache_extra_regs 1136 [PERF_COUNT_HW_CACHE_MAX] 1137 [PERF_COUNT_HW_CACHE_OP_MAX] 1138 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1139 { 1140 [ C(LL ) ] = { 1141 [ C(OP_READ) ] = { 1142 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| 1143 SKL_LLC_ACCESS|SKL_ANY_SNOOP, 1144 [ C(RESULT_MISS) ] = SKL_DEMAND_READ| 1145 SKL_L3_MISS|SKL_ANY_SNOOP| 1146 SKL_SUPPLIER_NONE, 1147 }, 1148 [ C(OP_WRITE) ] = { 1149 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| 1150 SKL_LLC_ACCESS|SKL_ANY_SNOOP, 1151 [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| 1152 SKL_L3_MISS|SKL_ANY_SNOOP| 1153 SKL_SUPPLIER_NONE, 1154 }, 1155 [ C(OP_PREFETCH) ] = { 1156 [ C(RESULT_ACCESS) ] = 0x0, 1157 [ C(RESULT_MISS) ] = 0x0, 1158 }, 1159 }, 1160 [ C(NODE) ] = { 1161 [ C(OP_READ) ] = { 1162 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| 1163 SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, 1164 [ C(RESULT_MISS) ] = SKL_DEMAND_READ| 1165 SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, 1166 }, 1167 [ C(OP_WRITE) ] = { 1168 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| 1169 SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, 1170 [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| 1171 SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, 1172 }, 1173 [ C(OP_PREFETCH) ] = { 1174 [ C(RESULT_ACCESS) ] = 0x0, 1175 [ C(RESULT_MISS) ] = 0x0, 1176 }, 1177 }, 1178 }; 1179 1180 /* Literal encodings (labelled OCR.* below) for the LL and NODE rows; prefetch slots are 0. */ static __initconst const u64 snc_hw_cache_extra_regs 1181 [PERF_COUNT_HW_CACHE_MAX] 1182 
[PERF_COUNT_HW_CACHE_OP_MAX] 1183 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1184 { 1185 [ C(LL ) ] = { 1186 [ C(OP_READ) ] = { 1187 [ C(RESULT_ACCESS) ] = 0x10001, /* OCR.DEMAND_DATA_RD.ANY_RESPONSE */ 1188 [ C(RESULT_MISS) ] = 0x3FBFC00001, /* OCR.DEMAND_DATA_RD.L3_MISS */ 1189 }, 1190 [ C(OP_WRITE) ] = { 1191 [ C(RESULT_ACCESS) ] = 0x3F3FFC0002, /* OCR.DEMAND_RFO.ANY_RESPONSE */ 1192 [ C(RESULT_MISS) ] = 0x3F3FC00002, /* OCR.DEMAND_RFO.L3_MISS */ 1193 }, 1194 [ C(OP_PREFETCH) ] = { 1195 [ C(RESULT_ACCESS) ] = 0x0, 1196 [ C(RESULT_MISS) ] = 0x0, 1197 }, 1198 }, 1199 [ C(NODE) ] = { 1200 [ C(OP_READ) ] = { 1201 [ C(RESULT_ACCESS) ] = 0x104000001, /* OCR.DEMAND_DATA_RD.LOCAL_DRAM */ 1202 [ C(RESULT_MISS) ] = 0x730000001, /* OCR.DEMAND_DATA_RD.REMOTE_DRAM */ 1203 }, 1204 [ C(OP_WRITE) ] = { 1205 [ C(RESULT_ACCESS) ] = 0x104000002, /* OCR.DEMAND_RFO.LOCAL_DRAM */ 1206 [ C(RESULT_MISS) ] = 0x730000002, /* OCR.DEMAND_RFO.REMOTE_DRAM */ 1207 }, 1208 [ C(OP_PREFETCH) ] = { 1209 [ C(RESULT_ACCESS) ] = 0x0, 1210 [ C(RESULT_MISS) ] = 0x0, 1211 }, 1212 }, 1213 }; 1214 1215 /* Request/response bit masks that are combined into the snb_hw_cache_extra_regs values further down. */ #define SNB_DMND_DATA_RD (1ULL << 0) 1216 #define SNB_DMND_RFO (1ULL << 1) 1217 #define SNB_DMND_IFETCH (1ULL << 2) 1218 #define SNB_DMND_WB (1ULL << 3) 1219 #define SNB_PF_DATA_RD (1ULL << 4) 1220 #define SNB_PF_RFO (1ULL << 5) 1221 #define SNB_PF_IFETCH (1ULL << 6) 1222 #define SNB_LLC_DATA_RD (1ULL << 7) 1223 #define SNB_LLC_RFO (1ULL << 8) 1224 #define SNB_LLC_IFETCH (1ULL << 9) 1225 #define SNB_BUS_LOCKS (1ULL << 10) 1226 #define SNB_STRM_ST (1ULL << 11) 1227 #define SNB_OTHER (1ULL << 15) 1228 #define SNB_RESP_ANY (1ULL << 16) 1229 #define SNB_NO_SUPP (1ULL << 17) 1230 #define SNB_LLC_HITM (1ULL << 18) 1231 #define SNB_LLC_HITE (1ULL << 19) 1232 #define SNB_LLC_HITS (1ULL << 20) 1233 #define SNB_LLC_HITF (1ULL << 21) 1234 #define SNB_LOCAL (1ULL << 22) 1235 #define SNB_REMOTE (0xffULL << 23) 1236 #define SNB_SNP_NONE (1ULL << 31) 1237 #define SNB_SNP_NOT_NEEDED (1ULL << 32) 1238 #define SNB_SNP_MISS (1ULL << 
33) 1239 #define SNB_NO_FWD (1ULL << 34) 1240 #define SNB_SNP_FWD (1ULL << 35) 1241 #define SNB_HITM (1ULL << 36) 1242 #define SNB_NON_DRAM (1ULL << 37) 1243 1244 #define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) 1245 #define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) 1246 #define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) 1247 1248 #define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ 1249 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ 1250 SNB_HITM) 1251 1252 #define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) 1253 #define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) 1254 1255 #define SNB_L3_ACCESS SNB_RESP_ANY 1256 #define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) 1257 1258 /* Values composed from the SNB_* masks; read, write and prefetch are all populated for both the LL and NODE rows. */ static __initconst const u64 snb_hw_cache_extra_regs 1259 [PERF_COUNT_HW_CACHE_MAX] 1260 [PERF_COUNT_HW_CACHE_OP_MAX] 1261 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1262 { 1263 [ C(LL ) ] = { 1264 [ C(OP_READ) ] = { 1265 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, 1266 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, 1267 }, 1268 [ C(OP_WRITE) ] = { 1269 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, 1270 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, 1271 }, 1272 [ C(OP_PREFETCH) ] = { 1273 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, 1274 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, 1275 }, 1276 }, 1277 [ C(NODE) ] = { 1278 [ C(OP_READ) ] = { 1279 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, 1280 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, 1281 }, 1282 [ C(OP_WRITE) ] = { 1283 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, 1284 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, 1285 }, 1286 [ C(OP_PREFETCH) ] = { 1287 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, 1288 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, 1289 }, 1290 }, 1291 }; 1292 1293 /* Cache event codes indexed [cache][op][result]; every LL and NODE slot uses 0x01b7. */ static __initconst const u64 snb_hw_cache_event_ids 1294 [PERF_COUNT_HW_CACHE_MAX] 1295 [PERF_COUNT_HW_CACHE_OP_MAX] 1296 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1297 { 1298 [ C(L1D) ] = 
{ 1299 [ C(OP_READ) ] = { 1300 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ 1301 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ 1302 }, 1303 [ C(OP_WRITE) ] = { 1304 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ 1305 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ 1306 }, 1307 [ C(OP_PREFETCH) ] = { 1308 [ C(RESULT_ACCESS) ] = 0x0, 1309 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ 1310 }, 1311 }, 1312 [ C(L1I ) ] = { 1313 [ C(OP_READ) ] = { 1314 [ C(RESULT_ACCESS) ] = 0x0, 1315 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ 1316 }, 1317 [ C(OP_WRITE) ] = { 1318 [ C(RESULT_ACCESS) ] = -1, 1319 [ C(RESULT_MISS) ] = -1, 1320 }, 1321 [ C(OP_PREFETCH) ] = { 1322 [ C(RESULT_ACCESS) ] = 0x0, 1323 [ C(RESULT_MISS) ] = 0x0, 1324 }, 1325 }, 1326 [ C(LL ) ] = { 1327 [ C(OP_READ) ] = { 1328 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 1329 [ C(RESULT_ACCESS) ] = 0x01b7, 1330 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 1331 [ C(RESULT_MISS) ] = 0x01b7, 1332 }, 1333 [ C(OP_WRITE) ] = { 1334 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 1335 [ C(RESULT_ACCESS) ] = 0x01b7, 1336 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 1337 [ C(RESULT_MISS) ] = 0x01b7, 1338 }, 1339 [ C(OP_PREFETCH) ] = { 1340 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 1341 [ C(RESULT_ACCESS) ] = 0x01b7, 1342 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 1343 [ C(RESULT_MISS) ] = 0x01b7, 1344 }, 1345 }, 1346 [ C(DTLB) ] = { 1347 [ C(OP_READ) ] = { 1348 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ 1349 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ 1350 }, 1351 [ C(OP_WRITE) ] = { 1352 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ 1353 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ 1354 }, 1355 [ C(OP_PREFETCH) ] = { 1356 [ C(RESULT_ACCESS) ] = 0x0, 1357 [ C(RESULT_MISS) ] = 0x0, 1358 }, 1359 }, 1360 [ C(ITLB) ] = { 1361 [ C(OP_READ) ] = { 1362 [ C(RESULT_ACCESS) ] = 0x1085, /* 
ITLB_MISSES.STLB_HIT */ 1363 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ 1364 }, 1365 [ C(OP_WRITE) ] = { 1366 [ C(RESULT_ACCESS) ] = -1, 1367 [ C(RESULT_MISS) ] = -1, 1368 }, 1369 [ C(OP_PREFETCH) ] = { 1370 [ C(RESULT_ACCESS) ] = -1, 1371 [ C(RESULT_MISS) ] = -1, 1372 }, 1373 }, 1374 [ C(BPU ) ] = { 1375 [ C(OP_READ) ] = { 1376 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 1377 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 1378 }, 1379 [ C(OP_WRITE) ] = { 1380 [ C(RESULT_ACCESS) ] = -1, 1381 [ C(RESULT_MISS) ] = -1, 1382 }, 1383 [ C(OP_PREFETCH) ] = { 1384 [ C(RESULT_ACCESS) ] = -1, 1385 [ C(RESULT_MISS) ] = -1, 1386 }, 1387 }, 1388 [ C(NODE) ] = { 1389 [ C(OP_READ) ] = { 1390 [ C(RESULT_ACCESS) ] = 0x01b7, 1391 [ C(RESULT_MISS) ] = 0x01b7, 1392 }, 1393 [ C(OP_WRITE) ] = { 1394 [ C(RESULT_ACCESS) ] = 0x01b7, 1395 [ C(RESULT_MISS) ] = 0x01b7, 1396 }, 1397 [ C(OP_PREFETCH) ] = { 1398 [ C(RESULT_ACCESS) ] = 0x01b7, 1399 [ C(RESULT_MISS) ] = 0x01b7, 1400 }, 1401 }, 1402 1403 }; 1404 1405 /* 1406 * Notes on the events: 1407 * - data reads do not include code reads (comparable to earlier tables) 1408 * - data counts include speculative execution (except L1 write, dtlb, bpu) 1409 * - remote node access includes remote memory, remote cache, remote mmio. 1410 * - prefetches are not included in the counts because they are not 1411 * reliably counted. 
1412 */ 1413 1414 /* Bit positions that are combined into the hsw_hw_cache_extra_regs values further down. */ #define HSW_DEMAND_DATA_RD BIT_ULL(0) 1415 #define HSW_DEMAND_RFO BIT_ULL(1) 1416 #define HSW_ANY_RESPONSE BIT_ULL(16) 1417 #define HSW_SUPPLIER_NONE BIT_ULL(17) 1418 #define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) 1419 #define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) 1420 #define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) 1421 #define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) 1422 #define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ 1423 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ 1424 HSW_L3_MISS_REMOTE_HOP2P) 1425 #define HSW_SNOOP_NONE BIT_ULL(31) 1426 #define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) 1427 #define HSW_SNOOP_MISS BIT_ULL(33) 1428 #define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) 1429 #define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) 1430 #define HSW_SNOOP_HITM BIT_ULL(36) 1431 #define HSW_SNOOP_NON_DRAM BIT_ULL(37) 1432 #define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ 1433 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ 1434 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ 1435 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) 1436 #define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) 1437 #define HSW_DEMAND_READ HSW_DEMAND_DATA_RD 1438 #define HSW_DEMAND_WRITE HSW_DEMAND_RFO 1439 #define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ 1440 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) 1441 #define HSW_LLC_ACCESS HSW_ANY_RESPONSE 1442 1443 /* NOTE(review): the next macro uses BIT() while the HSW_* bits it is OR-ed with use BIT_ULL() - confirm the mix is intentional. */ #define BDW_L3_MISS_LOCAL BIT(26) 1444 #define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ 1445 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ 1446 HSW_L3_MISS_REMOTE_HOP2P) 1447 1448 1449 /* Cache event codes indexed [cache][op][result]; the LL and NODE read/write slots all use 0x1b7 (OFFCORE_RESPONSE). */ static __initconst const u64 hsw_hw_cache_event_ids 1450 [PERF_COUNT_HW_CACHE_MAX] 1451 [PERF_COUNT_HW_CACHE_OP_MAX] 1452 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1453 { 1454 [ C(L1D ) ] = { 1455 [ C(OP_READ) ] = { 1456 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ 1457 [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ 1458 }, 1459 [ C(OP_WRITE) ] = { 1460 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 1461 [ C(RESULT_MISS) ] = 0x0, 1462 }, 1463 [ 
C(OP_PREFETCH) ] = { 1464 [ C(RESULT_ACCESS) ] = 0x0, 1465 [ C(RESULT_MISS) ] = 0x0, 1466 }, 1467 }, 1468 [ C(L1I ) ] = { 1469 [ C(OP_READ) ] = { 1470 [ C(RESULT_ACCESS) ] = 0x0, 1471 [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ 1472 }, 1473 [ C(OP_WRITE) ] = { 1474 [ C(RESULT_ACCESS) ] = -1, 1475 [ C(RESULT_MISS) ] = -1, 1476 }, 1477 [ C(OP_PREFETCH) ] = { 1478 [ C(RESULT_ACCESS) ] = 0x0, 1479 [ C(RESULT_MISS) ] = 0x0, 1480 }, 1481 }, 1482 [ C(LL ) ] = { 1483 [ C(OP_READ) ] = { 1484 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1485 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1486 }, 1487 [ C(OP_WRITE) ] = { 1488 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1489 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1490 }, 1491 [ C(OP_PREFETCH) ] = { 1492 [ C(RESULT_ACCESS) ] = 0x0, 1493 [ C(RESULT_MISS) ] = 0x0, 1494 }, 1495 }, 1496 [ C(DTLB) ] = { 1497 [ C(OP_READ) ] = { 1498 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ 1499 [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ 1500 }, 1501 [ C(OP_WRITE) ] = { 1502 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 1503 [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ 1504 }, 1505 [ C(OP_PREFETCH) ] = { 1506 [ C(RESULT_ACCESS) ] = 0x0, 1507 [ C(RESULT_MISS) ] = 0x0, 1508 }, 1509 }, 1510 [ C(ITLB) ] = { 1511 [ C(OP_READ) ] = { 1512 [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ 1513 [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ 1514 }, 1515 [ C(OP_WRITE) ] = { 1516 [ C(RESULT_ACCESS) ] = -1, 1517 [ C(RESULT_MISS) ] = -1, 1518 }, 1519 [ C(OP_PREFETCH) ] = { 1520 [ C(RESULT_ACCESS) ] = -1, 1521 [ C(RESULT_MISS) ] = -1, 1522 }, 1523 }, 1524 [ C(BPU ) ] = { 1525 [ C(OP_READ) ] = { 1526 [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ 1527 [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 1528 }, 1529 [ C(OP_WRITE) ] = { 1530 [ C(RESULT_ACCESS) ] = -1, 1531 [ 
C(RESULT_MISS) ] = -1, 1532 }, 1533 [ C(OP_PREFETCH) ] = { 1534 [ C(RESULT_ACCESS) ] = -1, 1535 [ C(RESULT_MISS) ] = -1, 1536 }, 1537 }, 1538 [ C(NODE) ] = { 1539 [ C(OP_READ) ] = { 1540 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1541 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1542 }, 1543 [ C(OP_WRITE) ] = { 1544 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1545 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 1546 }, 1547 [ C(OP_PREFETCH) ] = { 1548 [ C(RESULT_ACCESS) ] = 0x0, 1549 [ C(RESULT_MISS) ] = 0x0, 1550 }, 1551 }, 1552 }; 1553 1554 /* Values composed from the HSW_* bits for the LL and NODE read/write slots; prefetch slots are 0. */ static __initconst const u64 hsw_hw_cache_extra_regs 1555 [PERF_COUNT_HW_CACHE_MAX] 1556 [PERF_COUNT_HW_CACHE_OP_MAX] 1557 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1558 { 1559 [ C(LL ) ] = { 1560 [ C(OP_READ) ] = { 1561 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| 1562 HSW_LLC_ACCESS, 1563 [ C(RESULT_MISS) ] = HSW_DEMAND_READ| 1564 HSW_L3_MISS|HSW_ANY_SNOOP, 1565 }, 1566 [ C(OP_WRITE) ] = { 1567 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| 1568 HSW_LLC_ACCESS, 1569 [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| 1570 HSW_L3_MISS|HSW_ANY_SNOOP, 1571 }, 1572 [ C(OP_PREFETCH) ] = { 1573 [ C(RESULT_ACCESS) ] = 0x0, 1574 [ C(RESULT_MISS) ] = 0x0, 1575 }, 1576 }, 1577 [ C(NODE) ] = { 1578 [ C(OP_READ) ] = { 1579 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| 1580 HSW_L3_MISS_LOCAL_DRAM| 1581 HSW_SNOOP_DRAM, 1582 [ C(RESULT_MISS) ] = HSW_DEMAND_READ| 1583 HSW_L3_MISS_REMOTE| 1584 HSW_SNOOP_DRAM, 1585 }, 1586 [ C(OP_WRITE) ] = { 1587 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| 1588 HSW_L3_MISS_LOCAL_DRAM| 1589 HSW_SNOOP_DRAM, 1590 [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| 1591 HSW_L3_MISS_REMOTE| 1592 HSW_SNOOP_DRAM, 1593 }, 1594 [ C(OP_PREFETCH) ] = { 1595 [ C(RESULT_ACCESS) ] = 0x0, 1596 [ C(RESULT_MISS) ] = 0x0, 1597 }, 1598 }, 1599 }; 1600 1601 /* Cache event codes indexed [cache][op][result]; every LL and NODE slot uses 0x01b7. */ static __initconst const u64 westmere_hw_cache_event_ids 1602 [PERF_COUNT_HW_CACHE_MAX] 1603 [PERF_COUNT_HW_CACHE_OP_MAX] 1604 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1605 { 1606 [ C(L1D) ] = { 1607 [ 
C(OP_READ) ] = { 1608 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 1609 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ 1610 }, 1611 [ C(OP_WRITE) ] = { 1612 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ 1613 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ 1614 }, 1615 [ C(OP_PREFETCH) ] = { 1616 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ 1617 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ 1618 }, 1619 }, 1620 [ C(L1I ) ] = { 1621 [ C(OP_READ) ] = { 1622 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ 1623 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ 1624 }, 1625 [ C(OP_WRITE) ] = { 1626 [ C(RESULT_ACCESS) ] = -1, 1627 [ C(RESULT_MISS) ] = -1, 1628 }, 1629 [ C(OP_PREFETCH) ] = { 1630 [ C(RESULT_ACCESS) ] = 0x0, 1631 [ C(RESULT_MISS) ] = 0x0, 1632 }, 1633 }, 1634 [ C(LL ) ] = { 1635 [ C(OP_READ) ] = { 1636 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 1637 [ C(RESULT_ACCESS) ] = 0x01b7, 1638 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 1639 [ C(RESULT_MISS) ] = 0x01b7, 1640 }, 1641 /* 1642 * Use RFO, not WRITEBACK, because a write miss would typically occur 1643 * on RFO. 
1644 */ 1645 [ C(OP_WRITE) ] = { 1646 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 1647 [ C(RESULT_ACCESS) ] = 0x01b7, 1648 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 1649 [ C(RESULT_MISS) ] = 0x01b7, 1650 }, 1651 [ C(OP_PREFETCH) ] = { 1652 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 1653 [ C(RESULT_ACCESS) ] = 0x01b7, 1654 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 1655 [ C(RESULT_MISS) ] = 0x01b7, 1656 }, 1657 }, 1658 [ C(DTLB) ] = { 1659 [ C(OP_READ) ] = { 1660 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 1661 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ 1662 }, 1663 [ C(OP_WRITE) ] = { 1664 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ 1665 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ 1666 }, 1667 [ C(OP_PREFETCH) ] = { 1668 [ C(RESULT_ACCESS) ] = 0x0, 1669 [ C(RESULT_MISS) ] = 0x0, 1670 }, 1671 }, 1672 [ C(ITLB) ] = { 1673 [ C(OP_READ) ] = { 1674 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ 1675 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ 1676 }, 1677 [ C(OP_WRITE) ] = { 1678 [ C(RESULT_ACCESS) ] = -1, 1679 [ C(RESULT_MISS) ] = -1, 1680 }, 1681 [ C(OP_PREFETCH) ] = { 1682 [ C(RESULT_ACCESS) ] = -1, 1683 [ C(RESULT_MISS) ] = -1, 1684 }, 1685 }, 1686 [ C(BPU ) ] = { 1687 [ C(OP_READ) ] = { 1688 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 1689 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ 1690 }, 1691 [ C(OP_WRITE) ] = { 1692 [ C(RESULT_ACCESS) ] = -1, 1693 [ C(RESULT_MISS) ] = -1, 1694 }, 1695 [ C(OP_PREFETCH) ] = { 1696 [ C(RESULT_ACCESS) ] = -1, 1697 [ C(RESULT_MISS) ] = -1, 1698 }, 1699 }, 1700 [ C(NODE) ] = { 1701 [ C(OP_READ) ] = { 1702 [ C(RESULT_ACCESS) ] = 0x01b7, 1703 [ C(RESULT_MISS) ] = 0x01b7, 1704 }, 1705 [ C(OP_WRITE) ] = { 1706 [ C(RESULT_ACCESS) ] = 0x01b7, 1707 [ C(RESULT_MISS) ] = 0x01b7, 1708 }, 1709 [ C(OP_PREFETCH) ] = { 1710 [ C(RESULT_ACCESS) ] = 0x01b7, 1711 [ C(RESULT_MISS) ] = 0x01b7, 1712 }, 1713 }, 1714 }; 1715 1716 /* 1717 * 
Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; 1718 * See IA32 SDM Vol 3B 30.6.1.3 1719 */ 1720 1721 #define NHM_DMND_DATA_RD (1 << 0) 1722 #define NHM_DMND_RFO (1 << 1) 1723 #define NHM_DMND_IFETCH (1 << 2) 1724 #define NHM_DMND_WB (1 << 3) 1725 #define NHM_PF_DATA_RD (1 << 4) 1726 #define NHM_PF_DATA_RFO (1 << 5) 1727 #define NHM_PF_IFETCH (1 << 6) 1728 #define NHM_OFFCORE_OTHER (1 << 7) 1729 #define NHM_UNCORE_HIT (1 << 8) 1730 #define NHM_OTHER_CORE_HIT_SNP (1 << 9) 1731 #define NHM_OTHER_CORE_HITM (1 << 10) 1732 /* reserved */ 1733 #define NHM_REMOTE_CACHE_FWD (1 << 12) 1734 #define NHM_REMOTE_DRAM (1 << 13) 1735 #define NHM_LOCAL_DRAM (1 << 14) 1736 #define NHM_NON_DRAM (1 << 15) 1737 1738 #define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) 1739 #define NHM_REMOTE (NHM_REMOTE_DRAM) 1740 1741 #define NHM_DMND_READ (NHM_DMND_DATA_RD) 1742 #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) 1743 #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 1744 1745 #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 1746 #define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) 1747 #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 1748 1749 static __initconst const u64 nehalem_hw_cache_extra_regs 1750 [PERF_COUNT_HW_CACHE_MAX] 1751 [PERF_COUNT_HW_CACHE_OP_MAX] 1752 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1753 { 1754 [ C(LL ) ] = { 1755 [ C(OP_READ) ] = { 1756 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, 1757 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, 1758 }, 1759 [ C(OP_WRITE) ] = { 1760 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, 1761 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, 1762 }, 1763 [ C(OP_PREFETCH) ] = { 1764 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, 1765 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, 1766 }, 1767 }, 1768 [ C(NODE) ] = { 1769 [ C(OP_READ) ] = { 1770 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, 1771 [ C(RESULT_MISS) ] = 
NHM_DMND_READ|NHM_REMOTE, 1772 }, 1773 [ C(OP_WRITE) ] = { 1774 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, 1775 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, 1776 }, 1777 [ C(OP_PREFETCH) ] = { 1778 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, 1779 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, 1780 }, 1781 }, 1782 }; 1783 1784 static __initconst const u64 nehalem_hw_cache_event_ids 1785 [PERF_COUNT_HW_CACHE_MAX] 1786 [PERF_COUNT_HW_CACHE_OP_MAX] 1787 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1788 { 1789 [ C(L1D) ] = { 1790 [ C(OP_READ) ] = { 1791 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 1792 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ 1793 }, 1794 [ C(OP_WRITE) ] = { 1795 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ 1796 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ 1797 }, 1798 [ C(OP_PREFETCH) ] = { 1799 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ 1800 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ 1801 }, 1802 }, 1803 [ C(L1I ) ] = { 1804 [ C(OP_READ) ] = { 1805 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ 1806 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ 1807 }, 1808 [ C(OP_WRITE) ] = { 1809 [ C(RESULT_ACCESS) ] = -1, 1810 [ C(RESULT_MISS) ] = -1, 1811 }, 1812 [ C(OP_PREFETCH) ] = { 1813 [ C(RESULT_ACCESS) ] = 0x0, 1814 [ C(RESULT_MISS) ] = 0x0, 1815 }, 1816 }, 1817 [ C(LL ) ] = { 1818 [ C(OP_READ) ] = { 1819 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 1820 [ C(RESULT_ACCESS) ] = 0x01b7, 1821 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 1822 [ C(RESULT_MISS) ] = 0x01b7, 1823 }, 1824 /* 1825 * Use RFO, not WRITEBACK, because a write miss would typically occur 1826 * on RFO. 
1827 */ 1828 [ C(OP_WRITE) ] = { 1829 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 1830 [ C(RESULT_ACCESS) ] = 0x01b7, 1831 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 1832 [ C(RESULT_MISS) ] = 0x01b7, 1833 }, 1834 [ C(OP_PREFETCH) ] = { 1835 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 1836 [ C(RESULT_ACCESS) ] = 0x01b7, 1837 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 1838 [ C(RESULT_MISS) ] = 0x01b7, 1839 }, 1840 }, 1841 [ C(DTLB) ] = { 1842 [ C(OP_READ) ] = { 1843 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ 1844 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ 1845 }, 1846 [ C(OP_WRITE) ] = { 1847 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ 1848 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ 1849 }, 1850 [ C(OP_PREFETCH) ] = { 1851 [ C(RESULT_ACCESS) ] = 0x0, 1852 [ C(RESULT_MISS) ] = 0x0, 1853 }, 1854 }, 1855 [ C(ITLB) ] = { 1856 [ C(OP_READ) ] = { 1857 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ 1858 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ 1859 }, 1860 [ C(OP_WRITE) ] = { 1861 [ C(RESULT_ACCESS) ] = -1, 1862 [ C(RESULT_MISS) ] = -1, 1863 }, 1864 [ C(OP_PREFETCH) ] = { 1865 [ C(RESULT_ACCESS) ] = -1, 1866 [ C(RESULT_MISS) ] = -1, 1867 }, 1868 }, 1869 [ C(BPU ) ] = { 1870 [ C(OP_READ) ] = { 1871 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 1872 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ 1873 }, 1874 [ C(OP_WRITE) ] = { 1875 [ C(RESULT_ACCESS) ] = -1, 1876 [ C(RESULT_MISS) ] = -1, 1877 }, 1878 [ C(OP_PREFETCH) ] = { 1879 [ C(RESULT_ACCESS) ] = -1, 1880 [ C(RESULT_MISS) ] = -1, 1881 }, 1882 }, 1883 [ C(NODE) ] = { 1884 [ C(OP_READ) ] = { 1885 [ C(RESULT_ACCESS) ] = 0x01b7, 1886 [ C(RESULT_MISS) ] = 0x01b7, 1887 }, 1888 [ C(OP_WRITE) ] = { 1889 [ C(RESULT_ACCESS) ] = 0x01b7, 1890 [ C(RESULT_MISS) ] = 0x01b7, 1891 }, 1892 [ C(OP_PREFETCH) ] = { 1893 [ C(RESULT_ACCESS) ] = 0x01b7, 1894 [ C(RESULT_MISS) ] = 0x01b7, 1895 }, 1896 }, 1897 }; 1898 1899 static 
__initconst const u64 core2_hw_cache_event_ids 1900 [PERF_COUNT_HW_CACHE_MAX] 1901 [PERF_COUNT_HW_CACHE_OP_MAX] 1902 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1903 { 1904 [ C(L1D) ] = { 1905 [ C(OP_READ) ] = { 1906 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ 1907 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ 1908 }, 1909 [ C(OP_WRITE) ] = { 1910 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ 1911 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ 1912 }, 1913 [ C(OP_PREFETCH) ] = { 1914 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ 1915 [ C(RESULT_MISS) ] = 0, 1916 }, 1917 }, 1918 [ C(L1I ) ] = { 1919 [ C(OP_READ) ] = { 1920 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ 1921 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ 1922 }, 1923 [ C(OP_WRITE) ] = { 1924 [ C(RESULT_ACCESS) ] = -1, 1925 [ C(RESULT_MISS) ] = -1, 1926 }, 1927 [ C(OP_PREFETCH) ] = { 1928 [ C(RESULT_ACCESS) ] = 0, 1929 [ C(RESULT_MISS) ] = 0, 1930 }, 1931 }, 1932 [ C(LL ) ] = { 1933 [ C(OP_READ) ] = { 1934 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ 1935 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ 1936 }, 1937 [ C(OP_WRITE) ] = { 1938 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ 1939 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ 1940 }, 1941 [ C(OP_PREFETCH) ] = { 1942 [ C(RESULT_ACCESS) ] = 0, 1943 [ C(RESULT_MISS) ] = 0, 1944 }, 1945 }, 1946 [ C(DTLB) ] = { 1947 [ C(OP_READ) ] = { 1948 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ 1949 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ 1950 }, 1951 [ C(OP_WRITE) ] = { 1952 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ 1953 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ 1954 }, 1955 [ C(OP_PREFETCH) ] = { 1956 [ C(RESULT_ACCESS) ] = 0, 1957 [ C(RESULT_MISS) ] = 0, 1958 }, 1959 }, 1960 [ C(ITLB) ] = { 1961 [ C(OP_READ) ] = { 1962 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ 1963 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ 1964 }, 1965 [ 
/*
 * Event encodings for the generic cache events, indexed by
 * [cache][operation][result].  The trailing comment on each entry names the
 * hardware event the encoding is meant to select.
 *
 * The DTLB access slots reuse the L1D access encodings (0x2140 / 0x2240),
 * hence the "(alias)" markers.
 *
 * NOTE(review): -1 and 0 entries are presumably "combination not valid" and
 * "no event available" respectively - confirm against the code that consumes
 * this table.
 */
static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD (alias) */
		[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST (alias) */
		[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
/*
 * Event encodings for the generic cache events, indexed by
 * [cache][operation][result].  The trailing comment on each entry names the
 * hardware event the encoding is meant to select.
 *
 * The populated LL slots all hold 0x01b7, the event that intel_slm_extra_regs
 * associates with MSR_OFFCORE_RSP_0.  LL read-miss is 0 here and in
 * slm_hw_cache_extra_regs alike.
 *
 * NOTE(review): -1 and 0 entries are presumably "combination not valid" and
 * "no event available" respectively - confirm against the code that consumes
 * this table.
 */
static __initconst const u64 slm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
/*
 * NULL-terminated list of the *_glm event attributes declared with
 * EVENT_ATTR_STR() immediately above.  Each comment repeats the name and
 * value string of the referenced attribute.
 */
static struct attribute *glm_events_attrs[] = {
	EVENT_PTR(td_total_slots_glm),		/* topdown-total-slots: event=0x3c */
	EVENT_PTR(td_total_slots_scale_glm),	/* topdown-total-slots.scale: 3 */
	EVENT_PTR(td_fetch_bubbles_glm),	/* topdown-fetch-bubbles: event=0x9c */
	EVENT_PTR(td_recovery_bubbles_glm),	/* topdown-recovery-bubbles: event=0xca,umask=0x02 */
	EVENT_PTR(td_slots_issued_glm),		/* topdown-slots-issued: event=0x0e */
	EVENT_PTR(td_slots_retired_glm),	/* topdown-slots-retired: event=0xc2 */
	NULL
};
0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 2287 [C(RESULT_MISS)] = 0x0, 2288 }, 2289 [C(OP_PREFETCH)] = { 2290 [C(RESULT_ACCESS)] = 0x0, 2291 [C(RESULT_MISS)] = 0x0, 2292 }, 2293 }, 2294 [C(L1I)] = { 2295 [C(OP_READ)] = { 2296 [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ 2297 [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ 2298 }, 2299 [C(OP_WRITE)] = { 2300 [C(RESULT_ACCESS)] = -1, 2301 [C(RESULT_MISS)] = -1, 2302 }, 2303 [C(OP_PREFETCH)] = { 2304 [C(RESULT_ACCESS)] = 0x0, 2305 [C(RESULT_MISS)] = 0x0, 2306 }, 2307 }, 2308 [C(LL)] = { 2309 [C(OP_READ)] = { 2310 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2311 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2312 }, 2313 [C(OP_WRITE)] = { 2314 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2315 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2316 }, 2317 [C(OP_PREFETCH)] = { 2318 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2319 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ 2320 }, 2321 }, 2322 [C(DTLB)] = { 2323 [C(OP_READ)] = { 2324 [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ 2325 [C(RESULT_MISS)] = 0x0, 2326 }, 2327 [C(OP_WRITE)] = { 2328 [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 2329 [C(RESULT_MISS)] = 0x0, 2330 }, 2331 [C(OP_PREFETCH)] = { 2332 [C(RESULT_ACCESS)] = 0x0, 2333 [C(RESULT_MISS)] = 0x0, 2334 }, 2335 }, 2336 [C(ITLB)] = { 2337 [C(OP_READ)] = { 2338 [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ 2339 [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ 2340 }, 2341 [C(OP_WRITE)] = { 2342 [C(RESULT_ACCESS)] = -1, 2343 [C(RESULT_MISS)] = -1, 2344 }, 2345 [C(OP_PREFETCH)] = { 2346 [C(RESULT_ACCESS)] = -1, 2347 [C(RESULT_MISS)] = -1, 2348 }, 2349 }, 2350 [C(BPU)] = { 2351 [C(OP_READ)] = { 2352 [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 2353 [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 2354 }, 2355 [C(OP_WRITE)] = { 2356 [C(RESULT_ACCESS)] = -1, 2357 [C(RESULT_MISS)] = -1, 2358 }, 2359 [C(OP_PREFETCH)] = { 
/*
 * Companion to glm_hw_cache_event_ids, indexed the same way
 * ([cache][operation][result]).  Only the LL rows are populated; all other
 * slots are left zero-initialized.
 *
 * Each LL value ORs a request-type mask (GLM_DEMAND_READ / _WRITE /
 * _PREFETCH) with a response mask (GLM_LLC_ACCESS or GLM_LLC_MISS), all
 * defined by the GLM_* macros above.  The matching slots in
 * glm_hw_cache_event_ids all hold 0x1b7, the event that intel_glm_extra_regs
 * associates with MSR_OFFCORE_RSP_0.
 */
static __initconst const u64 glm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_READ|
					   GLM_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
					   GLM_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
					   GLM_LLC_MISS,
		},
	},
};
/*
 * Companion to glp_hw_cache_event_ids, indexed the same way
 * ([cache][operation][result]).  Only the LL rows are populated.
 *
 * Read and write slots reuse the GLM_* request/response masks.  The prefetch
 * slots are 0x0, matching the 0x0 LL prefetch entries in
 * glp_hw_cache_event_ids.
 */
static __initconst const u64 glp_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_READ|
					   GLM_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
					   GLM_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
};
/*
 * Extra-register values for the generic cache events, indexed by
 * [cache][operation][result].  Only the LL rows are populated.
 *
 * Read and write slots OR a request mask (TNT_DEMAND_READ / _WRITE) with a
 * response mask (TNT_LLC_ACCESS or TNT_LLC_MISS), as defined by the TNT_*
 * macros above.  Both prefetch slots are 0x0.
 */
static __initconst const u64 tnt_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = TNT_DEMAND_READ|
					     TNT_LLC_ACCESS,
			[C(RESULT_MISS)] = TNT_DEMAND_READ|
					   TNT_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = TNT_DEMAND_WRITE|
					     TNT_LLC_ACCESS,
			[C(RESULT_MISS)] = TNT_DEMAND_WRITE|
					   TNT_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
};
/*
 * Extra-register values for the generic cache events, indexed by
 * [cache][operation][result].  Only LL read and write are populated; the
 * prefetch row and all other caches are left zero-initialized.
 *
 * Unlike the neighbouring tables, whose entries are labelled OCR.*, these
 * entries are labelled OMR.*.
 * NOTE(review): presumably these values are programmed through the MSR_OMR_x
 * registers listed in intel_arw_extra_regs - confirm.
 */
static __initconst const u64 arw_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x4000000000000009, /* OMR.DEMAND_DATA_RD.ANY_RESPONSE */
			[C(RESULT_MISS)] = 0xFF03F000000009, /* OMR.DEMAND_DATA_RD.L3_MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x400000000000000A, /* OMR.DEMAND_RFO.ANY_RESPONSE */
			[C(RESULT_MISS)] = 0xFF03F00000000A, /* OMR.DEMAND_RFO.L3_MISS */
		},
	},
};
/*
 * Extra registers for this PMU: two offcore-response MSRs (selected by
 * events 0x01b7 and 0x02b7) plus the PEBS load-latency register for event
 * 0x5d0.  Event 0x5d0 is the same event/umask pair as the "mem-loads"
 * attribute (event=0xd0,umask=0x5) defined above.  The list is terminated by
 * EVENT_EXTRA_END.
 */
static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
	/*
	 * Must define OFFCORE_RSP_X first, see intel_fixup_er().
	 * Bit 63 only valid on OFFCORE_RSP_0 MSR.
	 */
	/* The two valid-bit masks below differ only in bit 63. */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x8003f03fffffffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x3f03fffffffffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
	EVENT_EXTRA_END
};
/*
 * Extra-register values for the generic cache events, indexed by
 * [cache][operation][result].  Only the LL rows are populated.
 *
 * Each value ORs a request mask (KNL_L2_READ / _WRITE / _PREFETCH) with a
 * response mask (KNL_L2_ACCESS or KNL_L2_MISS), as defined by the KNL_*
 * macros above.  The read-miss slot is 0.
 */
static __initconst const u64 knl_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = 0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
		},
	},
};
2759 */ 2760 static __always_inline void __intel_pmu_disable_all(bool bts) 2761 { 2762 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2763 2764 wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); 2765 2766 if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 2767 intel_pmu_disable_bts(); 2768 } 2769 2770 static __always_inline void intel_pmu_disable_all(void) 2771 { 2772 __intel_pmu_disable_all(true); 2773 static_call_cond(x86_pmu_pebs_disable_all)(); 2774 intel_pmu_lbr_disable_all(); 2775 } 2776 2777 static void __intel_pmu_enable_all(int added, bool pmi) 2778 { 2779 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2780 u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); 2781 2782 intel_pmu_lbr_enable_all(pmi); 2783 2784 if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { 2785 wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); 2786 cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; 2787 } 2788 2789 wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 2790 intel_ctrl & ~cpuc->intel_ctrl_guest_mask); 2791 2792 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 2793 struct perf_event *event = 2794 cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; 2795 2796 if (WARN_ON_ONCE(!event)) 2797 return; 2798 2799 intel_pmu_enable_bts(event->hw.config); 2800 } 2801 } 2802 2803 static void intel_pmu_enable_all(int added) 2804 { 2805 static_call_cond(x86_pmu_pebs_enable_all)(); 2806 __intel_pmu_enable_all(added, false); 2807 } 2808 2809 static noinline int 2810 __intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, 2811 unsigned int cnt, unsigned long flags) 2812 { 2813 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2814 2815 intel_pmu_lbr_read(); 2816 cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr); 2817 2818 memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); 2819 intel_pmu_enable_all(0); 2820 local_irq_restore(flags); 2821 return cnt; 2822 } 2823 2824 static int 2825 intel_pmu_snapshot_branch_stack(struct 
/*
 * Snapshot the branch stack into @entries, using the arch-LBR disable
 * routine.
 *
 * @entries: destination buffer for the branch records.
 * @cnt:     capacity of @entries, in records.
 *
 * Interrupts are disabled and the PMU and arch LBRs are stopped here; the
 * shared helper __intel_pmu_snapshot_branch_stack() then reads the LBRs,
 * copies at most min(@cnt, x86_pmu.lbr_nr) records, re-enables the PMU and
 * restores the saved interrupt flags.
 *
 * Returns the number of records copied.
 *
 * Identical to intel_pmu_snapshot_branch_stack() except that it calls
 * __intel_pmu_arch_lbr_disable() instead of __intel_pmu_lbr_disable().
 */
static int
intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
	unsigned long flags;

	/*
	 * must not have branches...
	 *
	 * NOTE(review): presumably so that this function's own control flow
	 * is not recorded before the LBRs are stopped - confirm.
	 */
	local_irq_save(flags);
	__intel_pmu_disable_all(false); /* we don't care about BTS */
	__intel_pmu_arch_lbr_disable();
	/* ... until here */
	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
}
2863 */ 2864 static void intel_pmu_nhm_workaround(void) 2865 { 2866 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2867 static const unsigned long nhm_magic[4] = { 2868 0x4300B5, 2869 0x4300D2, 2870 0x4300B1, 2871 0x4300B1 2872 }; 2873 struct perf_event *event; 2874 int i; 2875 2876 /* 2877 * The Errata requires below steps: 2878 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; 2879 * 2) Configure 4 PERFEVTSELx with the magic events and clear 2880 * the corresponding PMCx; 2881 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; 2882 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; 2883 * 5) Clear 4 pairs of ERFEVTSELx and PMCx; 2884 */ 2885 2886 /* 2887 * The real steps we choose are a little different from above. 2888 * A) To reduce MSR operations, we don't run step 1) as they 2889 * are already cleared before this function is called; 2890 * B) Call x86_perf_event_update to save PMCx before configuring 2891 * PERFEVTSELx with magic number; 2892 * C) With step 5), we do clear only when the PERFEVTSELx is 2893 * not used currently. 
2894 * D) Call x86_perf_event_set_period to restore PMCx; 2895 */ 2896 2897 /* We always operate 4 pairs of PERF Counters */ 2898 for (i = 0; i < 4; i++) { 2899 event = cpuc->events[i]; 2900 if (event) 2901 static_call(x86_pmu_update)(event); 2902 } 2903 2904 for (i = 0; i < 4; i++) { 2905 wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); 2906 wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); 2907 } 2908 2909 wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); 2910 wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); 2911 2912 for (i = 0; i < 4; i++) { 2913 event = cpuc->events[i]; 2914 2915 if (event) { 2916 static_call(x86_pmu_set_period)(event); 2917 __x86_pmu_enable_event(&event->hw, 2918 ARCH_PERFMON_EVENTSEL_ENABLE); 2919 } else 2920 wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); 2921 } 2922 } 2923 2924 static void intel_pmu_nhm_enable_all(int added) 2925 { 2926 if (added) 2927 intel_pmu_nhm_workaround(); 2928 intel_pmu_enable_all(added); 2929 } 2930 2931 static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) 2932 { 2933 u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0; 2934 2935 if (cpuc->tfa_shadow != val) { 2936 cpuc->tfa_shadow = val; 2937 wrmsrq(MSR_TSX_FORCE_ABORT, val); 2938 } 2939 } 2940 2941 static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) 2942 { 2943 /* 2944 * We're going to use PMC3, make sure TFA is set before we touch it. 2945 */ 2946 if (cntr == 3) 2947 intel_set_tfa(cpuc, true); 2948 } 2949 2950 static void intel_tfa_pmu_enable_all(int added) 2951 { 2952 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2953 2954 /* 2955 * If we find PMC3 is no longer used when we enable the PMU, we can 2956 * clear TFA. 
2957 */ 2958 if (!test_bit(3, cpuc->active_mask)) 2959 intel_set_tfa(cpuc, false); 2960 2961 intel_pmu_enable_all(added); 2962 } 2963 2964 static inline u64 intel_pmu_get_status(void) 2965 { 2966 u64 status; 2967 2968 rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); 2969 2970 return status; 2971 } 2972 2973 static inline void intel_pmu_ack_status(u64 ack) 2974 { 2975 wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 2976 } 2977 2978 static inline bool event_is_checkpointed(struct perf_event *event) 2979 { 2980 return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; 2981 } 2982 2983 static inline void intel_set_masks(struct perf_event *event, int idx) 2984 { 2985 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2986 2987 if (event->attr.exclude_host) 2988 __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); 2989 if (event->attr.exclude_guest) 2990 __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); 2991 if (event_is_checkpointed(event)) 2992 __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status); 2993 } 2994 2995 static inline void intel_clear_masks(struct perf_event *event, int idx) 2996 { 2997 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2998 2999 __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); 3000 __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); 3001 __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status); 3002 } 3003 3004 static void intel_pmu_disable_fixed(struct perf_event *event) 3005 { 3006 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3007 struct hw_perf_event *hwc = &event->hw; 3008 int idx = hwc->idx; 3009 u64 mask; 3010 3011 if (is_topdown_idx(idx)) { 3012 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3013 3014 /* 3015 * When there are other active TopDown events, 3016 * don't disable the fixed counter 3. 
3017 */ 3018 if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) 3019 return; 3020 idx = INTEL_PMC_IDX_FIXED_SLOTS; 3021 } 3022 3023 intel_clear_masks(event, idx); 3024 3025 mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK); 3026 cpuc->fixed_ctrl_val &= ~mask; 3027 } 3028 3029 static inline void __intel_pmu_update_event_ext(int idx, u64 ext) 3030 { 3031 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3032 u32 msr; 3033 3034 if (idx < INTEL_PMC_IDX_FIXED) { 3035 msr = MSR_IA32_PMC_V6_GP0_CFG_C + 3036 x86_pmu.addr_offset(idx, false); 3037 } else { 3038 msr = MSR_IA32_PMC_V6_FX0_CFG_C + 3039 x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); 3040 } 3041 3042 cpuc->cfg_c_val[idx] = ext; 3043 wrmsrq(msr, ext); 3044 } 3045 3046 static void intel_pmu_disable_event_ext(struct perf_event *event) 3047 { 3048 /* 3049 * Only clear CFG_C MSR for PEBS counter group events, 3050 * it avoids the HW counter's value to be added into 3051 * other PEBS records incorrectly after PEBS counter 3052 * group events are disabled. 3053 * 3054 * For other events, it's unnecessary to clear CFG_C MSRs 3055 * since CFG_C doesn't take effect if counter is in 3056 * disabled state. That helps to reduce the WRMSR overhead 3057 * in context switches. 3058 */ 3059 if (!is_pebs_counter_event_group(event)) 3060 return; 3061 3062 __intel_pmu_update_event_ext(event->hw.idx, 0); 3063 } 3064 3065 DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext); 3066 3067 static void intel_pmu_disable_event(struct perf_event *event) 3068 { 3069 struct hw_perf_event *hwc = &event->hw; 3070 int idx = hwc->idx; 3071 3072 switch (idx) { 3073 case 0 ... INTEL_PMC_IDX_FIXED - 1: 3074 intel_clear_masks(event, idx); 3075 static_call_cond(intel_pmu_disable_event_ext)(event); 3076 x86_pmu_disable_event(event); 3077 break; 3078 case INTEL_PMC_IDX_FIXED ... 
INTEL_PMC_IDX_FIXED_BTS - 1: 3079 static_call_cond(intel_pmu_disable_event_ext)(event); 3080 fallthrough; 3081 case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: 3082 intel_pmu_disable_fixed(event); 3083 break; 3084 case INTEL_PMC_IDX_FIXED_BTS: 3085 intel_pmu_disable_bts(); 3086 intel_pmu_drain_bts_buffer(); 3087 return; 3088 case INTEL_PMC_IDX_FIXED_VLBR: 3089 intel_clear_masks(event, idx); 3090 break; 3091 default: 3092 intel_clear_masks(event, idx); 3093 pr_warn("Failed to disable the event with invalid index %d\n", 3094 idx); 3095 return; 3096 } 3097 3098 /* 3099 * Needs to be called after x86_pmu_disable_event, 3100 * so we don't trigger the event without PEBS bit set. 3101 */ 3102 if (unlikely(event->attr.precise_ip)) 3103 static_call(x86_pmu_pebs_disable)(event); 3104 } 3105 3106 static void intel_pmu_assign_event(struct perf_event *event, int idx) 3107 { 3108 if (is_pebs_pt(event)) 3109 perf_report_aux_output_id(event, idx); 3110 } 3111 3112 static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event) 3113 { 3114 return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK; 3115 } 3116 3117 static void intel_pmu_del_event(struct perf_event *event) 3118 { 3119 if (intel_pmu_needs_branch_stack(event)) 3120 intel_pmu_lbr_del(event); 3121 if (event->attr.precise_ip) 3122 intel_pmu_pebs_del(event); 3123 if (is_pebs_counter_event_group(event) || 3124 is_acr_event_group(event)) 3125 this_cpu_ptr(&cpu_hw_events)->n_late_setup--; 3126 } 3127 3128 static int icl_set_topdown_event_period(struct perf_event *event) 3129 { 3130 struct hw_perf_event *hwc = &event->hw; 3131 s64 left = local64_read(&hwc->period_left); 3132 3133 /* 3134 * The values in PERF_METRICS MSR are derived from fixed counter 3. 3135 * Software should start both registers, PERF_METRICS and fixed 3136 * counter 3, from zero. 3137 * Clear PERF_METRICS and Fixed counter 3 in initialization. 3138 * After that, both MSRs will be cleared for each read. 
3139 * Don't need to clear them again. 3140 */ 3141 if (left == x86_pmu.max_period) { 3142 wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0); 3143 wrmsrq(MSR_PERF_METRICS, 0); 3144 hwc->saved_slots = 0; 3145 hwc->saved_metric = 0; 3146 } 3147 3148 if ((hwc->saved_slots) && is_slots_event(event)) { 3149 wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); 3150 wrmsrq(MSR_PERF_METRICS, hwc->saved_metric); 3151 } 3152 3153 perf_event_update_userpage(event); 3154 3155 return 0; 3156 } 3157 3158 DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); 3159 3160 static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) 3161 { 3162 u32 val; 3163 3164 /* 3165 * The metric is reported as an 8bit integer fraction 3166 * summing up to 0xff. 3167 * slots-in-metric = (Metric / 0xff) * slots 3168 */ 3169 val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff; 3170 return mul_u64_u32_div(slots, val, 0xff); 3171 } 3172 3173 static u64 icl_get_topdown_value(struct perf_event *event, 3174 u64 slots, u64 metrics) 3175 { 3176 int idx = event->hw.idx; 3177 u64 delta; 3178 3179 if (is_metric_idx(idx)) 3180 delta = icl_get_metrics_event_value(metrics, slots, idx); 3181 else 3182 delta = slots; 3183 3184 return delta; 3185 } 3186 3187 static void __icl_update_topdown_event(struct perf_event *event, 3188 u64 slots, u64 metrics, 3189 u64 last_slots, u64 last_metrics) 3190 { 3191 u64 delta, last = 0; 3192 3193 delta = icl_get_topdown_value(event, slots, metrics); 3194 if (last_slots) 3195 last = icl_get_topdown_value(event, last_slots, last_metrics); 3196 3197 /* 3198 * The 8bit integer fraction of metric may be not accurate, 3199 * especially when the changes is very small. 3200 * For example, if only a few bad_spec happens, the fraction 3201 * may be reduced from 1 to 0. If so, the bad_spec event value 3202 * will be 0 which is definitely less than the last value. 3203 * Avoid update event->count for this case. 
/*
 * Update all active Topdown events.
 *
 * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
 * modified by an NMI. The PMU has to be disabled before calling this function.
 *
 * @event:      event on whose behalf the update runs; may be NULL (the PMI
 *              path in handle_pmi_common() passes NULL), in which case the
 *              saved slots/metric baseline is taken as 0
 * @metric_end: highest counter index to scan in cpuc->active_mask
 * @val:        optional pre-read values, val[0] = slots and val[1] = metrics;
 *              when NULL both are read with RDPMC
 *
 * Returns the slots value used for the update, or 0 when @val is NULL and
 * fixed counter 3 reads as 0 (nothing is updated in that case).
 */
static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *other;
	u64 slots, metrics;
	bool reset = true;
	int idx;

	if (!val) {
		/* read Fixed counter 3 */
		slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE);
		if (!slots)
			return 0;

		/* read PERF_METRICS */
		metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
	} else {
		slots = val[0];
		metrics = val[1];
		/*
		 * Don't reset the PERF_METRICS and Fixed counter 3
		 * for each PEBS record read. Utilize the RDPMC metrics
		 * clear mode.
		 */
		reset = false;
	}

	/*
	 * Fold the new values into every active TopDown event, using
	 * @event's saved values (or 0 without an event) as the baseline.
	 */
	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
		if (!is_topdown_idx(idx))
			continue;
		other = cpuc->events[idx];
		__icl_update_topdown_event(other, slots, metrics,
					   event ? event->hw.saved_slots : 0,
					   event ? event->hw.saved_metric : 0);
	}

	/*
	 * Check and update this event, which may have been cleared
	 * in active_mask e.g. x86_pmu_stop()
	 */
	if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
		__icl_update_topdown_event(event, slots, metrics,
					   event->hw.saved_slots,
					   event->hw.saved_metric);

		/*
		 * In x86_pmu_stop(), the event is cleared in active_mask first,
		 * then the delta is drained, which indicates a context switch
		 * for counting.
		 * Save metric and slots for the context switch.
		 * There is no need to reset PERF_METRICS and Fixed counter 3,
		 * because the values will be restored on the next schedule in.
		 */
		update_saved_topdown_regs(event, slots, metrics, metric_end);
		reset = false;
	}

	if (reset) {
		/* The fixed counter 3 has to be written before the PERF_METRICS. */
		wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
		wrmsrq(MSR_PERF_METRICS, 0);
		/* The hardware now starts from zero, so does the saved baseline. */
		if (event)
			update_saved_topdown_regs(event, 0, 0, metric_end);
	}

	return slots;
}
*/ 3322 if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ)) 3323 return; 3324 3325 cpuc->enabled = 0; 3326 if (pmu_enabled) 3327 intel_pmu_disable_all(); 3328 3329 /* 3330 * If the PEBS counters snapshotting is enabled, 3331 * the topdown event is available in PEBS records. 3332 */ 3333 if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) 3334 static_call(intel_pmu_update_topdown_event)(event, NULL); 3335 else 3336 intel_pmu_drain_pebs_buffer(); 3337 3338 cpuc->enabled = pmu_enabled; 3339 if (pmu_enabled) 3340 intel_pmu_enable_all(0); 3341 3342 return; 3343 } 3344 3345 x86_perf_event_update(event); 3346 } 3347 3348 static void intel_pmu_enable_fixed(struct perf_event *event) 3349 { 3350 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3351 struct hw_perf_event *hwc = &event->hw; 3352 int idx = hwc->idx; 3353 u64 bits = 0; 3354 3355 if (is_topdown_idx(idx)) { 3356 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3357 /* 3358 * When there are other active TopDown events, 3359 * don't enable the fixed counter 3 again. 
3360 */ 3361 if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) 3362 return; 3363 3364 idx = INTEL_PMC_IDX_FIXED_SLOTS; 3365 3366 if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR) 3367 bits |= INTEL_FIXED_3_METRICS_CLEAR; 3368 } 3369 3370 intel_set_masks(event, idx); 3371 3372 /* 3373 * Enable IRQ generation (0x8), if not PEBS or self-reloaded 3374 * ACR event, and enable ring-3 counting (0x2) and ring-0 3375 * counting (0x1) if requested: 3376 */ 3377 if (!event->attr.precise_ip && !is_acr_self_reload_event(event)) 3378 bits |= INTEL_FIXED_0_ENABLE_PMI; 3379 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) 3380 bits |= INTEL_FIXED_0_USER; 3381 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 3382 bits |= INTEL_FIXED_0_KERNEL; 3383 if (hwc->config & ARCH_PERFMON_EVENTSEL_RDPMC_USER_DISABLE) 3384 bits |= INTEL_FIXED_0_RDPMC_USER_DISABLE; 3385 3386 /* 3387 * ANY bit is supported in v3 and up 3388 */ 3389 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) 3390 bits |= INTEL_FIXED_0_ANYTHREAD; 3391 3392 idx -= INTEL_PMC_IDX_FIXED; 3393 bits = intel_fixed_bits_by_idx(idx, bits); 3394 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) 3395 bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE); 3396 3397 cpuc->fixed_ctrl_val &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK); 3398 cpuc->fixed_ctrl_val |= bits; 3399 } 3400 3401 static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) 3402 { 3403 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3404 int msr_b, msr_c; 3405 int msr_offset; 3406 3407 if (!mask && !cpuc->acr_cfg_b[idx]) 3408 return; 3409 3410 if (idx < INTEL_PMC_IDX_FIXED) { 3411 msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; 3412 msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; 3413 msr_offset = x86_pmu.addr_offset(idx, false); 3414 } else { 3415 msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; 3416 msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; 3417 msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); 3418 } 3419 3420 if 
(cpuc->acr_cfg_b[idx] != mask) { 3421 wrmsrq(msr_b + msr_offset, mask); 3422 cpuc->acr_cfg_b[idx] = mask; 3423 } 3424 /* Only update CFG_C reload when ACR is actively enabled (mask != 0) */ 3425 if (mask && ((cpuc->cfg_c_val[idx] & ARCH_PEBS_RELOAD) != reload)) { 3426 wrmsrq(msr_c + msr_offset, reload); 3427 cpuc->cfg_c_val[idx] = reload; 3428 } 3429 } 3430 3431 static void intel_pmu_enable_acr(struct perf_event *event) 3432 { 3433 struct hw_perf_event *hwc = &event->hw; 3434 3435 if (!is_acr_event_group(event) || !event->attr.config2) { 3436 /* 3437 * The disable doesn't clear the ACR CFG register. 3438 * Check and clear the ACR CFG register. 3439 */ 3440 intel_pmu_config_acr(hwc->idx, 0, 0); 3441 return; 3442 } 3443 3444 intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period); 3445 } 3446 3447 DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); 3448 3449 static void intel_pmu_enable_event_ext(struct perf_event *event) 3450 { 3451 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3452 struct hw_perf_event *hwc = &event->hw; 3453 u64 ext = 0; 3454 3455 if (is_acr_event_group(event)) 3456 ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD; 3457 3458 if (event->attr.precise_ip) { 3459 u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event); 3460 struct arch_pebs_cap cap = hybrid(cpuc->pmu, arch_pebs_cap); 3461 union arch_pebs_index old, new; 3462 3463 ext |= ARCH_PEBS_EN; 3464 if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) 3465 ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD; 3466 3467 if (pebs_data_cfg && cap.caps) { 3468 if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) 3469 ext |= ARCH_PEBS_AUX & cap.caps; 3470 3471 if (pebs_data_cfg & PEBS_DATACFG_GP) 3472 ext |= ARCH_PEBS_GPR & cap.caps; 3473 3474 if (pebs_data_cfg & PEBS_DATACFG_XMMS) 3475 ext |= ARCH_PEBS_VECR_XMM & cap.caps; 3476 3477 if (pebs_data_cfg & PEBS_DATACFG_LBRS) 3478 ext |= ARCH_PEBS_LBR & cap.caps; 3479 3480 if (pebs_data_cfg & 3481 (PEBS_DATACFG_CNTR_MASK << 
PEBS_DATACFG_CNTR_SHIFT)) 3482 ext |= ARCH_PEBS_CNTR_GP & cap.caps; 3483 3484 if (pebs_data_cfg & 3485 (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT)) 3486 ext |= ARCH_PEBS_CNTR_FIXED & cap.caps; 3487 3488 if (pebs_data_cfg & PEBS_DATACFG_METRICS) 3489 ext |= ARCH_PEBS_CNTR_METRICS & cap.caps; 3490 } 3491 3492 if (cpuc->n_pebs == cpuc->n_large_pebs) 3493 new.thresh = ARCH_PEBS_THRESH_MULTI; 3494 else 3495 new.thresh = ARCH_PEBS_THRESH_SINGLE; 3496 3497 rdmsrq(MSR_IA32_PEBS_INDEX, old.whole); 3498 if (new.thresh != old.thresh || !old.en) { 3499 if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) { 3500 /* 3501 * Large PEBS was enabled. 3502 * Drain PEBS buffer before applying the single PEBS. 3503 */ 3504 intel_pmu_drain_pebs_buffer(); 3505 } else { 3506 new.wr = 0; 3507 new.full = 0; 3508 new.en = 1; 3509 wrmsrq(MSR_IA32_PEBS_INDEX, new.whole); 3510 } 3511 } 3512 } 3513 3514 if (is_pebs_counter_event_group(event)) 3515 ext |= ARCH_PEBS_CNTR_ALLOW; 3516 3517 if (cpuc->cfg_c_val[hwc->idx] != ext) 3518 __intel_pmu_update_event_ext(hwc->idx, ext); 3519 } 3520 3521 static void intel_pmu_update_rdpmc_user_disable(struct perf_event *event) 3522 { 3523 if (!x86_pmu_has_rdpmc_user_disable(event->pmu)) 3524 return; 3525 3526 /* 3527 * Counter scope's user-space rdpmc is disabled by default 3528 * except two cases. 3529 * a. rdpmc = 2 (user space rdpmc enabled unconditionally) 3530 * b. rdpmc = 1 and the event is not a system-wide event. 3531 * The count of non-system-wide events would be cleared when 3532 * context switches, so no count data is leaked. 
/*
 * Enable @event on the counter it has been assigned (event->hw.idx).
 *
 * The counter index selects the enable path: general-purpose counter,
 * fixed counter, TopDown metric, BTS or the VLBR pseudo counter. An index
 * outside these ranges is only reported with a warning.
 */
static void intel_pmu_enable_event(struct perf_event *event)
{
	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* May set or clear the RDPMC user-disable bit in hwc->config. */
	intel_pmu_update_rdpmc_user_disable(event);

	/* PEBS is set up before the counter itself is enabled below. */
	if (unlikely(event->attr.precise_ip))
		static_call(x86_pmu_pebs_enable)(event);

	switch (idx) {
	/* General-purpose counters. */
	case 0 ... INTEL_PMC_IDX_FIXED - 1:
		if (branch_sample_counters(event))
			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
		intel_set_masks(event, idx);
		static_call_cond(intel_pmu_enable_acr_event)(event);
		static_call_cond(intel_pmu_enable_event_ext)(event);
		/*
		 * For self-reloaded ACR event, don't enable PMI since
		 * HW won't set overflow bit in GLOBAL_STATUS. Otherwise,
		 * the PMI would be recognized as a suspicious NMI.
		 */
		if (is_acr_self_reload_event(event))
			hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
		else if (!event->attr.precise_ip)
			hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
		__x86_pmu_enable_event(hwc, enable_mask);
		break;
	/* Fixed counters: ACR/extended setup first, then the shared path. */
	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
		static_call_cond(intel_pmu_enable_acr_event)(event);
		static_call_cond(intel_pmu_enable_event_ext)(event);
		fallthrough;
	/* TopDown metrics share the fixed-counter enable path. */
	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
		intel_pmu_enable_fixed(event);
		break;
	case INTEL_PMC_IDX_FIXED_BTS:
		/* BTS is left alone while the PMU is marked disabled. */
		if (!__this_cpu_read(cpu_hw_events.enabled))
			return;
		intel_pmu_enable_bts(hwc->config);
		break;
	case INTEL_PMC_IDX_FIXED_VLBR:
		/* Only the mask bookkeeping is updated for VLBR. */
		intel_set_masks(event, idx);
		break;
	default:
		pr_warn("Failed to enable the event with invalid index %d\n",
			idx);
	}
}
*/ 3629 if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) 3630 continue; 3631 __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); 3632 } 3633 } 3634 i = j - 1; 3635 } 3636 } 3637 3638 void intel_pmu_late_setup(void) 3639 { 3640 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3641 3642 if (!cpuc->n_late_setup) 3643 return; 3644 3645 intel_pmu_pebs_late_setup(cpuc); 3646 intel_pmu_acr_late_setup(cpuc); 3647 } 3648 3649 static void intel_pmu_add_event(struct perf_event *event) 3650 { 3651 if (event->attr.precise_ip) 3652 intel_pmu_pebs_add(event); 3653 if (intel_pmu_needs_branch_stack(event)) 3654 intel_pmu_lbr_add(event); 3655 if (is_pebs_counter_event_group(event) || 3656 is_acr_event_group(event)) 3657 this_cpu_ptr(&cpu_hw_events)->n_late_setup++; 3658 } 3659 3660 /* 3661 * Save and restart an expired event. Called by NMI contexts, 3662 * so it has to be careful about preempting normal event ops: 3663 */ 3664 int intel_pmu_save_and_restart(struct perf_event *event) 3665 { 3666 static_call(x86_pmu_update)(event); 3667 /* 3668 * For a checkpointed counter always reset back to 0. This 3669 * avoids a situation where the counter overflows, aborts the 3670 * transaction and is then set back to shortly before the 3671 * overflow, and overflows and aborts again. 
3672 */ 3673 if (unlikely(event_is_checkpointed(event))) { 3674 /* No race with NMIs because the counter should not be armed */ 3675 wrmsrq(event->hw.event_base, 0); 3676 local64_set(&event->hw.prev_count, 0); 3677 } 3678 return static_call(x86_pmu_set_period)(event); 3679 } 3680 3681 static int intel_pmu_set_period(struct perf_event *event) 3682 { 3683 if (unlikely(is_topdown_count(event))) 3684 return static_call(intel_pmu_set_topdown_event_period)(event); 3685 3686 return x86_perf_event_set_period(event); 3687 } 3688 3689 static u64 intel_pmu_update(struct perf_event *event) 3690 { 3691 if (unlikely(is_topdown_count(event))) 3692 return static_call(intel_pmu_update_topdown_event)(event, NULL); 3693 3694 return x86_perf_event_update(event); 3695 } 3696 3697 static void intel_pmu_reset(void) 3698 { 3699 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); 3700 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3701 unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); 3702 unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); 3703 unsigned long flags; 3704 int idx; 3705 3706 if (!*(u64 *)cntr_mask) 3707 return; 3708 3709 local_irq_save(flags); 3710 3711 pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); 3712 3713 for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) { 3714 wrmsrq_safe(x86_pmu_config_addr(idx), 0ull); 3715 wrmsrq_safe(x86_pmu_event_addr(idx), 0ull); 3716 } 3717 for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) { 3718 if (fixed_counter_disabled(idx, cpuc->pmu)) 3719 continue; 3720 wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull); 3721 } 3722 3723 if (ds) 3724 ds->bts_index = ds->bts_buffer_base; 3725 3726 /* Ack all overflows and disable fixed counters */ 3727 if (x86_pmu.version >= 2) { 3728 intel_pmu_ack_status(intel_pmu_get_status()); 3729 wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); 3730 } 3731 3732 /* Reset LBRs and LBR freezing */ 3733 if (x86_pmu.lbr_nr) { 3734 
update_debugctlmsr(get_debugctlmsr() & 3735 ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); 3736 } 3737 3738 local_irq_restore(flags); 3739 } 3740 3741 /* 3742 * We may be running with guest PEBS events created by KVM, and the 3743 * PEBS records are logged into the guest's DS and invisible to host. 3744 * 3745 * In the case of guest PEBS overflow, we only trigger a fake event 3746 * to emulate the PEBS overflow PMI for guest PEBS counters in KVM. 3747 * The guest will then vm-entry and check the guest DS area to read 3748 * the guest PEBS records. 3749 * 3750 * The contents and other behavior of the guest event do not matter. 3751 */ 3752 static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, 3753 struct perf_sample_data *data) 3754 { 3755 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3756 u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask; 3757 struct perf_event *event = NULL; 3758 int bit; 3759 3760 if (!unlikely(perf_guest_state())) 3761 return; 3762 3763 if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active || 3764 !guest_pebs_idxs) 3765 return; 3766 3767 for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) { 3768 event = cpuc->events[bit]; 3769 if (!event->attr.precise_ip) 3770 continue; 3771 3772 perf_sample_data_init(data, 0, event->hw.last_period); 3773 perf_event_overflow(event, data, regs); 3774 3775 /* Inject one fake event is enough. */ 3776 break; 3777 } 3778 } 3779 3780 static int handle_pmi_common(struct pt_regs *regs, u64 status) 3781 { 3782 struct perf_sample_data data; 3783 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 3784 int bit; 3785 int handled = 0; 3786 3787 inc_perf_irq_stat(); 3788 3789 /* 3790 * Ignore a range of extra bits in status that do not indicate 3791 * overflow by themselves. 
3792 */ 3793 status &= ~(GLOBAL_STATUS_COND_CHG | 3794 GLOBAL_STATUS_ASIF | 3795 GLOBAL_STATUS_LBRS_FROZEN); 3796 if (!status) 3797 return 0; 3798 /* 3799 * In case multiple PEBS events are sampled at the same time, 3800 * it is possible to have GLOBAL_STATUS bit 62 set indicating 3801 * PEBS buffer overflow and also seeing at most 3 PEBS counters 3802 * having their bits set in the status register. This is a sign 3803 * that there was at least one PEBS record pending at the time 3804 * of the PMU interrupt. PEBS counters must only be processed 3805 * via the drain_pebs() calls and not via the regular sample 3806 * processing loop coming after that the function, otherwise 3807 * phony regular samples may be generated in the sampling buffer 3808 * not marked with the EXACT tag. Another possibility is to have 3809 * one PEBS event and at least one non-PEBS event which overflows 3810 * while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will 3811 * not be set, yet the overflow status bit for the PEBS counter will 3812 * be on Skylake. 3813 * 3814 * To avoid this problem, we systematically ignore the PEBS-enabled 3815 * counters from the GLOBAL_STATUS mask and we always process PEBS 3816 * events via drain_pebs(). 3817 */ 3818 status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); 3819 3820 /* 3821 * PEBS overflow sets bit 62 in the global status register 3822 */ 3823 if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) { 3824 u64 pebs_enabled = cpuc->pebs_enabled; 3825 3826 handled++; 3827 x86_pmu_handle_guest_pebs(regs, &data); 3828 static_call(x86_pmu_drain_pebs)(regs, &data); 3829 3830 /* 3831 * PMI throttle may be triggered, which stops the PEBS event. 3832 * Although cpuc->pebs_enabled is updated accordingly, the 3833 * MSR_IA32_PEBS_ENABLE is not updated. Because the 3834 * cpuc->enabled has been forced to 0 in PMI. 3835 * Update the MSR if pebs_enabled is changed. 
3836 */ 3837 if (pebs_enabled != cpuc->pebs_enabled) 3838 wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 3839 3840 /* 3841 * Above PEBS handler (PEBS counters snapshotting) has updated fixed 3842 * counter 3 and perf metrics counts if they are in counter group, 3843 * unnecessary to update again. 3844 */ 3845 if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && 3846 is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) 3847 status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; 3848 } 3849 3850 /* 3851 * Arch PEBS sets bit 54 in the global status register 3852 */ 3853 if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT, 3854 (unsigned long *)&status)) { 3855 handled++; 3856 static_call(x86_pmu_drain_pebs)(regs, &data); 3857 3858 if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && 3859 is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) 3860 status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; 3861 } 3862 3863 /* 3864 * Intel PT 3865 */ 3866 if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { 3867 handled++; 3868 if (!perf_guest_handle_intel_pt_intr()) 3869 intel_pt_interrupt(); 3870 } 3871 3872 /* 3873 * Intel Perf metrics 3874 */ 3875 if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { 3876 handled++; 3877 static_call(intel_pmu_update_topdown_event)(NULL, NULL); 3878 } 3879 3880 status &= hybrid(cpuc->pmu, intel_ctrl); 3881 3882 /* 3883 * Checkpointed counters can lead to 'spurious' PMIs because the 3884 * rollback caused by the PMI will have cleared the overflow status 3885 * bit. Therefore always force probe these counters. 
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 *
 * Top-level PMI handler. The saved cpuc->enabled state is cleared and all
 * counters are disabled for the duration of the handler, then restored on
 * the way out. Pending overflow status is read, acknowledged and passed to
 * handle_pmi_common() repeatedly until the status reads back as zero, or
 * until more than 100 iterations have run, in which case the PMU is reset.
 *
 * The APIC_LVTPC write (the ACK) happens at one of three points, selected
 * by the late_ack/mid_ack bits of the PMU: on entry, right before the
 * counters are re-enabled, or at the very end.
 *
 * Returns the accumulated "handled" count: the BTS drain/interrupt results
 * plus whatever handle_pmi_common() reported for each iteration.
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
	int loops;
	u64 status;
	int handled;
	int pmu_enabled;

	/*
	 * Save the PMU state.
	 * It needs to be restored when leaving the handler.
	 */
	pmu_enabled = cpuc->enabled;
	/*
	 * In general, the early ACK is only applied for old platforms.
	 * For the big core starts from Haswell, the late ACK should be
	 * applied.
	 * For the small core after Tremont, we have to do the ACK right
	 * before re-enabling counters, which is in the middle of the
	 * NMI handler.
	 */
	if (!late_ack && !mid_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	intel_bts_disable_local();
	/* Mark the PMU as disabled while the handler runs. */
	cpuc->enabled = 0;
	__intel_pmu_disable_all(true);
	/* BTS work is counted as handled even if no overflow status is set. */
	handled = intel_pmu_drain_bts_buffer();
	handled += intel_bts_interrupt();
	status = intel_pmu_get_status();
	if (!status)
		goto done;

	loops = 0;
again:
	intel_pmu_lbr_read();
	/* Acknowledge the status bits that are about to be processed. */
	intel_pmu_ack_status(status);
	if (++loops > 100) {
		/* Warn only once; later occurrences just reset the PMU. */
		static bool warned;

		if (!warned) {
			WARN(1, "perfevents: irq loop stuck!\n");
			perf_event_print_debug();
			warned = true;
		}
		intel_pmu_reset();
		goto done;
	}

	handled += handle_pmi_common(regs, status);

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

done:
	/* Mid ACK: must come right before the counters are re-enabled. */
	if (mid_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
	cpuc->enabled = pmu_enabled;
	if (pmu_enabled)
		__intel_pmu_enable_all(0, true);
	intel_bts_enable_local();

	/*
	 * Only unmask the NMI after the overflow counters
	 * have been reset. This avoids spurious NMIs on
	 * Haswell CPUs.
	 */
	if (late_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	return handled;
}
4031 */ 4032 static struct event_constraint * 4033 intel_vlbr_constraints(struct perf_event *event) 4034 { 4035 struct event_constraint *c = &vlbr_constraint; 4036 4037 if (unlikely(constraint_match(c, event->hw.config))) { 4038 event->hw.flags |= c->flags; 4039 return c; 4040 } 4041 4042 return NULL; 4043 } 4044 4045 static int intel_alt_er(struct cpu_hw_events *cpuc, 4046 int idx, u64 config) 4047 { 4048 struct extra_reg *extra_regs = hybrid(cpuc->pmu, extra_regs); 4049 int alt_idx = idx; 4050 4051 switch (idx) { 4052 case EXTRA_REG_RSP_0 ... EXTRA_REG_RSP_1: 4053 if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) 4054 return idx; 4055 if (++alt_idx > EXTRA_REG_RSP_1) 4056 alt_idx = EXTRA_REG_RSP_0; 4057 if (config & ~extra_regs[alt_idx].valid_mask) 4058 return idx; 4059 break; 4060 4061 case EXTRA_REG_OMR_0 ... EXTRA_REG_OMR_3: 4062 if (!(x86_pmu.flags & PMU_FL_HAS_OMR)) 4063 return idx; 4064 if (++alt_idx > EXTRA_REG_OMR_3) 4065 alt_idx = EXTRA_REG_OMR_0; 4066 /* 4067 * Subtracting EXTRA_REG_OMR_0 ensures to get correct 4068 * OMR extra_reg entries which start from 0. 4069 */ 4070 if (config & ~extra_regs[alt_idx - EXTRA_REG_OMR_0].valid_mask) 4071 return idx; 4072 break; 4073 4074 default: 4075 break; 4076 } 4077 4078 return alt_idx; 4079 } 4080 4081 static void intel_fixup_er(struct perf_event *event, int idx) 4082 { 4083 struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs); 4084 int er_idx; 4085 4086 event->hw.extra_reg.idx = idx; 4087 switch (idx) { 4088 case EXTRA_REG_RSP_0 ... EXTRA_REG_RSP_1: 4089 er_idx = idx - EXTRA_REG_RSP_0; 4090 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 4091 event->hw.config |= extra_regs[er_idx].event; 4092 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0 + er_idx; 4093 break; 4094 4095 case EXTRA_REG_OMR_0 ... 
EXTRA_REG_OMR_3: 4096 er_idx = idx - EXTRA_REG_OMR_0; 4097 event->hw.config &= ~ARCH_PERFMON_EVENTSEL_UMASK; 4098 event->hw.config |= 1ULL << (8 + er_idx); 4099 event->hw.extra_reg.reg = MSR_OMR_0 + er_idx; 4100 break; 4101 4102 default: 4103 pr_warn("The extra reg idx %d is not supported.\n", idx); 4104 } 4105 } 4106 4107 /* 4108 * manage allocation of shared extra msr for certain events 4109 * 4110 * sharing can be: 4111 * per-cpu: to be shared between the various events on a single PMU 4112 * per-core: per-cpu + shared by HT threads 4113 */ 4114 static struct event_constraint * 4115 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 4116 struct perf_event *event, 4117 struct hw_perf_event_extra *reg) 4118 { 4119 struct event_constraint *c = &emptyconstraint; 4120 struct er_account *era; 4121 unsigned long flags; 4122 int idx = reg->idx; 4123 4124 /* 4125 * reg->alloc can be set due to existing state, so for fake cpuc we 4126 * need to ignore this, otherwise we might fail to allocate proper fake 4127 * state for this extra reg constraint. Also see the comment below. 4128 */ 4129 if (reg->alloc && !cpuc->is_fake) 4130 return NULL; /* call x86_get_event_constraint() */ 4131 4132 again: 4133 era = &cpuc->shared_regs->regs[idx]; 4134 /* 4135 * we use spin_lock_irqsave() to avoid lockdep issues when 4136 * passing a fake cpuc 4137 */ 4138 raw_spin_lock_irqsave(&era->lock, flags); 4139 4140 if (!atomic_read(&era->ref) || era->config == reg->config) { 4141 4142 /* 4143 * If its a fake cpuc -- as per validate_{group,event}() we 4144 * shouldn't touch event state and we can avoid doing so 4145 * since both will only call get_event_constraints() once 4146 * on each event, this avoids the need for reg->alloc. 4147 * 4148 * Not doing the ER fixup will only result in era->reg being 4149 * wrong, but since we won't actually try and program hardware 4150 * this isn't a problem either. 
4151 */ 4152 if (!cpuc->is_fake) { 4153 if (idx != reg->idx) 4154 intel_fixup_er(event, idx); 4155 4156 /* 4157 * x86_schedule_events() can call get_event_constraints() 4158 * multiple times on events in the case of incremental 4159 * scheduling(). reg->alloc ensures we only do the ER 4160 * allocation once. 4161 */ 4162 reg->alloc = 1; 4163 } 4164 4165 /* lock in msr value */ 4166 era->config = reg->config; 4167 era->reg = reg->reg; 4168 4169 /* one more user */ 4170 atomic_inc(&era->ref); 4171 4172 /* 4173 * need to call x86_get_event_constraint() 4174 * to check if associated event has constraints 4175 */ 4176 c = NULL; 4177 } else { 4178 idx = intel_alt_er(cpuc, idx, reg->config); 4179 if (idx != reg->idx) { 4180 raw_spin_unlock_irqrestore(&era->lock, flags); 4181 goto again; 4182 } 4183 } 4184 raw_spin_unlock_irqrestore(&era->lock, flags); 4185 4186 return c; 4187 } 4188 4189 static void 4190 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, 4191 struct hw_perf_event_extra *reg) 4192 { 4193 struct er_account *era; 4194 4195 /* 4196 * Only put constraint if extra reg was actually allocated. Also takes 4197 * care of event which do not use an extra shared reg. 4198 * 4199 * Also, if this is a fake cpuc we shouldn't touch any event state 4200 * (reg->alloc) and we don't care about leaving inconsistent cpuc state 4201 * either since it'll be thrown out. 
4202 */ 4203 if (!reg->alloc || cpuc->is_fake) 4204 return; 4205 4206 era = &cpuc->shared_regs->regs[reg->idx]; 4207 4208 /* one fewer user */ 4209 atomic_dec(&era->ref); 4210 4211 /* allocate again next time */ 4212 reg->alloc = 0; 4213 } 4214 4215 static struct event_constraint * 4216 intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 4217 struct perf_event *event) 4218 { 4219 struct event_constraint *c = NULL, *d; 4220 struct hw_perf_event_extra *xreg, *breg; 4221 4222 xreg = &event->hw.extra_reg; 4223 if (xreg->idx != EXTRA_REG_NONE) { 4224 c = __intel_shared_reg_get_constraints(cpuc, event, xreg); 4225 if (c == &emptyconstraint) 4226 return c; 4227 } 4228 breg = &event->hw.branch_reg; 4229 if (breg->idx != EXTRA_REG_NONE) { 4230 d = __intel_shared_reg_get_constraints(cpuc, event, breg); 4231 if (d == &emptyconstraint) { 4232 __intel_shared_reg_put_constraints(cpuc, xreg); 4233 c = d; 4234 } 4235 } 4236 return c; 4237 } 4238 4239 struct event_constraint * 4240 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 4241 struct perf_event *event) 4242 { 4243 struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints); 4244 struct event_constraint *c; 4245 4246 if (event_constraints) { 4247 for_each_event_constraint(c, event_constraints) { 4248 if (constraint_match(c, event->hw.config)) { 4249 event->hw.flags |= c->flags; 4250 return c; 4251 } 4252 } 4253 } 4254 4255 return &hybrid_var(cpuc->pmu, unconstrained); 4256 } 4257 4258 static struct event_constraint * 4259 __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 4260 struct perf_event *event) 4261 { 4262 struct event_constraint *c; 4263 4264 c = intel_vlbr_constraints(event); 4265 if (c) 4266 return c; 4267 4268 c = intel_bts_constraints(event); 4269 if (c) 4270 return c; 4271 4272 c = intel_shared_regs_constraints(cpuc, event); 4273 if (c) 4274 return c; 4275 4276 c = intel_pebs_constraints(event); 4277 if (c) 4278 return c; 4279 4280 return 
x86_get_event_constraints(cpuc, idx, event); 4281 } 4282 4283 static void 4284 intel_start_scheduling(struct cpu_hw_events *cpuc) 4285 { 4286 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 4287 struct intel_excl_states *xl; 4288 int tid = cpuc->excl_thread_id; 4289 4290 /* 4291 * nothing needed if in group validation mode 4292 */ 4293 if (cpuc->is_fake || !is_ht_workaround_enabled()) 4294 return; 4295 4296 /* 4297 * no exclusion needed 4298 */ 4299 if (WARN_ON_ONCE(!excl_cntrs)) 4300 return; 4301 4302 xl = &excl_cntrs->states[tid]; 4303 4304 xl->sched_started = true; 4305 /* 4306 * lock shared state until we are done scheduling 4307 * in stop_event_scheduling() 4308 * makes scheduling appear as a transaction 4309 */ 4310 raw_spin_lock(&excl_cntrs->lock); 4311 } 4312 4313 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) 4314 { 4315 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 4316 struct event_constraint *c = cpuc->event_constraint[idx]; 4317 struct intel_excl_states *xl; 4318 int tid = cpuc->excl_thread_id; 4319 4320 if (cpuc->is_fake || !is_ht_workaround_enabled()) 4321 return; 4322 4323 if (WARN_ON_ONCE(!excl_cntrs)) 4324 return; 4325 4326 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) 4327 return; 4328 4329 xl = &excl_cntrs->states[tid]; 4330 4331 lockdep_assert_held(&excl_cntrs->lock); 4332 4333 if (c->flags & PERF_X86_EVENT_EXCL) 4334 xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; 4335 else 4336 xl->state[cntr] = INTEL_EXCL_SHARED; 4337 } 4338 4339 static void 4340 intel_stop_scheduling(struct cpu_hw_events *cpuc) 4341 { 4342 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 4343 struct intel_excl_states *xl; 4344 int tid = cpuc->excl_thread_id; 4345 4346 /* 4347 * nothing needed if in group validation mode 4348 */ 4349 if (cpuc->is_fake || !is_ht_workaround_enabled()) 4350 return; 4351 /* 4352 * no exclusion needed 4353 */ 4354 if (WARN_ON_ONCE(!excl_cntrs)) 4355 return; 4356 4357 xl = &excl_cntrs->states[tid]; 
4358 4359 xl->sched_started = false; 4360 /* 4361 * release shared state lock (acquired in intel_start_scheduling()) 4362 */ 4363 raw_spin_unlock(&excl_cntrs->lock); 4364 } 4365 4366 static struct event_constraint * 4367 dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx) 4368 { 4369 WARN_ON_ONCE(!cpuc->constraint_list); 4370 4371 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { 4372 struct event_constraint *cx; 4373 4374 /* 4375 * grab pre-allocated constraint entry 4376 */ 4377 cx = &cpuc->constraint_list[idx]; 4378 4379 /* 4380 * initialize dynamic constraint 4381 * with static constraint 4382 */ 4383 *cx = *c; 4384 4385 /* 4386 * mark constraint as dynamic 4387 */ 4388 cx->flags |= PERF_X86_EVENT_DYNAMIC; 4389 c = cx; 4390 } 4391 4392 return c; 4393 } 4394 4395 static struct event_constraint * 4396 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 4397 int idx, struct event_constraint *c) 4398 { 4399 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 4400 struct intel_excl_states *xlo; 4401 int tid = cpuc->excl_thread_id; 4402 int is_excl, i, w; 4403 4404 /* 4405 * validating a group does not require 4406 * enforcing cross-thread exclusion 4407 */ 4408 if (cpuc->is_fake || !is_ht_workaround_enabled()) 4409 return c; 4410 4411 /* 4412 * no exclusion needed 4413 */ 4414 if (WARN_ON_ONCE(!excl_cntrs)) 4415 return c; 4416 4417 /* 4418 * because we modify the constraint, we need 4419 * to make a copy. Static constraints come 4420 * from static const tables. 4421 * 4422 * only needed when constraint has not yet 4423 * been cloned (marked dynamic) 4424 */ 4425 c = dyn_constraint(cpuc, c, idx); 4426 4427 /* 4428 * From here on, the constraint is dynamic. 
4429 * Either it was just allocated above, or it 4430 * was allocated during a earlier invocation 4431 * of this function 4432 */ 4433 4434 /* 4435 * state of sibling HT 4436 */ 4437 xlo = &excl_cntrs->states[tid ^ 1]; 4438 4439 /* 4440 * event requires exclusive counter access 4441 * across HT threads 4442 */ 4443 is_excl = c->flags & PERF_X86_EVENT_EXCL; 4444 if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { 4445 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; 4446 if (!cpuc->n_excl++) 4447 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); 4448 } 4449 4450 /* 4451 * Modify static constraint with current dynamic 4452 * state of thread 4453 * 4454 * EXCLUSIVE: sibling counter measuring exclusive event 4455 * SHARED : sibling counter measuring non-exclusive event 4456 * UNUSED : sibling counter unused 4457 */ 4458 w = c->weight; 4459 for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { 4460 /* 4461 * exclusive event in sibling counter 4462 * our corresponding counter cannot be used 4463 * regardless of our event 4464 */ 4465 if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) { 4466 __clear_bit(i, c->idxmsk); 4467 w--; 4468 continue; 4469 } 4470 /* 4471 * if measuring an exclusive event, sibling 4472 * measuring non-exclusive, then counter cannot 4473 * be used 4474 */ 4475 if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) { 4476 __clear_bit(i, c->idxmsk); 4477 w--; 4478 continue; 4479 } 4480 } 4481 4482 /* 4483 * if we return an empty mask, then switch 4484 * back to static empty constraint to avoid 4485 * the cost of freeing later on 4486 */ 4487 if (!w) 4488 c = &emptyconstraint; 4489 4490 c->weight = w; 4491 4492 return c; 4493 } 4494 4495 static struct event_constraint * 4496 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 4497 struct perf_event *event) 4498 { 4499 struct event_constraint *c1, *c2; 4500 4501 c1 = cpuc->event_constraint[idx]; 4502 4503 /* 4504 * first time only 4505 * - static constraint: no change across incremental scheduling 
calls 4506 * - dynamic constraint: handled by intel_get_excl_constraints() 4507 */ 4508 c2 = __intel_get_event_constraints(cpuc, idx, event); 4509 if (c1) { 4510 WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC)); 4511 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); 4512 c1->weight = c2->weight; 4513 c2 = c1; 4514 } 4515 4516 if (cpuc->excl_cntrs) 4517 return intel_get_excl_constraints(cpuc, event, idx, c2); 4518 4519 if (event->hw.dyn_constraint != ~0ULL) { 4520 c2 = dyn_constraint(cpuc, c2, idx); 4521 c2->idxmsk64 &= event->hw.dyn_constraint; 4522 c2->weight = hweight64(c2->idxmsk64); 4523 } 4524 4525 return c2; 4526 } 4527 4528 static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, 4529 struct perf_event *event) 4530 { 4531 struct hw_perf_event *hwc = &event->hw; 4532 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 4533 int tid = cpuc->excl_thread_id; 4534 struct intel_excl_states *xl; 4535 4536 /* 4537 * nothing needed if in group validation mode 4538 */ 4539 if (cpuc->is_fake) 4540 return; 4541 4542 if (WARN_ON_ONCE(!excl_cntrs)) 4543 return; 4544 4545 if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { 4546 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; 4547 if (!--cpuc->n_excl) 4548 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); 4549 } 4550 4551 /* 4552 * If event was actually assigned, then mark the counter state as 4553 * unused now. 4554 */ 4555 if (hwc->idx >= 0) { 4556 xl = &excl_cntrs->states[tid]; 4557 4558 /* 4559 * put_constraint may be called from x86_schedule_events() 4560 * which already has the lock held so here make locking 4561 * conditional. 
4562 */ 4563 if (!xl->sched_started) 4564 raw_spin_lock(&excl_cntrs->lock); 4565 4566 xl->state[hwc->idx] = INTEL_EXCL_UNUSED; 4567 4568 if (!xl->sched_started) 4569 raw_spin_unlock(&excl_cntrs->lock); 4570 } 4571 } 4572 4573 static void 4574 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, 4575 struct perf_event *event) 4576 { 4577 struct hw_perf_event_extra *reg; 4578 4579 reg = &event->hw.extra_reg; 4580 if (reg->idx != EXTRA_REG_NONE) 4581 __intel_shared_reg_put_constraints(cpuc, reg); 4582 4583 reg = &event->hw.branch_reg; 4584 if (reg->idx != EXTRA_REG_NONE) 4585 __intel_shared_reg_put_constraints(cpuc, reg); 4586 } 4587 4588 static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 4589 struct perf_event *event) 4590 { 4591 intel_put_shared_regs_event_constraints(cpuc, event); 4592 4593 /* 4594 * is PMU has exclusive counter restrictions, then 4595 * all events are subject to and must call the 4596 * put_excl_constraints() routine 4597 */ 4598 if (cpuc->excl_cntrs) 4599 intel_put_excl_constraints(cpuc, event); 4600 } 4601 4602 static void intel_pebs_aliases_core2(struct perf_event *event) 4603 { 4604 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 4605 /* 4606 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 4607 * (0x003c) so that we can use it with PEBS. 4608 * 4609 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 4610 * PEBS capable. However we can use INST_RETIRED.ANY_P 4611 * (0x00c0), which is a PEBS capable event, to get the same 4612 * count. 4613 * 4614 * INST_RETIRED.ANY_P counts the number of cycles that retires 4615 * CNTMASK instructions. By setting CNTMASK to a value (16) 4616 * larger than the maximum number of instructions that can be 4617 * retired per cycle (4) and then inverting the condition, we 4618 * count all cycles that retire 16 or less instructions, which 4619 * is every cycle. 4620 * 4621 * Thereby we gain a PEBS capable cycle counter. 
4622 */ 4623 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 4624 4625 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 4626 event->hw.config = alt_config; 4627 } 4628 } 4629 4630 static void intel_pebs_aliases_snb(struct perf_event *event) 4631 { 4632 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 4633 /* 4634 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 4635 * (0x003c) so that we can use it with PEBS. 4636 * 4637 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 4638 * PEBS capable. However we can use UOPS_RETIRED.ALL 4639 * (0x01c2), which is a PEBS capable event, to get the same 4640 * count. 4641 * 4642 * UOPS_RETIRED.ALL counts the number of cycles that retires 4643 * CNTMASK micro-ops. By setting CNTMASK to a value (16) 4644 * larger than the maximum number of micro-ops that can be 4645 * retired per cycle (4) and then inverting the condition, we 4646 * count all cycles that retire 16 or less micro-ops, which 4647 * is every cycle. 4648 * 4649 * Thereby we gain a PEBS capable cycle counter. 4650 */ 4651 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); 4652 4653 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 4654 event->hw.config = alt_config; 4655 } 4656 } 4657 4658 static void intel_pebs_aliases_precdist(struct perf_event *event) 4659 { 4660 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 4661 /* 4662 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 4663 * (0x003c) so that we can use it with PEBS. 4664 * 4665 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 4666 * PEBS capable. However we can use INST_RETIRED.PREC_DIST 4667 * (0x01c0), which is a PEBS capable event, to get the same 4668 * count. 4669 * 4670 * The PREC_DIST event has special support to minimize sample 4671 * shadowing effects. One drawback is that it can be 4672 * only programmed on counter 1, but that seems like an 4673 * acceptable trade off. 
4674 */ 4675 u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); 4676 4677 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 4678 event->hw.config = alt_config; 4679 } 4680 } 4681 4682 static void intel_pebs_aliases_ivb(struct perf_event *event) 4683 { 4684 if (event->attr.precise_ip < 3) 4685 return intel_pebs_aliases_snb(event); 4686 return intel_pebs_aliases_precdist(event); 4687 } 4688 4689 static void intel_pebs_aliases_skl(struct perf_event *event) 4690 { 4691 if (event->attr.precise_ip < 3) 4692 return intel_pebs_aliases_core2(event); 4693 return intel_pebs_aliases_precdist(event); 4694 } 4695 4696 static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) 4697 { 4698 unsigned long flags = x86_pmu.large_pebs_flags; 4699 4700 if (event->attr.use_clockid) 4701 flags &= ~PERF_SAMPLE_TIME; 4702 if (!event->attr.exclude_kernel) 4703 flags &= ~PERF_SAMPLE_REGS_USER; 4704 if (event->attr.sample_regs_user & ~PEBS_GP_REGS) 4705 flags &= ~PERF_SAMPLE_REGS_USER; 4706 if (event->attr.sample_regs_intr & ~PEBS_GP_REGS) 4707 flags &= ~PERF_SAMPLE_REGS_INTR; 4708 return flags; 4709 } 4710 4711 static int intel_pmu_bts_config(struct perf_event *event) 4712 { 4713 struct perf_event_attr *attr = &event->attr; 4714 4715 if (unlikely(intel_pmu_has_bts(event))) { 4716 /* BTS is not supported by this architecture. */ 4717 if (!x86_pmu.bts_active) 4718 return -EOPNOTSUPP; 4719 4720 /* BTS is currently only allowed for user-mode. */ 4721 if (!attr->exclude_kernel) 4722 return -EOPNOTSUPP; 4723 4724 /* BTS is not allowed for precise events. 
*/ 4725 if (attr->precise_ip) 4726 return -EOPNOTSUPP; 4727 4728 /* disallow bts if conflicting events are present */ 4729 if (x86_add_exclusive(x86_lbr_exclusive_lbr)) 4730 return -EBUSY; 4731 4732 event->destroy = hw_perf_lbr_event_destroy; 4733 } 4734 4735 return 0; 4736 } 4737 4738 static int core_pmu_hw_config(struct perf_event *event) 4739 { 4740 int ret = x86_pmu_hw_config(event); 4741 4742 if (ret) 4743 return ret; 4744 4745 return intel_pmu_bts_config(event); 4746 } 4747 4748 #define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \ 4749 ((x86_pmu.num_topdown_events - 1) << 8)) 4750 4751 static bool is_available_metric_event(struct perf_event *event) 4752 { 4753 return is_metric_event(event) && 4754 event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX; 4755 } 4756 4757 static inline bool is_mem_loads_event(struct perf_event *event) 4758 { 4759 return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01); 4760 } 4761 4762 static inline bool is_mem_loads_aux_event(struct perf_event *event) 4763 { 4764 return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82); 4765 } 4766 4767 static inline bool require_mem_loads_aux_event(struct perf_event *event) 4768 { 4769 if (!(x86_pmu.flags & PMU_FL_MEM_LOADS_AUX)) 4770 return false; 4771 4772 if (is_hybrid()) 4773 return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; 4774 4775 return true; 4776 } 4777 4778 static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) 4779 { 4780 union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap); 4781 4782 return test_bit(idx, (unsigned long *)&intel_cap->capabilities); 4783 } 4784 4785 static u64 intel_pmu_freq_start_period(struct perf_event *event) 4786 { 4787 int type = event->attr.type; 4788 u64 config, factor; 4789 s64 start; 4790 4791 /* 4792 * The 127 is the lowest possible recommended SAV (sample after value) 4793 * for a 4000 freq (default freq), according to the event list 
JSON file. 4794 * Also, assume the workload is idle 50% time. 4795 */ 4796 factor = 64 * 4000; 4797 if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) 4798 goto end; 4799 4800 /* 4801 * The estimation of the start period in the freq mode is 4802 * based on the below assumption. 4803 * 4804 * For a cycles or an instructions event, 1GHZ of the 4805 * underlying platform, 1 IPC. The workload is idle 50% time. 4806 * The start period = 1,000,000,000 * 1 / freq / 2. 4807 * = 500,000,000 / freq 4808 * 4809 * Usually, the branch-related events occur less than the 4810 * instructions event. According to the Intel event list JSON 4811 * file, the SAV (sample after value) of a branch-related event 4812 * is usually 1/4 of an instruction event. 4813 * The start period of branch-related events = 125,000,000 / freq. 4814 * 4815 * The cache-related events occurs even less. The SAV is usually 4816 * 1/20 of an instruction event. 4817 * The start period of cache-related events = 25,000,000 / freq. 4818 */ 4819 config = event->attr.config & PERF_HW_EVENT_MASK; 4820 if (type == PERF_TYPE_HARDWARE) { 4821 switch (config) { 4822 case PERF_COUNT_HW_CPU_CYCLES: 4823 case PERF_COUNT_HW_INSTRUCTIONS: 4824 case PERF_COUNT_HW_BUS_CYCLES: 4825 case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: 4826 case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: 4827 case PERF_COUNT_HW_REF_CPU_CYCLES: 4828 factor = 500000000; 4829 break; 4830 case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: 4831 case PERF_COUNT_HW_BRANCH_MISSES: 4832 factor = 125000000; 4833 break; 4834 case PERF_COUNT_HW_CACHE_REFERENCES: 4835 case PERF_COUNT_HW_CACHE_MISSES: 4836 factor = 25000000; 4837 break; 4838 default: 4839 goto end; 4840 } 4841 } 4842 4843 if (type == PERF_TYPE_HW_CACHE) 4844 factor = 25000000; 4845 end: 4846 /* 4847 * Usually, a prime or a number with less factors (close to prime) 4848 * is chosen as an SAV, which makes it less likely that the sampling 4849 * period synchronizes with some periodic event in the workload. 
4850 * Minus 1 to make it at least avoiding values near power of twos 4851 * for the default freq. 4852 */ 4853 start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; 4854 4855 if (start > x86_pmu.max_period) 4856 start = x86_pmu.max_period; 4857 4858 if (x86_pmu.limit_period) 4859 x86_pmu.limit_period(event, &start); 4860 4861 return start; 4862 } 4863 4864 static inline bool intel_pmu_has_acr(struct pmu *pmu) 4865 { 4866 return !!hybrid(pmu, acr_cause_mask64); 4867 } 4868 4869 static bool intel_pmu_is_acr_group(struct perf_event *event) 4870 { 4871 /* The group leader has the ACR flag set */ 4872 if (is_acr_event_group(event)) 4873 return true; 4874 4875 /* The acr_mask is set */ 4876 if (event->attr.config2) 4877 return true; 4878 4879 return false; 4880 } 4881 4882 static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu) 4883 { 4884 u64 caps; 4885 4886 if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline) 4887 return true; 4888 4889 caps = hybrid(pmu, arch_pebs_cap).caps; 4890 if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK)) 4891 return true; 4892 4893 return false; 4894 } 4895 4896 static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, 4897 u64 *cause_mask, int *num) 4898 { 4899 event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64); 4900 *cause_mask |= event->attr.config2; 4901 *num += 1; 4902 } 4903 4904 static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, 4905 int idx, u64 cause_mask) 4906 { 4907 if (test_bit(idx, (unsigned long *)&cause_mask)) 4908 event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); 4909 } 4910 4911 static inline int intel_set_branch_counter_constr(struct perf_event *event, 4912 int *num) 4913 { 4914 if (branch_sample_call_stack(event)) 4915 return -EINVAL; 4916 if (branch_sample_counters(event)) { 4917 (*num)++; 4918 event->hw.dyn_constraint &= x86_pmu.lbr_counters; 4919 } 4920 4921 return 0; 4922 } 4923 4924 static int 
intel_pmu_hw_config(struct perf_event *event) 4925 { 4926 int ret = x86_pmu_hw_config(event); 4927 4928 if (ret) 4929 return ret; 4930 4931 ret = intel_pmu_bts_config(event); 4932 if (ret) 4933 return ret; 4934 4935 if (event->attr.freq && event->attr.sample_freq) { 4936 event->hw.sample_period = intel_pmu_freq_start_period(event); 4937 event->hw.last_period = event->hw.sample_period; 4938 local64_set(&event->hw.period_left, event->hw.sample_period); 4939 } 4940 4941 if (event->attr.precise_ip) { 4942 struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap); 4943 4944 if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) 4945 return -EINVAL; 4946 4947 if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { 4948 event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; 4949 if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) && 4950 !has_aux_action(event)) { 4951 event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; 4952 event->attach_state |= PERF_ATTACH_SCHED_CB; 4953 } 4954 } 4955 if (x86_pmu.pebs_aliases) 4956 x86_pmu.pebs_aliases(event); 4957 4958 if (x86_pmu.arch_pebs) { 4959 u64 cntr_mask = hybrid(event->pmu, intel_ctrl) & 4960 ~GLOBAL_CTRL_EN_PERF_METRICS; 4961 u64 pebs_mask = event->attr.precise_ip >= 3 ? 
4962 pebs_cap.pdists : pebs_cap.counters; 4963 if (cntr_mask != pebs_mask) 4964 event->hw.dyn_constraint &= pebs_mask; 4965 } 4966 } 4967 4968 if (needs_branch_stack(event)) { 4969 /* Avoid branch stack setup for counting events in SAMPLE READ */ 4970 if (is_sampling_event(event) || 4971 !(event->attr.sample_type & PERF_SAMPLE_READ)) 4972 event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; 4973 } 4974 4975 if (branch_sample_counters(event)) { 4976 struct perf_event *leader, *sibling; 4977 int num = 0; 4978 4979 if (!(x86_pmu.flags & PMU_FL_BR_CNTR) || 4980 (event->attr.config & ~INTEL_ARCH_EVENT_MASK)) 4981 return -EINVAL; 4982 4983 /* 4984 * The branch counter logging is not supported in the call stack 4985 * mode yet, since we cannot simply flush the LBR during e.g., 4986 * multiplexing. Also, there is no obvious usage with the call 4987 * stack mode. Simply forbids it for now. 4988 * 4989 * If any events in the group enable the branch counter logging 4990 * feature, the group is treated as a branch counter logging 4991 * group, which requires the extra space to store the counters. 4992 */ 4993 leader = event->group_leader; 4994 if (intel_set_branch_counter_constr(leader, &num)) 4995 return -EINVAL; 4996 leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; 4997 4998 for_each_sibling_event(sibling, leader) { 4999 if (intel_set_branch_counter_constr(sibling, &num)) 5000 return -EINVAL; 5001 } 5002 5003 /* event isn't installed as a sibling yet. */ 5004 if (event != leader) { 5005 if (intel_set_branch_counter_constr(event, &num)) 5006 return -EINVAL; 5007 } 5008 5009 if (num > fls(x86_pmu.lbr_counters)) 5010 return -EINVAL; 5011 /* 5012 * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't 5013 * require any branch stack setup. 5014 * Clear the bit to avoid unnecessary branch stack setup. 
5015 */ 5016 if (0 == (event->attr.branch_sample_type & 5017 ~(PERF_SAMPLE_BRANCH_PLM_ALL | 5018 PERF_SAMPLE_BRANCH_COUNTERS))) 5019 event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK; 5020 5021 /* 5022 * Force the leader to be a LBR event. So LBRs can be reset 5023 * with the leader event. See intel_pmu_lbr_del() for details. 5024 */ 5025 if (!intel_pmu_needs_branch_stack(leader)) 5026 return -EINVAL; 5027 } 5028 5029 if (intel_pmu_needs_branch_stack(event)) { 5030 ret = intel_pmu_setup_lbr_filter(event); 5031 if (ret) 5032 return ret; 5033 event->attach_state |= PERF_ATTACH_SCHED_CB; 5034 5035 /* 5036 * BTS is set up earlier in this path, so don't account twice 5037 */ 5038 if (!unlikely(intel_pmu_has_bts(event))) { 5039 /* disallow lbr if conflicting events are present */ 5040 if (x86_add_exclusive(x86_lbr_exclusive_lbr)) 5041 return -EBUSY; 5042 5043 event->destroy = hw_perf_lbr_event_destroy; 5044 } 5045 } 5046 5047 if (event->attr.aux_output) { 5048 if (!event->attr.precise_ip) 5049 return -EINVAL; 5050 5051 event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; 5052 } 5053 5054 if ((event->attr.sample_type & PERF_SAMPLE_READ) && 5055 intel_pmu_has_pebs_counter_group(event->pmu) && 5056 is_sampling_event(event) && 5057 event->attr.precise_ip) 5058 event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; 5059 5060 if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) { 5061 struct perf_event *sibling, *leader = event->group_leader; 5062 struct pmu *pmu = event->pmu; 5063 bool has_sw_event = false; 5064 int num = 0, idx = 0; 5065 u64 cause_mask = 0; 5066 5067 /* Not support perf metrics */ 5068 if (is_metric_event(event)) 5069 return -EINVAL; 5070 5071 /* Not support freq mode */ 5072 if (event->attr.freq) 5073 return -EINVAL; 5074 5075 /* PDist is not supported */ 5076 if (event->attr.config2 && event->attr.precise_ip > 2) 5077 return -EINVAL; 5078 5079 /* The reload value cannot exceeds the max period */ 5080 if (event->attr.sample_period > 
x86_pmu.max_period) 5081 return -EINVAL; 5082 /* 5083 * The counter-constraints of each event cannot be finalized 5084 * unless the whole group is scanned. However, it's hard 5085 * to know whether the event is the last one of the group. 5086 * Recalculate the counter-constraints for each event when 5087 * adding a new event. 5088 * 5089 * The group is traversed twice, which may be optimized later. 5090 * In the first round, 5091 * - Find all events which do reload when other events 5092 * overflow and set the corresponding counter-constraints 5093 * - Add all events, which can cause other events reload, 5094 * in the cause_mask 5095 * - Error out if the number of events exceeds the HW limit 5096 * - The ACR events must be contiguous. 5097 * Error out if there are non-X86 events between ACR events. 5098 * This is not a HW limit, but a SW limit. 5099 * With the assumption, the intel_pmu_acr_late_setup() can 5100 * easily convert the event idx to counter idx without 5101 * traversing the whole event list. 5102 */ 5103 if (!is_x86_event(leader)) 5104 return -EINVAL; 5105 5106 if (leader->attr.config2) 5107 intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num); 5108 5109 if (leader->nr_siblings) { 5110 for_each_sibling_event(sibling, leader) { 5111 if (!is_x86_event(sibling)) { 5112 has_sw_event = true; 5113 continue; 5114 } 5115 if (!sibling->attr.config2) 5116 continue; 5117 if (has_sw_event) 5118 return -EINVAL; 5119 intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num); 5120 } 5121 } 5122 if (leader != event && event->attr.config2) { 5123 if (has_sw_event) 5124 return -EINVAL; 5125 intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num); 5126 } 5127 5128 if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) || 5129 num > hweight64(hybrid(event->pmu, acr_cntr_mask64))) 5130 return -EINVAL; 5131 /* 5132 * In the second round, apply the counter-constraints for 5133 * the events which can cause other events reload. 
5134 */ 5135 intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); 5136 5137 if (leader->nr_siblings) { 5138 for_each_sibling_event(sibling, leader) { 5139 if (is_x86_event(sibling)) 5140 intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); 5141 } 5142 } 5143 5144 if (leader != event) 5145 intel_pmu_set_acr_caused_constr(event, idx, cause_mask); 5146 5147 leader->hw.flags |= PERF_X86_EVENT_ACR; 5148 } 5149 5150 if ((event->attr.type == PERF_TYPE_HARDWARE) || 5151 (event->attr.type == PERF_TYPE_HW_CACHE)) 5152 return 0; 5153 5154 /* 5155 * Config Topdown slots and metric events 5156 * 5157 * The slots event on Fixed Counter 3 can support sampling, 5158 * which will be handled normally in x86_perf_event_update(). 5159 * 5160 * Metric events don't support sampling and require being paired 5161 * with a slots event as group leader. When the slots event 5162 * is used in a metrics group, it too cannot support sampling. 5163 */ 5164 if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { 5165 /* The metrics_clear can only be set for the slots event */ 5166 if (event->attr.config1 && 5167 (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR))) 5168 return -EINVAL; 5169 5170 if (event->attr.config2) 5171 return -EINVAL; 5172 5173 /* 5174 * The TopDown metrics events and slots event don't 5175 * support any filters. 5176 */ 5177 if (event->attr.config & X86_ALL_EVENT_FLAGS) 5178 return -EINVAL; 5179 5180 if (is_available_metric_event(event)) { 5181 struct perf_event *leader = event->group_leader; 5182 5183 /* The metric events don't support sampling. */ 5184 if (is_sampling_event(event)) 5185 return -EINVAL; 5186 5187 /* The metric events require a slots group leader. */ 5188 if (!is_slots_event(leader)) 5189 return -EINVAL; 5190 5191 /* 5192 * The leader/SLOTS must not be a sampling event for 5193 * metric use; hardware requires it starts at 0 when used 5194 * in conjunction with MSR_PERF_METRICS. 
5195 */ 5196 if (is_sampling_event(leader)) 5197 return -EINVAL; 5198 5199 event->event_caps |= PERF_EV_CAP_SIBLING; 5200 /* 5201 * Only once we have a METRICs sibling do we 5202 * need TopDown magic. 5203 */ 5204 leader->hw.flags |= PERF_X86_EVENT_TOPDOWN; 5205 event->hw.flags |= PERF_X86_EVENT_TOPDOWN; 5206 } 5207 } 5208 5209 /* 5210 * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR 5211 * doesn't function quite right. As a work-around it needs to always be 5212 * co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82). 5213 * The actual count of this second event is irrelevant it just needs 5214 * to be active to make the first event function correctly. 5215 * 5216 * In a group, the auxiliary event must be in front of the load latency 5217 * event. The rule is to simplify the implementation of the check. 5218 * That's because perf cannot have a complete group at the moment. 5219 */ 5220 if (require_mem_loads_aux_event(event) && 5221 (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) && 5222 is_mem_loads_event(event)) { 5223 struct perf_event *leader = event->group_leader; 5224 struct perf_event *sibling = NULL; 5225 5226 /* 5227 * When this memload event is also the first event (no group 5228 * exists yet), then there is no aux event before it. 
5229 */ 5230 if (leader == event) 5231 return -ENODATA; 5232 5233 if (!is_mem_loads_aux_event(leader)) { 5234 for_each_sibling_event(sibling, leader) { 5235 if (is_mem_loads_aux_event(sibling)) 5236 break; 5237 } 5238 if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list)) 5239 return -ENODATA; 5240 } 5241 } 5242 5243 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) 5244 return 0; 5245 5246 if (x86_pmu.version < 3) 5247 return -EINVAL; 5248 5249 ret = perf_allow_cpu(); 5250 if (ret) 5251 return ret; 5252 5253 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; 5254 5255 return 0; 5256 } 5257 5258 /* 5259 * Currently, the only caller of this function is the atomic_switch_perf_msrs(). 5260 * The host perf context helps to prepare the values of the real hardware for 5261 * a set of msrs that need to be switched atomically in a vmx transaction. 5262 * 5263 * For example, the pseudocode needed to add a new msr should look like: 5264 * 5265 * arr[(*nr)++] = (struct perf_guest_switch_msr){ 5266 * .msr = the hardware msr address, 5267 * .host = the value the hardware has when it doesn't run a guest, 5268 * .guest = the value the hardware has when it runs a guest, 5269 * }; 5270 * 5271 * These values have nothing to do with the emulated values the guest sees 5272 * when it uses {RD,WR}MSR, which should be handled by the KVM context, 5273 * specifically in the intel_pmu_{get,set}_msr(). 
5274 */
/*
 * @nr:   output, number of valid entries in the returned array.
 * @data: opaque pointer, used here as a struct kvm_pmu *. It is only
 *        dereferenced after the "!kvm_pmu" check below, so a NULL @data
 *        yields just the entries built before that check.
 *
 * Returns this CPU's cpuc->guest_switch_msrs array.
 */
5275 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
5276 {
5277 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
5278 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
5279 	struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
5280 	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
5281 	u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
5282 	int global_ctrl, pebs_enable;
5283
5284 	/*
5285 	 * In addition to obeying exclude_guest/exclude_host, remove bits being
5286 	 * used for PEBS when running a guest, because PEBS writes to virtual
5287 	 * addresses (not physical addresses).
5288 	 */
5289 	*nr = 0;
	/* Entry 0 is always GLOBAL_CTRL; remember its index for later fix-ups. */
5290 	global_ctrl = (*nr)++;
5291 	arr[global_ctrl] = (struct perf_guest_switch_msr){
5292 		.msr = MSR_CORE_PERF_GLOBAL_CTRL,
5293 		.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
5294 		.guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
5295 	};
5296
	/* Without DS-based PEBS only GLOBAL_CTRL needs switching. */
5297 	if (!x86_pmu.ds_pebs)
5298 		return arr;
5299
5300 	/*
5301 	 * If PMU counter has PEBS enabled it is not enough to
5302 	 * disable counter on a guest entry since PEBS memory
5303 	 * write can overshoot guest entry and corrupt guest
5304 	 * memory. Disabling PEBS solves the problem.
5305 	 *
5306 	 * Don't do this if the CPU already enforces it.
5307 	 */
5308 	if (x86_pmu.pebs_no_isolation) {
5309 		arr[(*nr)++] = (struct perf_guest_switch_msr){
5310 			.msr = MSR_IA32_PEBS_ENABLE,
5311 			.host = cpuc->pebs_enabled,
5312 			.guest = 0,
5313 		};
5314 		return arr;
5315 	}
5316
	/* The remaining entries need the KVM PMU state and pebs_ept. */
5317 	if (!kvm_pmu || !x86_pmu.pebs_ept)
5318 		return arr;
5319
5320 	arr[(*nr)++] = (struct perf_guest_switch_msr){
5321 		.msr = MSR_IA32_DS_AREA,
5322 		.host = (unsigned long)cpuc->ds,
5323 		.guest = kvm_pmu->ds_area,
5324 	};
5325
5326 	if (x86_pmu.intel_cap.pebs_baseline) {
5327 		arr[(*nr)++] = (struct perf_guest_switch_msr){
5328 			.msr = MSR_PEBS_DATA_CFG,
5329 			.host = cpuc->active_pebs_data_cfg,
5330 			.guest = kvm_pmu->pebs_data_cfg,
5331 		};
5332 	}
5333
	/* Remember the PEBS_ENABLE slot; its guest value is adjusted below. */
5334 	pebs_enable = (*nr)++;
5335 	arr[pebs_enable] = (struct perf_guest_switch_msr){
5336 		.msr = MSR_IA32_PEBS_ENABLE,
5337 		.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
5338 		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
5339 	};
5340
5341 	if (arr[pebs_enable].host) {
5342 		/* Disable guest PEBS if host PEBS is enabled. */
5343 		arr[pebs_enable].guest = 0;
5344 	} else {
5345 		/* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
5346 		arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
5347 		arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
5348 		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
5349 		arr[global_ctrl].guest |= arr[pebs_enable].guest;
5350 	}
5351
5352 	return arr;
5353 }
5354
/*
 * Build the host/guest MSR switch list from the per-counter event select
 * registers: one entry per counter set in x86_pmu.cntr_mask, stored at the
 * counter's own index.
 *
 * Inactive counters get host == guest == 0. Active counters get
 * hw.config | ENABLE on both sides, with ENABLE cleared on the host side
 * for exclude_host events, otherwise on the guest side for exclude_guest
 * events.
 *
 * @nr:   output, set to x86_pmu_max_num_counters(cpuc->pmu).
 * @data: not used by this implementation.
 */
5355 static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
5356 {
5357 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
5358 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
5359 	int idx;
5360
5361 	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
5362 		struct perf_event *event = cpuc->events[idx];
5363
5364 		arr[idx].msr = x86_pmu_config_addr(idx);
5365 		arr[idx].host = arr[idx].guest = 0;
5366
		/* event is only dereferenced for counters marked active. */
5367 		if (!test_bit(idx, cpuc->active_mask))
5368 			continue;
5369
5370 		arr[idx].host = arr[idx].guest =
5371 			event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
5372
5373 		if (event->attr.exclude_host)
5374 			arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
5375 		else if (event->attr.exclude_guest)
5376 			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
5377 	}
5378
5379 	*nr = x86_pmu_max_num_counters(cpuc->pmu);
5380 	return arr;
5381 }
5382
/* Enable @event on the host unless it is marked exclude_host. */
5383 static void core_pmu_enable_event(struct perf_event *event)
5384 {
5385 	if (!event->attr.exclude_host)
5386 		x86_pmu_enable_event(event);
5387 }
5388
/*
 * Enable every active counter of this CPU whose event is not exclude_host.
 * @added is not used.
 */
5389 static void core_pmu_enable_all(int added)
5390 {
5391 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
5392 	int idx;
5393
5394 	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
5395 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
5396
5397 		if (!test_bit(idx, cpuc->active_mask) ||
5398 				cpuc->events[idx]->attr.exclude_host)
5399 			continue;
5400
5401 		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
5402 	}
5403 }
5404
/*
 * hw_config callback layered on intel_pmu_hw_config(): after the common
 * setup succeeds it handles the IN_TX / IN_TX_CHECKPOINTED filter bits.
 * Returns 0 on success or a negative errno.
 */
5405 static int hsw_hw_config(struct perf_event *event)
5406 {
5407 	int ret = intel_pmu_hw_config(event);
5408
5409 	if (ret)
5410 		return ret;
	/* Without RTM and HLE there is nothing more to configure. */
5411 	if (!boot_cpu_has(X86_FEATURE_RTM) && 
!boot_cpu_has(X86_FEATURE_HLE))
5412 		return 0;
	/* Copy the user-requested IN_TX / IN_TX_CHECKPOINTED bits into hw.config. */
5413 	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
5414
5415 	/*
5416 	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
5417 	 * PEBS or in ANY thread mode. Since the results are nonsensical forbid
5418 	 * this combination.
5419 	 */
5420 	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
5421 	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
5422 	      event->attr.precise_ip > 0))
5423 		return -EOPNOTSUPP;
5424
5425 	if (event_is_checkpointed(event)) {
5426 		/*
5427 		 * Sampling of checkpointed events can cause situations where
5428 		 * the CPU constantly aborts because of an overflow, which is
5429 		 * then checkpointed back and ignored. Forbid checkpointing
5430 		 * for sampling.
5431 		 *
5432 		 * But still allow a long sampling period, so that perf stat
5433 		 * from KVM works.
5434 		 */
5435 		if (event->attr.sample_period > 0 &&
5436 		    event->attr.sample_period < 0x7fffffff)
5437 			return -EOPNOTSUPP;
5438 	}
5439 	return 0;
5440 }
5441
/*
 * Shared constraint objects returned by the *_get_event_constraints()
 * helpers in this file. NOTE(review): judging by the object names and by
 * how callers test c->idxmsk64, the last macro argument appears to be the
 * bitmask of allowed counters (bit 32 and up = fixed counters); confirm
 * against the macro definitions.
 */
5442 static struct event_constraint counter0_constraint =
5443 			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
5444
5445 static struct event_constraint counter1_constraint =
5446 			INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
5447
5448 static struct event_constraint counter0_1_constraint =
5449 			INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
5450
5451 static struct event_constraint counter2_constraint =
5452 			EVENT_CONSTRAINT(0, 0x4, 0);
5453
5454 static struct event_constraint fixed0_constraint =
5455 			FIXED_EVENT_CONSTRAINT(0x00c0, 0);
5456
5457 static struct event_constraint fixed0_counter0_constraint =
5458 			INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
5459
5460 static struct event_constraint fixed0_counter0_1_constraint =
5461 			INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
5462
5463 static struct event_constraint counters_1_7_constraint =
5464 			INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
5465
/*
 * Start from intel_get_event_constraints() and, for events with
 * HSW_IN_TX_CHECKPOINTED set in hw.config, narrow the result to
 * counter2_constraint, or to emptyconstraint when bit 2 is not in the
 * base constraint.
 */
5466 static struct event_constraint *
5467 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int 
idx,
5468 			  struct perf_event *event)
5469 {
5470 	struct event_constraint *c;
5471
5472 	c = intel_get_event_constraints(cpuc, idx, event);
5473
5474 	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
5475 	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
5476 		if (c->idxmsk64 & (1U << 2))
5477 			return &counter2_constraint;
5478 		return &emptyconstraint;
5479 	}
5480
5481 	return c;
5482 }
5483
/*
 * Return fixed0_constraint for precise_ip == 3 events whose config matches
 * it; everything else is delegated to hsw_get_event_constraints().
 */
5484 static struct event_constraint *
5485 icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5486 			  struct perf_event *event)
5487 {
5488 	/*
5489 	 * Fixed counter 0 has less skid.
5490 	 * Force instruction:ppp in Fixed counter 0
5491 	 */
5492 	if ((event->attr.precise_ip == 3) &&
5493 	    constraint_match(&fixed0_constraint, event->hw.config))
5494 		return &fixed0_constraint;
5495
5496 	return hsw_get_event_constraints(cpuc, idx, event);
5497 }
5498
/*
 * icl_get_event_constraints() plus the PDist rule: a precise_ip == 3 event
 * that does not match fixed0_constraint gets counter0_constraint when bit 0
 * is in its base constraint, otherwise emptyconstraint.
 */
5499 static struct event_constraint *
5500 glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5501 			  struct perf_event *event)
5502 {
5503 	struct event_constraint *c;
5504
5505 	c = icl_get_event_constraints(cpuc, idx, event);
5506
5507 	/*
5508 	 * The :ppp indicates the Precise Distribution (PDist) facility, which
5509 	 * is only supported on the GP counter 0. If a :ppp event which is not
5510 	 * available on the GP counter 0, error out.
5511 	 * Exception: Instruction PDIR is only available on the fixed counter 0.
5512 	 */
5513 	if ((event->attr.precise_ip == 3) &&
5514 	    !constraint_match(&fixed0_constraint, event->hw.config)) {
5515 		if (c->idxmsk64 & BIT_ULL(0))
5516 			return &counter0_constraint;
5517
5518 		return &emptyconstraint;
5519 	}
5520
5521 	return c;
5522 }
5523
/*
 * precise_ip == 3 events always get counter0_constraint (the base
 * constraint is not consulted); all others use
 * intel_get_event_constraints().
 */
5524 static struct event_constraint *
5525 glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5526 			  struct perf_event *event)
5527 {
5528 	struct event_constraint *c;
5529
5530 	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
5531 	if (event->attr.precise_ip == 3)
5532 		return &counter0_constraint;
5533
5534 	c = intel_get_event_constraints(cpuc, idx, event);
5535
5536 	return c;
5537 }
5538
/*
 * For precise_ip == 3: fixed0_counter0_constraint when the config matches
 * fixed0_constraint, counter0_constraint otherwise. Non-:ppp events keep
 * the result of intel_get_event_constraints().
 */
5539 static struct event_constraint *
5540 tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5541 			  struct perf_event *event)
5542 {
5543 	struct event_constraint *c;
5544
5545 	c = intel_get_event_constraints(cpuc, idx, event);
5546
5547 	/*
5548 	 * :ppp means to do reduced skid PEBS,
5549 	 * which is available on PMC0 and fixed counter 0.
5550 	 */
5551 	if (event->attr.precise_ip == 3) {
5552 		/* Force instruction:ppp on PMC0 and Fixed counter 0 */
5553 		if (constraint_match(&fixed0_constraint, event->hw.config))
5554 			return &fixed0_counter0_constraint;
5555
5556 		return &counter0_constraint;
5557 	}
5558
5559 	return c;
5560 }
5561
/* When false, tfa_get_event_constraints() removes PMC3 from constraints. */
5562 static bool allow_tsx_force_abort = true;
5563
/*
 * hsw_get_event_constraints() with PMC3 masked out while
 * allow_tsx_force_abort is false. The constraint is first copied via
 * dyn_constraint() before bit 3 is cleared and the weight is decremented.
 */
5564 static struct event_constraint *
5565 tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5566 			  struct perf_event *event)
5567 {
5568 	struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
5569
5570 	/*
5571 	 * Without TFA we must not use PMC3.
5572 	 */
5573 	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
5574 		c = dyn_constraint(cpuc, c, idx);
5575 		c->idxmsk64 &= ~(1ULL << 3);
5576 		c->weight--;
5577 	}
5578
5579 	return c;
5580 }
5581
/*
 * Hybrid dispatch: hybrid_big PMUs use glc_get_event_constraints(),
 * hybrid_small PMUs use tnt_get_event_constraints(). Any other type
 * triggers WARN_ON and yields emptyconstraint.
 */
5582 static struct event_constraint *
5583 adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5584 			  struct perf_event *event)
5585 {
5586 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
5587
5588 	if (pmu->pmu_type == hybrid_big)
5589 		return glc_get_event_constraints(cpuc, idx, event);
5590 	else if (pmu->pmu_type == hybrid_small)
5591 		return tnt_get_event_constraints(cpuc, idx, event);
5592
5593 	WARN_ON(1);
5594 	return &emptyconstraint;
5595 }
5596
/*
 * intel_get_event_constraints() with precise_ip == 3 events narrowed to
 * GP counters 0/1 and, for events matching fixed0_constraint, fixed
 * counter 0 as well; see the comments in the body for the exact mapping.
 */
5597 static struct event_constraint *
5598 cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5599 			  struct perf_event *event)
5600 {
5601 	struct event_constraint *c;
5602
5603 	c = intel_get_event_constraints(cpuc, idx, event);
5604
5605 	/*
5606 	 * The :ppp indicates the Precise Distribution (PDist) facility, which
5607 	 * is only supported on the GP counter 0 & 1 and Fixed counter 0.
5608 	 * If a :ppp event which is not available on the above eligible counters,
5609 	 * error out.
5610 	 */
5611 	if (event->attr.precise_ip == 3) {
5612 		/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
5613 		if (constraint_match(&fixed0_constraint, event->hw.config)) {
5614 			/* The fixed counter 0 doesn't support LBR event logging. */
5615 			if (branch_sample_counters(event))
5616 				return &counter0_1_constraint;
5617 			else
5618 				return &fixed0_counter0_1_constraint;
5619 		}
5620
		/* Keep only whichever of GP counters 0 and 1 the base constraint allows. */
5621 		switch (c->idxmsk64 & 0x3ull) {
5622 		case 0x1:
5623 			return &counter0_constraint;
5624 		case 0x2:
5625 			return &counter1_constraint;
5626 		case 0x3:
5627 			return &counter0_1_constraint;
5628 		}
5629 		return &emptyconstraint;
5630 	}
5631
5632 	return c;
5633 }
5634
/*
 * glc_get_event_constraints() plus one rule: a precise event that samples
 * PERF_SAMPLE_WEIGHT_TYPE and matches fixed0_constraint is moved to
 * counters_1_7_constraint, or rejected with emptyconstraint when it is
 * also precise_ip == 3.
 */
5635 static struct event_constraint *
5636 rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5637 			  struct perf_event *event)
5638 {
5639 	struct event_constraint *c;
5640
5641 	c = glc_get_event_constraints(cpuc, idx, event);
5642
5643 	/* The Retire Latency is not supported by the fixed counter 0. */
5644 	if (event->attr.precise_ip &&
5645 	    (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
5646 	    constraint_match(&fixed0_constraint, event->hw.config)) {
5647 		/*
5648 		 * The Instruction PDIR is only available
5649 		 * on the fixed counter 0. Error out for this case.
5650 		 */
5651 		if (event->attr.precise_ip == 3)
5652 			return &emptyconstraint;
5653 		return &counters_1_7_constraint;
5654 	}
5655
5656 	return c;
5657 }
5658
/*
 * Hybrid dispatch: hybrid_big PMUs use rwc_get_event_constraints(),
 * hybrid_small PMUs use cmt_get_event_constraints(). Any other type
 * triggers WARN_ON and yields emptyconstraint.
 */
5659 static struct event_constraint *
5660 mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5661 			  struct perf_event *event)
5662 {
5663 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
5664
5665 	if (pmu->pmu_type == hybrid_big)
5666 		return rwc_get_event_constraints(cpuc, idx, event);
5667 	if (pmu->pmu_type == hybrid_small)
5668 		return cmt_get_event_constraints(cpuc, idx, event);
5669
5670 	WARN_ON(1);
5671 	return &emptyconstraint;
5672 }
5673
/*
 * Hybrid hw_config dispatch: hybrid_big uses hsw_hw_config(), hybrid_small
 * uses intel_pmu_hw_config(). Any other type triggers WARN_ON and returns
 * -EOPNOTSUPP.
 */
5674 static int adl_hw_config(struct perf_event *event)
5675 {
5676 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
5677
5678 	if (pmu->pmu_type == hybrid_big)
5679 		return hsw_hw_config(event);
5680 	else if (pmu->pmu_type == hybrid_small)
5681 		return intel_pmu_hw_config(event);
5682
5683 	WARN_ON(1);
5684 	return -EOPNOTSUPP;
5685 }
5686
5687 static enum intel_cpu_type 
adl_get_hybrid_cpu_type(void)
5688 {
	/* Unconditionally reports the "core" CPU type. */
5689 	return INTEL_CPU_TYPE_CORE;
5690 }
5691
/*
 * True when @event's architectural event bits equal event 0xc0 / umask
 * 0x01, which the comments below refer to as INST_RETIRED.ALL.
 */
5692 static inline bool erratum_hsw11(struct perf_event *event)
5693 {
5694 	return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
5695 		X86_CONFIG(.event=0xc0, .umask=0x01);
5696 }
5697
/*
 * hybrid_tiny PMUs use cmt_get_event_constraints(); every other PMU type
 * goes through mtl_get_event_constraints().
 */
5698 static struct event_constraint *
5699 arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
5700 			    struct perf_event *event)
5701 {
5702 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
5703
5704 	if (pmu->pmu_type == hybrid_tiny)
5705 		return cmt_get_event_constraints(cpuc, idx, event);
5706
5707 	return mtl_get_event_constraints(cpuc, idx, event);
5708 }
5709
/*
 * hybrid_tiny PMUs use intel_pmu_hw_config(); every other PMU type goes
 * through adl_hw_config().
 */
5710 static int arl_h_hw_config(struct perf_event *event)
5711 {
5712 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
5713
5714 	if (pmu->pmu_type == hybrid_tiny)
5715 		return intel_pmu_hw_config(event);
5716
5717 	return adl_hw_config(event);
5718 }
5719
5720 /*
5721  * The HSW11 requires a period larger than 100 which is the same as the BDM11.
5722  * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
5723  *
5724  * The message 'interrupt took too long' can be observed on any counter which
5725  * was armed with a period < 32 and two events expired in the same NMI.
5726  * A minimum period of 32 is enforced for the rest of the events.
5727  */
5728 static void hsw_limit_period(struct perf_event *event, s64 *left)
5729 {
	/* Raise *left in place to the minimum; larger values are untouched. */
5730 	*left = max(*left, erratum_hsw11(event) ? 128 : 32);
5731 }
5732
5733 /*
5734  * Broadwell:
5735  *
5736  * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
5737  * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
5738  * the two to enforce a minimum period of 128 (the smallest value that has bits
5739  * 0-5 cleared and >= 100).
5740  *
5741  * Because of how the code in x86_perf_event_set_period() works, the truncation
5742  * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
5743  * to make up for the 'lost' events due to carrying the 'error' in period_left.
5744  *
5745  * Therefore the effective (average) period matches the requested period,
5746  * despite coarser hardware granularity.
5747  */
5748 static void bdw_limit_period(struct perf_event *event, s64 *left)
5749 {
	/* Only the erratum_hsw11() event is adjusted; others pass through. */
5750 	if (erratum_hsw11(event)) {
5751 		if (*left < 128)
5752 			*left = 128;
5753 		*left &= ~0x3fULL;
5754 	}
5755 }
5756
/* Enforce a minimum period of 32 for every event. */
5757 static void nhm_limit_period(struct perf_event *event, s64 *left)
5758 {
5759 	*left = max(*left, 32LL);
5760 }
5761
/* Enforce a minimum period of 128, but only for precise_ip == 3 events. */
5762 static void glc_limit_period(struct perf_event *event, s64 *left)
5763 {
5764 	if (event->attr.precise_ip == 3)
5765 		*left = max(*left, 128LL);
5766 }
5767
/* Format attributes: each maps a field name to its bit range. */
5768 PMU_FORMAT_ATTR(event, "config:0-7" );
5769 PMU_FORMAT_ATTR(umask, "config:8-15" );
5770 PMU_FORMAT_ATTR(edge, "config:18" );
5771 PMU_FORMAT_ATTR(pc, "config:19" );
5772 PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
5773 PMU_FORMAT_ATTR(inv, "config:23" );
5774 PMU_FORMAT_ATTR(cmask, "config:24-31" );
5775 PMU_FORMAT_ATTR(in_tx, "config:32" );
5776 PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
5777 PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
5778
5779 PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
5780
/*
 * show() handler for the "umask" attribute registered below: prints the
 * extended range "config:8-15,40-47" when the PMU's config_mask has all
 * ARCH_PERFMON_EVENTSEL_UMASK2 bits set, else the legacy "config:8-15".
 */
5781 static ssize_t umask2_show(struct device *dev,
5782 			   struct device_attribute *attr,
5783 			   char *page)
5784 {
5785 	u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
5786
5787 	if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
5788 		return sprintf(page, "config:8-15,40-47\n");
5789
5790 	/* Roll back to the old format if umask2 is not supported. */
5791 	return sprintf(page, "config:8-15\n");
5792 }
5793
/* Named "umask" on purpose: it shares the name of the plain umask format. */
5794 static struct device_attribute format_attr_umask2 =
5795 		__ATTR(umask, 0444, umask2_show, NULL);
5796
/* Order matters: evtsel_ext_is_visible() identifies entries by index. */
5797 static struct attribute *format_evtsel_ext_attrs[] = {
5798 	&format_attr_umask2.attr,
5799 	&format_attr_eq.attr,
5800 	&format_attr_metrics_clear.attr,
5801 	NULL
5802 };
5803
/*
 * Visibility callback for format_evtsel_ext_attrs[], keyed on index @i:
 *   0 (umask)         - always visible; the format is chosen in umask2_show()
 *   1 (eq)            - visible when config_mask has ARCH_PERFMON_EVENTSEL_EQ
 *   2 (metrics_clear) - visible when intel_cap.rdpmc_metrics_clear is set
 * Any other index is hidden. Returns attr->mode or 0.
 */
5804 static umode_t
5805 evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
5806 {
5807 	struct device *dev = kobj_to_dev(kobj);
5808 	u64 mask;
5809
5810 	/*
5811 	 * The umask and umask2 have different formats but share the
5812 	 * same attr name. In update mode, the previous value of the
5813 	 * umask is unconditionally removed before is_visible. If
5814 	 * umask2 format is not enumerated, it's impossible to roll
5815 	 * back to the old format.
5816 	 * Does the check in umask2_show rather than is_visible.
5817 	 */
5818 	if (i == 0)
5819 		return attr->mode;
5820
5821 	mask = hybrid(dev_get_drvdata(dev), config_mask);
5822 	if (i == 1)
5823 		return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
5824
5825 	/* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
5826 	if (i == 2) {
5827 		union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
5828
5829 		return intel_cap.rdpmc_metrics_clear ? 
attr->mode : 0;
5830 	}
5831
5832 	return 0;
5833 }
5834
/* NULL-terminated list of the basic event-select format attributes. */
5835 static struct attribute *intel_arch_formats_attr[] = {
5836 	&format_attr_event.attr,
5837 	&format_attr_umask.attr,
5838 	&format_attr_edge.attr,
5839 	&format_attr_pc.attr,
5840 	&format_attr_inv.attr,
5841 	&format_attr_cmask.attr,
5842 	NULL,
5843 };
5844
/*
 * Extract the event-select field (ARCH_PERFMON_EVENTSEL_EVENT bits) from
 * @config and let x86_event_sysfs_show() format it into @page.
 * Returns whatever x86_event_sysfs_show() returns.
 */
5845 ssize_t intel_event_sysfs_show(char *page, u64 config)
5846 {
5847 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
5848
5849 	return x86_event_sysfs_show(page, config, event);
5850 }
5851
/*
 * Allocate a zeroed struct intel_shared_regs on @cpu's NUMA node, with
 * every per-register lock initialised and core_id set to -1.
 * Returns NULL on allocation failure; the caller owns the memory.
 */
5852 static struct intel_shared_regs *allocate_shared_regs(int cpu)
5853 {
5854 	struct intel_shared_regs *regs;
5855 	int i;
5856
5857 	regs = kzalloc_node(sizeof(struct intel_shared_regs),
5858 			    GFP_KERNEL, cpu_to_node(cpu));
5859 	if (regs) {
5860 		/*
5861 		 * initialize the locks to keep lockdep happy
5862 		 */
5863 		for (i = 0; i < EXTRA_REG_MAX; i++)
			/* "&regs" had been mangled into a (R) sign; restored. */
5864 			raw_spin_lock_init(&regs->regs[i].lock);
5865
5866 		regs->core_id = -1;
5867 	}
5868 	return regs;
5869 }
5870
/*
 * Allocate a zeroed struct intel_excl_cntrs on @cpu's NUMA node, with its
 * lock initialised and core_id set to -1.
 * Returns NULL on allocation failure; the caller owns the memory.
 */
5871 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
5872 {
5873 	struct intel_excl_cntrs *c;
5874
5875 	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
5876 			 GFP_KERNEL, cpu_to_node(cpu));
5877 	if (c) {
5878 		raw_spin_lock_init(&c->lock);
5879 		c->core_id = -1;
5880 	}
5881 	return c;
5882 }
5883
5884
/*
 * Set up the per-CPU allocations that the current x86_pmu configuration
 * needs for @cpuc:
 *   - shared_regs     when hybrid, or extra_regs / lbr_sel_map are in use
 *   - constraint_list when any of PMU_FL_EXCL_CNTRS, PMU_FL_TFA or
 *                     PMU_FL_DYN_CONSTRAINT is set
 *   - excl_cntrs      when PMU_FL_EXCL_CNTRS is set
 *
 * Returns 0 on success. On allocation failure returns -ENOMEM after
 * freeing what this call already allocated and resetting those pointers
 * to NULL.
 */
5885 int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
5886 {
5887 	cpuc->pebs_record_size = x86_pmu.pebs_record_size;
5888
5889 	if (is_hybrid() || x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
5890 		cpuc->shared_regs = allocate_shared_regs(cpu);
5891 		if (!cpuc->shared_regs)
5892 			goto err;
5893 	}
5894
5895 	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
		/* One constraint slot per possible counter index. */
5896 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
5897
5898 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
5899 		if (!cpuc->constraint_list)
5900 			goto err_shared_regs;
5901 	}
5902
5903 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
5904 		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
5905 		if 
(!cpuc->excl_cntrs)
5906 			goto err_constraint_list;
5907
5908 		cpuc->excl_thread_id = 0;
5909 	}
5910
5911 	return 0;
5912
	/* Unwind in reverse allocation order; kfree(NULL) is a no-op. */
5913 err_constraint_list:
5914 	kfree(cpuc->constraint_list);
5915 	cpuc->constraint_list = NULL;
5916
5917 err_shared_regs:
5918 	kfree(cpuc->shared_regs);
5919 	cpuc->shared_regs = NULL;
5920
5921 err:
5922 	return -ENOMEM;
5923 }
5924
/*
 * Prepare @cpu's cpu_hw_events via intel_cpuc_prepare(), then allocate its
 * arch-PEBS buffer. Returns 0 or the first failing step's error code.
 */
5925 static int intel_pmu_cpu_prepare(int cpu)
5926 {
5927 	int ret;
5928
5929 	ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
5930 	if (ret)
5931 		return ret;
5932
5933 	return alloc_arch_pebs_buf_on_cpu(cpu);
5934 }
5935
/*
 * Set or clear DEBUGCTLMSR_FREEZE_IN_SMM_BIT in MSR_IA32_DEBUGCTLMSR.
 * @data points to an unsigned long: non-zero sets the bit, zero clears it.
 */
5936 static void flip_smm_bit(void *data)
5937 {
5938 	unsigned long set = *(unsigned long *)data;
5939
5940 	if (set > 0) {
5941 		msr_set_bit(MSR_IA32_DEBUGCTLMSR,
5942 			    DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
5943 	} else {
5944 		msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
5945 			      DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
5946 	}
5947 }
5948
/*
 * Clip the counter masks to what the driver supports and derive the
 * global control mask from them.
 *
 * @cntr_mask:       in/out, GP counter bitmask; bits at or above
 *                   INTEL_PMC_MAX_GENERIC are dropped, with a WARN.
 * @fixed_cntr_mask: in/out, fixed counter bitmask; bits at or above
 *                   INTEL_PMC_MAX_FIXED are dropped, with a WARN.
 * @intel_ctrl:      out, clipped GP mask OR'ed with the clipped fixed mask
 *                   shifted left by INTEL_PMC_IDX_FIXED.
 */
5949 static void intel_pmu_check_counters_mask(u64 *cntr_mask,
5950 					  u64 *fixed_cntr_mask,
5951 					  u64 *intel_ctrl)
5952 {
5953 	unsigned int bit;
5954
	/* fls64() is the 1-based index of the highest set bit. */
5955 	bit = fls64(*cntr_mask);
5956 	if (bit > INTEL_PMC_MAX_GENERIC) {
5957 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
5958 		     bit, INTEL_PMC_MAX_GENERIC);
5959 		*cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
5960 	}
5961 	*intel_ctrl = *cntr_mask;
5962
5963 	bit = fls64(*fixed_cntr_mask);
5964 	if (bit > INTEL_PMC_MAX_FIXED) {
5965 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
5966 		     bit, INTEL_PMC_MAX_FIXED);
5967 		*fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
5968 	}
5969
5970 	*intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
5971 }
5972
/* Forward declaration; the definition is not in this part of the file. */
5973 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
5974 					      u64 cntr_mask,
5975 					      u64 fixed_cntr_mask,
5976 					      u64 intel_ctrl);
5977
/*
 * Kinds of dynamic counter restriction checked by
 * intel_pmu_check_dyn_constr(). The values index dyn_constr_type_name[].
 */
5978 enum dyn_constr_type {
5979 	DYN_CONSTR_NONE,
5980 	DYN_CONSTR_BR_CNTR,
5981 	DYN_CONSTR_ACR_CNTR,
5982 	DYN_CONSTR_ACR_CAUSE,
5983 	DYN_CONSTR_PEBS,
5984 	DYN_CONSTR_PDIST,
5985
5986 
DYN_CONSTR_MAX,
5987 };
5988
/* Phrases spliced into the diagnostics below, indexed by enum dyn_constr_type. */
5989 static const char * const dyn_constr_type_name[] = {
5990 	[DYN_CONSTR_NONE] = "a normal event",
5991 	[DYN_CONSTR_BR_CNTR] = "a branch counter logging event",
5992 	[DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event",
5993 	[DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event",
5994 	[DYN_CONSTR_PEBS] = "a PEBS event",
5995 	[DYN_CONSTR_PDIST] = "a PEBS PDIST event",
5996 };
5997
/*
 * Check the constraint table @constr against one dynamic counter @mask and
 * report potential scheduling problems. Nothing is modified; the function
 * only prints diagnostics.
 *
 * For each constraint (topdown metric constraints are skipped):
 *  - pr_info() when no counter survives the mask and the constraint's
 *    highest counter is below INTEL_PMC_IDX_FIXED;
 *  - pr_warn() for every later constraint in the table that partially
 *    overlaps it in a way that may defeat the scheduler (cases described
 *    in the body).
 *
 * @type selects the wording taken from dyn_constr_type_name[].
 */
5998 static void __intel_pmu_check_dyn_constr(struct event_constraint *constr,
5999 					 enum dyn_constr_type type, u64 mask)
6000 {
6001 	struct event_constraint *c1, *c2;
6002 	int new_weight, check_weight;
6003 	u64 new_mask, check_mask;
6004
6005 	for_each_event_constraint(c1, constr) {
6006 		new_mask = c1->idxmsk64 & mask;
6007 		new_weight = hweight64(new_mask);
6008
6009 		/* ignore topdown perf metrics event */
6010 		if (c1->idxmsk64 & INTEL_PMC_MSK_TOPDOWN)
6011 			continue;
6012
6013 		if (!new_weight && fls64(c1->idxmsk64) < INTEL_PMC_IDX_FIXED) {
6014 			pr_info("The event 0x%llx is not supported as %s.\n",
6015 				c1->code, dyn_constr_type_name[type]);
6016 		}
6017
		/* With at most one usable counter there is no overlap to check. */
6018 		if (new_weight <= 1)
6019 			continue;
6020
		/* Compare only against later entries so each pair is checked once. */
6021 		for_each_event_constraint(c2, c1 + 1) {
6022 			bool check_fail = false;
6023
6024 			check_mask = c2->idxmsk64 & mask;
6025 			check_weight = hweight64(check_mask);
6026
6027 			if (c2->idxmsk64 & INTEL_PMC_MSK_TOPDOWN ||
6028 			    !check_weight)
6029 				continue;
6030
6031 			/* The same constraints or no overlap */
6032 			if (new_mask == check_mask ||
6033 			    (new_mask ^ check_mask) == (new_mask | check_mask))
6034 				continue;
6035
6036 			/*
6037 			 * A scheduler issue may be triggered in the following cases.
6038 			 * - Two overlap constraints have the same weight.
6039 			 *   E.g., A constraints: 0x3, B constraints: 0x6
6040 			 *   event counter failure case
6041 			 *   B PMC[2:1] 1
6042 			 *   A PMC[1:0] 0
6043 			 *   A PMC[1:0] FAIL
6044 			 * - Two overlap constraints have different weight.
6045 			 *   The constraint has a low weight, but has high last bit.
6046 			 *   E.g., A constraints: 0x7, B constraints: 0xC
6047 			 *   event counter failure case
6048 			 *   B PMC[3:2] 2
6049 			 *   A PMC[2:0] 0
6050 			 *   A PMC[2:0] 1
6051 			 *   A PMC[2:0] FAIL
6052 			 */
6053 			if (new_weight == check_weight) {
6054 				check_fail = true;
6055 			} else if (new_weight < check_weight) {
				/* c1 is lighter: fails if not a subset and it reaches higher. */
6056 				if ((new_mask | check_mask) != check_mask &&
6057 				    fls64(new_mask) > fls64(check_mask))
6058 					check_fail = true;
6059 			} else {
				/* c2 is lighter: the mirrored condition. */
6060 				if ((new_mask | check_mask) != new_mask &&
6061 				    fls64(new_mask) < fls64(check_mask))
6062 					check_fail = true;
6063 			}
6064
6065 			if (check_fail) {
6066 				pr_warn("The two events 0x%llx and 0x%llx may not be "
6067 					"fully scheduled under some circumstances as "
6068 					"%s.\n",
6069 					c1->code, c2->code, dyn_constr_type_name[type]);
6070 			}
6071 		}
6072 	}
6073 }
6074
/*
 * Run __intel_pmu_check_dyn_constr() on @constr once per dyn_constr_type,
 * using the counter mask that applies to each type on @pmu. Types whose
 * mask is 0 are skipped.
 *
 * @cntr_mask is the mask used for DYN_CONSTR_NONE. The ACR, PEBS and PDIST
 * masks are limited to the GP counter range (gp_mask).
 */
6075 static void intel_pmu_check_dyn_constr(struct pmu *pmu,
6076 				       struct event_constraint *constr,
6077 				       u64 cntr_mask)
6078 {
6079 	u64 gp_mask = GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
6080 	enum dyn_constr_type i;
6081 	u64 mask;
6082
6083 	for (i = DYN_CONSTR_NONE; i < DYN_CONSTR_MAX; i++) {
6084 		mask = 0;
6085 		switch (i) {
6086 		case DYN_CONSTR_NONE:
6087 			mask = cntr_mask;
6088 			break;
6089 		case DYN_CONSTR_BR_CNTR:
6090 			if (x86_pmu.flags & PMU_FL_BR_CNTR)
6091 				mask = x86_pmu.lbr_counters;
6092 			break;
6093 		case DYN_CONSTR_ACR_CNTR:
6094 			mask = hybrid(pmu, acr_cntr_mask64) & gp_mask;
6095 			break;
6096 		case DYN_CONSTR_ACR_CAUSE:
			/* Same mask as DYN_CONSTR_ACR_CNTR: that pass covers it. */
6097 			if (hybrid(pmu, acr_cntr_mask64) ==
6098 					hybrid(pmu, acr_cause_mask64))
6099 				continue;
6100 			mask = hybrid(pmu, acr_cause_mask64) & gp_mask;
6101 			break;
6102 		case DYN_CONSTR_PEBS:
6103 			if (x86_pmu.arch_pebs) {
6104 				mask = hybrid(pmu, arch_pebs_cap).counters &
6105 				       gp_mask;
6106 			}
6107 			break;
6108 		case DYN_CONSTR_PDIST:
6109 			if (x86_pmu.arch_pebs) {
6110 				mask = hybrid(pmu, arch_pebs_cap).pdists &
6111 				       gp_mask;
6112 			}
6113 			break;
6114 		default:
6115 			pr_warn("Unsupported dynamic constraint type %d\n", i);
6116 		}
6117
6118 		if (mask)
6119 			__intel_pmu_check_dyn_constr(constr, i, 
mask); 6120 } 6121 } 6122 6123 static void intel_pmu_check_event_constraints_all(struct pmu *pmu) 6124 { 6125 struct event_constraint *event_constraints = hybrid(pmu, event_constraints); 6126 struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints); 6127 u64 cntr_mask = hybrid(pmu, cntr_mask64); 6128 u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64); 6129 u64 intel_ctrl = hybrid(pmu, intel_ctrl); 6130 6131 intel_pmu_check_event_constraints(event_constraints, cntr_mask, 6132 fixed_cntr_mask, intel_ctrl); 6133 6134 if (event_constraints) 6135 intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask); 6136 6137 if (pebs_constraints) 6138 intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask); 6139 } 6140 6141 static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); 6142 6143 static inline bool intel_pmu_broken_perf_cap(void) 6144 { 6145 /* The Perf Metric (Bit 15) is always cleared */ 6146 if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE || 6147 boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L) 6148 return true; 6149 6150 return false; 6151 } 6152 6153 static inline void __intel_update_pmu_caps(struct pmu *pmu) 6154 { 6155 struct pmu *dest_pmu = pmu ? 
pmu : x86_get_pmu(smp_processor_id());

	/* Advertise extended registers when arch-PEBS can capture XMM. */
	if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
		dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
}

/*
 * Adjust the global x86_pmu.large_pebs_flags according to the arch-PEBS
 * capability bits recorded for @pmu: sample types backed by a capability
 * are added, those lacking one are removed.
 */
static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
{
	u64 caps = hybrid(pmu, arch_pebs_cap).caps;

	x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
	if (caps & ARCH_PEBS_LBR)
		x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
	if (caps & ARCH_PEBS_CNTR_MASK)
		x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;

	if (!(caps & ARCH_PEBS_AUX))
		x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
	if (!(caps & ARCH_PEBS_GPR)) {
		x86_pmu.large_pebs_flags &=
			~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
	}
}

/*
 * Merge a general-purpose counter bitmap and a fixed counter bitmap into a
 * single 64-bit mask; the fixed bitmap is placed at INTEL_PMC_IDX_FIXED.
 */
#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))

/*
 * Refresh the capabilities of @pmu from CPUID leaf ARCH_PERFMON_EXT_LEAF
 * and its sub-leaves: event select config bits, counter masks, ACR masks
 * and arch-PEBS capabilities. Unless the CPU is known to report broken
 * perf capabilities, MSR_IA32_PERF_CAPABILITIES is re-read as well.
 *
 * eax/ebx/ecx/edx are reused by every cpuid_count() call, so each sub-leaf
 * result must be consumed before the next query.
 */
static void update_pmu_cap(struct pmu *pmu)
{
	unsigned int eax, ebx, ecx, edx;
	union cpuid35_eax eax_0;
	union cpuid35_ebx ebx_0;
	u64 cntrs_mask = 0;
	u64 pebs_mask = 0;
	u64 pdists_mask = 0;

	cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);

	if (ebx_0.split.umask2)
		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
	if (ebx_0.split.eq)
		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
	if (ebx_0.split.rdpmc_user_disable)
		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_RDPMC_USER_DISABLE;

	if (eax_0.split.cntr_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
			    &eax, &ebx, &ecx, &edx);
		hybrid(pmu, cntr_mask64) = eax;
		hybrid(pmu, fixed_cntr_mask64) = ebx;
		cntrs_mask = counter_mask(eax, ebx);
	}

	if (eax_0.split.acr_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
			    &eax, &ebx, &ecx, &edx);
		/* The mask of the counters which can be reloaded */
		hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
		/* The mask of the counters which can cause a reload of reloadable counters */
		hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
	}

	/* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
	if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
			    &eax, &ebx, &ecx, &edx);
		hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;

		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
			    &eax, &ebx, &ecx, &edx);
		pebs_mask = counter_mask(eax, ecx);
		pdists_mask = counter_mask(ebx, edx);
		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;

		/* PEBS/PDIST counters must be a subset of the enumerated counters. */
		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
			x86_pmu.arch_pebs = 0;
		} else {
			__intel_update_pmu_caps(pmu);
			__intel_update_large_pebs_flags(pmu);
		}
	} else {
		WARN_ON(x86_pmu.arch_pebs == 1);
		x86_pmu.arch_pebs = 0;
	}

	if (!intel_pmu_broken_perf_cap()) {
		/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
		rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
	}
}

/*
 * Finalize one hybrid PMU: sanitize its counter masks, derive the PEBS
 * events mask and the unconstrained constraint from them, sync the perf
 * metrics enable bit in intel_ctrl with the capability, and validate the
 * constraint tables and extra registers.
 */
static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
{
	intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
				      &pmu->intel_ctrl);
	pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
	pmu->unconstrained = (struct event_constraint)
			     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
						0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

	if (pmu->intel_cap.perf_metrics)
		pmu->intel_ctrl |= GLOBAL_CTRL_EN_PERF_METRICS;
	else
		pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;

	pmu->pmu.capabilities |= PERF_PMU_CAP_MEDIATED_VPMU;

	intel_pmu_check_event_constraints_all(&pmu->pmu);

	intel_pmu_check_extra_regs(pmu->extra_regs);
6265 } 6266 6267 static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) 6268 { 6269 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); 6270 enum intel_cpu_type cpu_type = c->topo.intel_type; 6271 int i; 6272 6273 /* 6274 * This is running on a CPU model that is known to have hybrid 6275 * configurations. But the CPU told us it is not hybrid, shame 6276 * on it. There should be a fixup function provided for these 6277 * troublesome CPUs (->get_hybrid_cpu_type). 6278 */ 6279 if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) { 6280 if (x86_pmu.get_hybrid_cpu_type) 6281 cpu_type = x86_pmu.get_hybrid_cpu_type(); 6282 else 6283 return NULL; 6284 } 6285 6286 /* 6287 * This essentially just maps between the 'hybrid_cpu_type' 6288 * and 'hybrid_pmu_type' enums except for ARL-H processor 6289 * which needs to compare atom uarch native id since ARL-H 6290 * contains two different atom uarchs. 6291 */ 6292 for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { 6293 enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; 6294 u32 native_id; 6295 6296 if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big) 6297 return &x86_pmu.hybrid_pmu[i]; 6298 if (cpu_type == INTEL_CPU_TYPE_ATOM) { 6299 if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) 6300 return &x86_pmu.hybrid_pmu[i]; 6301 6302 native_id = c->topo.intel_native_model_id; 6303 if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small) 6304 return &x86_pmu.hybrid_pmu[i]; 6305 if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny) 6306 return &x86_pmu.hybrid_pmu[i]; 6307 } 6308 } 6309 6310 return NULL; 6311 } 6312 6313 static bool init_hybrid_pmu(int cpu) 6314 { 6315 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 6316 struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); 6317 6318 if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { 6319 cpuc->pmu = NULL; 6320 return false; 6321 } 6322 6323 /* Only check and dump the PMU information for the first CPU */ 6324 if 
(!cpumask_empty(&pmu->supported_cpus))
		goto end;

	if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
		update_pmu_cap(&pmu->pmu);

	intel_pmu_check_hybrid_pmus(pmu);

	if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
		return false;

	pr_info("%s PMU driver: ", pmu->name);

	pr_cont("\n");

	x86_pmu_show_pmu_cap(&pmu->pmu);

end:
	/* Record this CPU as served by the PMU and bind the per-CPU state to it. */
	cpumask_set_cpu(cpu, &pmu->supported_cpus);
	cpuc->pmu = &pmu->pmu;

	return true;
}

/*
 * CPU hotplug "starting" callback: initialize the per-CPU PMU state of
 * @cpu. On hybrid systems the CPU is first bound to its hybrid PMU; if
 * that fails nothing else is set up. Afterwards DS/arch-PEBS, LBR, TFA and
 * freeze-on-SMI state are initialized, and shared_regs/excl_cntrs are
 * shared with a sibling whose structure carries the same core id.
 */
static void intel_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int core_id = topology_core_id(cpu);
	int i;

	if (is_hybrid() && !init_hybrid_pmu(cpu))
		return;

	init_debug_store_on_cpu(cpu);
	init_arch_pebs_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up, and that may
	 * even boot with LBRs enabled.
	 */
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr)
		msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT);
	intel_pmu_lbr_reset();

	cpuc->lbr_sel = NULL;

	if (x86_pmu.flags & PMU_FL_TFA) {
		WARN_ON_ONCE(cpuc->tfa_shadow);
		cpuc->tfa_shadow = ~0ULL;
		intel_set_tfa(cpuc, false);
	}

	if (x86_pmu.version > 1)
		flip_smm_bit(&x86_pmu.attr_freeze_on_smi);

	/*
	 * Disable perf metrics if any added CPU doesn't support it.
	 *
	 * Turn off the check for a hybrid architecture, because the
	 * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates
	 * the architecture features. The perf metrics is a model-specific
	 * feature for now. The corresponding bit should always be 0 on
	 * a hybrid platform, e.g., Alder Lake.
	 */
	if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
		union perf_capabilities perf_cap;

		rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
		if (!perf_cap.perf_metrics) {
			x86_pmu.intel_cap.perf_metrics = 0;
			x86_pmu.intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;
		}
	}

	__intel_update_pmu_caps(cpuc->pmu);

	if (!cpuc->shared_regs)
		return;

	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
			struct intel_shared_regs *pc;

			pc = per_cpu(cpu_hw_events, i).shared_regs;
			if (pc && pc->core_id == core_id) {
				/* Adopt the sibling's regs; queue our own copy for freeing. */
				cpuc->kfree_on_online[0] = cpuc->shared_regs;
				cpuc->shared_regs = pc;
				break;
			}
		}
		cpuc->shared_regs->core_id = core_id;
		cpuc->shared_regs->refcnt++;
	}

	if (x86_pmu.lbr_sel_map)
		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];

	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
			struct cpu_hw_events *sibling;
			struct intel_excl_cntrs *c;

			sibling = &per_cpu(cpu_hw_events, i);
			c = sibling->excl_cntrs;
			if (c && c->core_id == core_id) {
				/* Adopt the sibling's state; queue our own copy for freeing. */
				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
				cpuc->excl_cntrs = c;
				if (!sibling->excl_thread_id)
					cpuc->excl_thread_id = 1;
				break;
			}
		}
		cpuc->excl_cntrs->core_id = core_id;
		cpuc->excl_cntrs->refcnt++;
	}
}

/*
 * Drop this CPU's reference to its exclusive-counter state and free its
 * constraint list. The excl_cntrs structure itself is freed when it was
 * never assigned a core id (core_id == -1) or when the last reference is
 * dropped.
 */
static void free_excl_cntrs(struct cpu_hw_events *cpuc)
{
	struct intel_excl_cntrs *c;

	c = cpuc->excl_cntrs;
	if (c) {
		if (c->core_id == -1 || --c->refcnt == 0)
			kfree(c);
		cpuc->excl_cntrs = NULL;
	}

	kfree(cpuc->constraint_list);
	cpuc->constraint_list = NULL;
}

/* CPU hotplug "dying" callback: tear down DS and arch-PEBS state on @cpu. */
static void intel_pmu_cpu_dying(int cpu)
{
	fini_debug_store_on_cpu(cpu);
	fini_arch_pebs_on_cpu(cpu);
}

void intel_cpuc_finish(struct
cpu_hw_events *cpuc) 6462 { 6463 struct intel_shared_regs *pc; 6464 6465 pc = cpuc->shared_regs; 6466 if (pc) { 6467 if (pc->core_id == -1 || --pc->refcnt == 0) 6468 kfree(pc); 6469 cpuc->shared_regs = NULL; 6470 } 6471 6472 free_excl_cntrs(cpuc); 6473 } 6474 6475 static void intel_pmu_cpu_dead(int cpu) 6476 { 6477 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 6478 6479 release_arch_pebs_buf_on_cpu(cpu); 6480 intel_cpuc_finish(cpuc); 6481 6482 if (is_hybrid() && cpuc->pmu) 6483 cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus); 6484 } 6485 6486 static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, 6487 struct task_struct *task, bool sched_in) 6488 { 6489 intel_pmu_pebs_sched_task(pmu_ctx, sched_in); 6490 intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in); 6491 } 6492 6493 static int intel_pmu_check_period(struct perf_event *event, u64 value) 6494 { 6495 return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; 6496 } 6497 6498 static void intel_aux_output_init(void) 6499 { 6500 /* Refer also intel_pmu_aux_output_match() */ 6501 if (x86_pmu.intel_cap.pebs_output_pt_available) 6502 x86_pmu.assign = intel_pmu_assign_event; 6503 } 6504 6505 static int intel_pmu_aux_output_match(struct perf_event *event) 6506 { 6507 /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */ 6508 if (!x86_pmu.intel_cap.pebs_output_pt_available) 6509 return 0; 6510 6511 return is_intel_pt_event(event); 6512 } 6513 6514 static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret) 6515 { 6516 struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu); 6517 6518 *ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus); 6519 } 6520 6521 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 6522 6523 PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 6524 6525 PMU_FORMAT_ATTR(frontend, "config1:0-23"); 6526 6527 PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63"); 6528 6529 static struct attribute *intel_arch3_formats_attr[] = { 6530 &format_attr_event.attr, 6531 
&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_pc.attr,
	&format_attr_any.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};

/* Extra format attributes: in_tx, in_tx_cp, offcore_rsp and ldlat. */
static struct attribute *hsw_format_attr[] = {
	&format_attr_in_tx.attr,
	&format_attr_in_tx_cp.attr,
	&format_attr_offcore_rsp.attr,
	&format_attr_ldlat.attr,
	NULL
};

/* Extra format attributes: offcore_rsp and ldlat. */
static struct attribute *nhm_format_attr[] = {
	&format_attr_offcore_rsp.attr,
	&format_attr_ldlat.attr,
	NULL
};

/* Extra format attributes: offcore_rsp only. */
static struct attribute *slm_format_attr[] = {
	&format_attr_offcore_rsp.attr,
	NULL
};

/* Extra format attributes: offcore_rsp, ldlat and snoop_rsp. */
static struct attribute *cmt_format_attr[] = {
	&format_attr_offcore_rsp.attr,
	&format_attr_ldlat.attr,
	&format_attr_snoop_rsp.attr,
	NULL
};

/* Extra format attributes: frontend only. */
static struct attribute *skl_format_attr[] = {
	&format_attr_frontend.attr,
	NULL,
};

/*
 * x86_pmu template named "core". It uses the generic x86 IRQ handler and
 * disable paths together with the core_pmu_* enable/config callbacks.
 */
static __initconst const struct x86_pmu core_pmu = {
	.name			= "core",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= core_pmu_enable_all,
	.enable			= core_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= core_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	.large_pebs_flags	= LARGE_PEBS_FLAGS,

	/*
	 * Intel PMCs cannot be accessed sanely above 32-bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL<<31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.event_constraints	= intel_core_event_constraints,
	.guest_get_msrs		= core_guest_get_msrs,
	.format_attrs		= intel_arch_formats_attr,
	.events_sysfs_show	= intel_event_sysfs_show,

	/*
	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
	 * together with PMU version 1 and thus be using core_pmu with
	 * shared_regs. We need the following callbacks here to allocate
	 * it properly.
	 */
	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.cpu_dead		= intel_pmu_cpu_dead,

	.check_period		= intel_pmu_check_period,

	.lbr_reset		= intel_pmu_lbr_reset_64,
	.lbr_read		= intel_pmu_lbr_read_64,
	.lbr_save		= intel_pmu_lbr_save,
	.lbr_restore		= intel_pmu_lbr_restore,
};

/*
 * x86_pmu template named "Intel". Compared with core_pmu it uses the
 * intel_pmu_* callbacks throughout and additionally provides
 * add/del/read/set_period/update, PEBS aliases, sched_task and
 * aux_output_match hooks.
 */
static __initconst const struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.add			= intel_pmu_add_event,
	.del			= intel_pmu_del_event,
	.read			= intel_pmu_read_event,
	.set_period		= intel_pmu_set_period,
	.update			= intel_pmu_update,
	.hw_config		= intel_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	.large_pebs_flags	= LARGE_PEBS_FLAGS,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.pebs_aliases		= intel_pebs_aliases_core2,

	.format_attrs		=
intel_arch3_formats_attr,
	.events_sysfs_show	= intel_event_sysfs_show,

	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.cpu_dead		= intel_pmu_cpu_dead,

	.guest_get_msrs		= intel_guest_get_msrs,
	.sched_task		= intel_pmu_sched_task,

	.check_period		= intel_pmu_check_period,

	.aux_output_match	= intel_pmu_aux_output_match,

	.lbr_reset		= intel_pmu_lbr_reset_64,
	.lbr_read		= intel_pmu_lbr_read_64,
	.lbr_save		= intel_pmu_lbr_save,
	.lbr_restore		= intel_pmu_lbr_restore,

	/*
	 * SMM has access to all 4 rings and while traditionally SMM code only
	 * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
	 *
	 * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
	 * between SMM or not, this results in what should be pure userspace
	 * counters including SMM data.
	 *
	 * This is a clear privilege issue, therefore globally disable
	 * counting SMM by default.
	 */
	.attr_freeze_on_smi	= 1,
};

/* Quirk: turn DS PEBS off and drop the PEBS constraint table. */
static __init void intel_clovertown_quirk(void)
{
	/*
	 * PEBS is unreliable due to:
	 *
	 *   AJ67  - PEBS may experience CPL leaks
	 *   AJ68  - PEBS PMI may be delayed by one event
	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
	 *
	 * AJ67 could be worked around by restricting the OS/USR flags.
	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
	 *
	 * AJ106 could possibly be worked around by not allowing LBR
	 *       usage from PEBS, including the fixup.
	 * AJ68  could possibly be worked around by always programming
	 *	 a pebs_event_reset[0] value and coping with the lost events.
	 *
	 * But taken together it might just make sense to not enable PEBS on
	 * these chips.
	 */
	pr_warn("PEBS disabled due to CPU errata\n");
	x86_pmu.ds_pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}

/*
 * Per model/stepping minimum microcode revisions consulted by
 * intel_check_pebs_isolation(). Columns: model, first stepping, last
 * stepping, microcode revision.
 */
static const struct x86_cpu_id isolation_ucodes[] = {
	X86_MATCH_VFM_STEPS(INTEL_HASWELL,	 3,  3, 0x0000001f),
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_L,	 1,  1, 0x0000001e),
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_G,	 1,  1, 0x00000015),
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,	 2,  2, 0x00000037),
	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,	 4,  4, 0x0000000a),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL,	 4,  4, 0x00000023),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_G,	 1,  1, 0x00000014),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D,	 2,  2, 0x00000010),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D,	 3,  3, 0x07000009),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D,	 4,  4, 0x0f000009),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D,	 5,  5, 0x0e000002),
	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_X,	 1,  1, 0x0b000014),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,	 3,  3, 0x00000021),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,	 4,  7, 0x00000000),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,	11, 11, 0x00000000),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_L,	 3,  3, 0x0000007c),
	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE,	 3,  3, 0x0000007c),
	X86_MATCH_VFM_STEPS(INTEL_KABYLAKE,	 9, 13, 0x0000004e),
	X86_MATCH_VFM_STEPS(INTEL_KABYLAKE_L,	 9, 12, 0x0000004e),
	{}
};

/*
 * Set x86_pmu.pebs_no_isolation when the running microcode does not meet
 * the minimum revision listed in isolation_ucodes.
 */
static void intel_check_pebs_isolation(void)
{
	x86_pmu.pebs_no_isolation = !x86_match_min_microcode_rev(isolation_ucodes);
}

/*
 * Quirk: register intel_check_pebs_isolation() as the check_microcode
 * callback (warning if one is already set) and evaluate it once right away.
 */
static __init void intel_pebs_isolation_quirk(void)
{
	WARN_ON_ONCE(x86_pmu.check_microcode);
	x86_pmu.check_microcode = intel_check_pebs_isolation;
	intel_check_pebs_isolation();
}

/*
 * Per model/stepping minimum microcode revisions consulted by
 * intel_snb_pebs_broken().
 */
static const struct x86_cpu_id pebs_ucodes[] = {
	X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE,		7, 7, 0x00000028),
	X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X,	6, 6, 0x00000618),
	X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X,	7, 7, 0x0000070c),
6750 {} 6751 }; 6752 6753 static bool intel_snb_pebs_broken(void) 6754 { 6755 return !x86_match_min_microcode_rev(pebs_ucodes); 6756 } 6757 6758 static void intel_snb_check_microcode(void) 6759 { 6760 if (intel_snb_pebs_broken() == x86_pmu.pebs_broken) 6761 return; 6762 6763 /* 6764 * Serialized by the microcode lock.. 6765 */ 6766 if (x86_pmu.pebs_broken) { 6767 pr_info("PEBS enabled due to microcode update\n"); 6768 x86_pmu.pebs_broken = 0; 6769 } else { 6770 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); 6771 x86_pmu.pebs_broken = 1; 6772 } 6773 } 6774 6775 static bool is_lbr_from(unsigned long msr) 6776 { 6777 unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; 6778 6779 return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; 6780 } 6781 6782 /* 6783 * Under certain circumstances, access certain MSR may cause #GP. 6784 * The function tests if the input MSR can be safely accessed. 6785 */ 6786 static bool check_msr(unsigned long msr, u64 mask) 6787 { 6788 u64 val_old, val_new, val_tmp; 6789 6790 /* 6791 * Disable the check for real HW, so we don't 6792 * mess with potentially enabled registers: 6793 */ 6794 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) 6795 return true; 6796 6797 /* 6798 * Read the current value, change it and read it back to see if it 6799 * matches, this is needed to detect certain hardware emulators 6800 * (qemu/kvm) that don't trap on the MSR access and always return 0s. 6801 */ 6802 if (rdmsrq_safe(msr, &val_old)) 6803 return false; 6804 6805 /* 6806 * Only change the bits which can be updated by wrmsrq. 6807 */ 6808 val_tmp = val_old ^ mask; 6809 6810 if (is_lbr_from(msr)) 6811 val_tmp = lbr_from_signext_quirk_wr(val_tmp); 6812 6813 if (wrmsrq_safe(msr, val_tmp) || 6814 rdmsrq_safe(msr, &val_new)) 6815 return false; 6816 6817 /* 6818 * Quirk only affects validation in wrmsr(), so wrmsrq()'s value 6819 * should equal rdmsrq()'s even with the quirk. 
6820 */ 6821 if (val_new != val_tmp) 6822 return false; 6823 6824 if (is_lbr_from(msr)) 6825 val_old = lbr_from_signext_quirk_wr(val_old); 6826 6827 /* Here it's sure that the MSR can be safely accessed. 6828 * Restore the old value and return. 6829 */ 6830 wrmsrq(msr, val_old); 6831 6832 return true; 6833 } 6834 6835 static __init void intel_sandybridge_quirk(void) 6836 { 6837 x86_pmu.check_microcode = intel_snb_check_microcode; 6838 cpus_read_lock(); 6839 intel_snb_check_microcode(); 6840 cpus_read_unlock(); 6841 } 6842 6843 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { 6844 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, 6845 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, 6846 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, 6847 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, 6848 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, 6849 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, 6850 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, 6851 }; 6852 6853 static __init void intel_arch_events_quirk(void) 6854 { 6855 int bit; 6856 6857 /* disable event that reported as not present by cpuid */ 6858 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { 6859 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; 6860 pr_warn("CPUID marked event: \'%s\' unavailable\n", 6861 intel_arch_events_map[bit].name); 6862 } 6863 } 6864 6865 static __init void intel_nehalem_quirk(void) 6866 { 6867 union cpuid10_ebx ebx; 6868 6869 ebx.full = x86_pmu.events_maskl; 6870 if (ebx.split.no_branch_misses_retired) { 6871 /* 6872 * Erratum AAJ80 detected, we work it around by using 6873 * the BR_MISP_EXEC.ANY event. 
This will over-count
		 * branch-misses, but it's still much better than the
		 * architectural event which is often completely bogus:
		 */
		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
		/* Clear the "not available" bit so the event stays usable. */
		ebx.split.no_branch_misses_retired = 0;
		x86_pmu.events_maskl = ebx.full;
		pr_info("CPU erratum AAJ80 worked around\n");
	}
}

/*
 * enable software workaround for errata:
 * SNB: BJ122
 * IVB: BV98
 * HSW: HSD29
 *
 * Only needed when HT is enabled. However detecting
 * if HT is enabled is difficult (model specific). So instead,
 * we enable the workaround in the early boot, and verify if
 * it is needed in a later initcall phase once we have valid
 * topology information to check if HT is actually enabled
 */
static __init void intel_ht_bug(void)
{
	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;

	x86_pmu.start_scheduling = intel_start_scheduling;
	x86_pmu.commit_scheduling = intel_commit_scheduling;
	x86_pmu.stop_scheduling = intel_stop_scheduling;
}

/* Memory load/store event aliases (referenced as mem_ld_hsw / mem_st_hsw). */
EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")

/* Haswell special events */
EVENT_ATTR_STR(tx-start,	tx_start,	"event=0xc9,umask=0x1");
EVENT_ATTR_STR(tx-commit,	tx_commit,	"event=0xc9,umask=0x2");
EVENT_ATTR_STR(tx-abort,	tx_abort,	"event=0xc9,umask=0x4");
EVENT_ATTR_STR(tx-capacity,	tx_capacity,	"event=0x54,umask=0x2");
EVENT_ATTR_STR(tx-conflict,	tx_conflict,	"event=0x54,umask=0x1");
EVENT_ATTR_STR(el-start,	el_start,	"event=0xc8,umask=0x1");
EVENT_ATTR_STR(el-commit,	el_commit,	"event=0xc8,umask=0x2");
EVENT_ATTR_STR(el-abort,	el_abort,	"event=0xc8,umask=0x4");
EVENT_ATTR_STR(el-capacity,	el_capacity,	"event=0x54,umask=0x2");
EVENT_ATTR_STR(el-conflict,	el_conflict,	"event=0x54,umask=0x1");
EVENT_ATTR_STR(cycles-t,
cycles_t,	"event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");

/* Topdown (td_*) event attributes. */
static struct attribute *hsw_events_attrs[] = {
	EVENT_PTR(td_slots_issued),
	EVENT_PTR(td_slots_retired),
	EVENT_PTR(td_fetch_bubbles),
	EVENT_PTR(td_total_slots),
	EVENT_PTR(td_total_slots_scale),
	EVENT_PTR(td_recovery_bubbles),
	EVENT_PTR(td_recovery_bubbles_scale),
	NULL
};

/* Memory load/store event attributes. */
static struct attribute *hsw_mem_events_attrs[] = {
	EVENT_PTR(mem_ld_hsw),
	EVENT_PTR(mem_st_hsw),
	NULL,
};

/* TSX event attributes: tx-*, el-* and the cycles-t/cycles-ct events. */
static struct attribute *hsw_tsx_events_attrs[] = {
	EVENT_PTR(tx_start),
	EVENT_PTR(tx_commit),
	EVENT_PTR(tx_abort),
	EVENT_PTR(tx_capacity),
	EVENT_PTR(tx_conflict),
	EVENT_PTR(el_start),
	EVENT_PTR(el_commit),
	EVENT_PTR(el_abort),
	EVENT_PTR(el_capacity),
	EVENT_PTR(el_conflict),
	EVENT_PTR(cycles_t),
	EVENT_PTR(cycles_ct),
	NULL
};

/* Capacity events split into read and write variants. */
EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");

/* Memory load/store event attributes (same entries as hsw_mem_events_attrs). */
static struct attribute *icl_events_attrs[] = {
	EVENT_PTR(mem_ld_hsw),
	EVENT_PTR(mem_st_hsw),
	NULL,
};

/* Topdown event attributes: slots plus the four td_* metrics. */
static struct attribute *icl_td_events_attrs[] = {
	EVENT_PTR(slots),
	EVENT_PTR(td_retiring),
	EVENT_PTR(td_bad_spec),
	EVENT_PTR(td_fe_bound),
	EVENT_PTR(td_be_bound),
	NULL,
};

/* TSX event attributes using the read/write capacity variants. */
static struct attribute *icl_tsx_events_attrs[] = {
	EVENT_PTR(tx_start),
	EVENT_PTR(tx_abort),
	EVENT_PTR(tx_commit),
	EVENT_PTR(tx_capacity_read),
	EVENT_PTR(tx_capacity_write),
	EVENT_PTR(tx_conflict),
	EVENT_PTR(el_start),
	EVENT_PTR(el_abort),
	EVENT_PTR(el_commit),
EVENT_PTR(el_capacity_read), 6986 EVENT_PTR(el_capacity_write), 6987 EVENT_PTR(el_conflict), 6988 EVENT_PTR(cycles_t), 6989 EVENT_PTR(cycles_ct), 6990 NULL, 6991 }; 6992 6993 6994 EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); 6995 EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); 6996 6997 static struct attribute *glc_events_attrs[] = { 6998 EVENT_PTR(mem_ld_hsw), 6999 EVENT_PTR(mem_st_spr), 7000 EVENT_PTR(mem_ld_aux), 7001 NULL, 7002 }; 7003 7004 static struct attribute *glc_td_events_attrs[] = { 7005 EVENT_PTR(slots), 7006 EVENT_PTR(td_retiring), 7007 EVENT_PTR(td_bad_spec), 7008 EVENT_PTR(td_fe_bound), 7009 EVENT_PTR(td_be_bound), 7010 EVENT_PTR(td_heavy_ops), 7011 EVENT_PTR(td_br_mispredict), 7012 EVENT_PTR(td_fetch_lat), 7013 EVENT_PTR(td_mem_bound), 7014 NULL, 7015 }; 7016 7017 static struct attribute *glc_tsx_events_attrs[] = { 7018 EVENT_PTR(tx_start), 7019 EVENT_PTR(tx_abort), 7020 EVENT_PTR(tx_commit), 7021 EVENT_PTR(tx_capacity_read), 7022 EVENT_PTR(tx_capacity_write), 7023 EVENT_PTR(tx_conflict), 7024 EVENT_PTR(cycles_t), 7025 EVENT_PTR(cycles_ct), 7026 NULL, 7027 }; 7028 7029 static ssize_t freeze_on_smi_show(struct device *cdev, 7030 struct device_attribute *attr, 7031 char *buf) 7032 { 7033 return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi); 7034 } 7035 7036 static DEFINE_MUTEX(freeze_on_smi_mutex); 7037 7038 static ssize_t freeze_on_smi_store(struct device *cdev, 7039 struct device_attribute *attr, 7040 const char *buf, size_t count) 7041 { 7042 unsigned long val; 7043 ssize_t ret; 7044 7045 ret = kstrtoul(buf, 0, &val); 7046 if (ret) 7047 return ret; 7048 7049 if (val > 1) 7050 return -EINVAL; 7051 7052 mutex_lock(&freeze_on_smi_mutex); 7053 7054 if (x86_pmu.attr_freeze_on_smi == val) 7055 goto done; 7056 7057 x86_pmu.attr_freeze_on_smi = val; 7058 7059 cpus_read_lock(); 7060 on_each_cpu(flip_smm_bit, &val, 1); 7061 cpus_read_unlock(); 7062 done: 7063 mutex_unlock(&freeze_on_smi_mutex); 7064 7065 return 
count; 7066 } 7067 7068 static void update_tfa_sched(void *ignored) 7069 { 7070 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 7071 7072 /* 7073 * check if PMC3 is used 7074 * and if so force schedule out for all event types all contexts 7075 */ 7076 if (test_bit(3, cpuc->active_mask)) 7077 perf_pmu_resched(x86_get_pmu(smp_processor_id())); 7078 } 7079 7080 static ssize_t show_sysctl_tfa(struct device *cdev, 7081 struct device_attribute *attr, 7082 char *buf) 7083 { 7084 return snprintf(buf, 40, "%d\n", allow_tsx_force_abort); 7085 } 7086 7087 static ssize_t set_sysctl_tfa(struct device *cdev, 7088 struct device_attribute *attr, 7089 const char *buf, size_t count) 7090 { 7091 bool val; 7092 ssize_t ret; 7093 7094 ret = kstrtobool(buf, &val); 7095 if (ret) 7096 return ret; 7097 7098 /* no change */ 7099 if (val == allow_tsx_force_abort) 7100 return count; 7101 7102 allow_tsx_force_abort = val; 7103 7104 cpus_read_lock(); 7105 on_each_cpu(update_tfa_sched, NULL, 1); 7106 cpus_read_unlock(); 7107 7108 return count; 7109 } 7110 7111 7112 static DEVICE_ATTR_RW(freeze_on_smi); 7113 7114 static ssize_t branches_show(struct device *cdev, 7115 struct device_attribute *attr, 7116 char *buf) 7117 { 7118 return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); 7119 } 7120 7121 static DEVICE_ATTR_RO(branches); 7122 7123 static ssize_t branch_counter_nr_show(struct device *cdev, 7124 struct device_attribute *attr, 7125 char *buf) 7126 { 7127 return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters)); 7128 } 7129 7130 static DEVICE_ATTR_RO(branch_counter_nr); 7131 7132 static ssize_t branch_counter_width_show(struct device *cdev, 7133 struct device_attribute *attr, 7134 char *buf) 7135 { 7136 return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS); 7137 } 7138 7139 static DEVICE_ATTR_RO(branch_counter_width); 7140 7141 static struct attribute *lbr_attrs[] = { 7142 &dev_attr_branches.attr, 7143 &dev_attr_branch_counter_nr.attr, 7144 
&dev_attr_branch_counter_width.attr, 7145 NULL 7146 }; 7147 7148 static umode_t 7149 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7150 { 7151 /* branches */ 7152 if (i == 0) 7153 return x86_pmu.lbr_nr ? attr->mode : 0; 7154 7155 return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0; 7156 } 7157 7158 static char pmu_name_str[30]; 7159 7160 static DEVICE_STRING_ATTR_RO(pmu_name, 0444, pmu_name_str); 7161 7162 static struct attribute *intel_pmu_caps_attrs[] = { 7163 &dev_attr_pmu_name.attr.attr, 7164 NULL 7165 }; 7166 7167 static DEVICE_ATTR(allow_tsx_force_abort, 0644, 7168 show_sysctl_tfa, 7169 set_sysctl_tfa); 7170 7171 static struct attribute *intel_pmu_attrs[] = { 7172 &dev_attr_freeze_on_smi.attr, 7173 &dev_attr_allow_tsx_force_abort.attr, 7174 NULL, 7175 }; 7176 7177 static umode_t 7178 default_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7179 { 7180 if (attr == &dev_attr_allow_tsx_force_abort.attr) 7181 return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0; 7182 7183 return attr->mode; 7184 } 7185 7186 static umode_t 7187 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7188 { 7189 return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; 7190 } 7191 7192 static umode_t 7193 pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7194 { 7195 return intel_pmu_has_pebs() ? attr->mode : 0; 7196 } 7197 7198 static umode_t 7199 mem_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7200 { 7201 if (attr == &event_attr_mem_ld_aux.attr.attr) 7202 return x86_pmu.flags & PMU_FL_MEM_LOADS_AUX ? attr->mode : 0; 7203 7204 return pebs_is_visible(kobj, attr, i); 7205 } 7206 7207 static umode_t 7208 exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7209 { 7210 return x86_pmu.version >= 2 ? 
attr->mode : 0; 7211 } 7212 7213 static umode_t 7214 td_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7215 { 7216 /* 7217 * Hide the perf metrics topdown events 7218 * if the feature is not enumerated. 7219 */ 7220 if (x86_pmu.num_topdown_events) 7221 return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0; 7222 7223 return attr->mode; 7224 } 7225 7226 PMU_FORMAT_ATTR(acr_mask, "config2:0-63"); 7227 7228 static struct attribute *format_acr_attrs[] = { 7229 &format_attr_acr_mask.attr, 7230 NULL 7231 }; 7232 7233 static umode_t 7234 acr_is_visible(struct kobject *kobj, struct attribute *attr, int i) 7235 { 7236 struct device *dev = kobj_to_dev(kobj); 7237 7238 return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0; 7239 } 7240 7241 static struct attribute_group group_events_td = { 7242 .name = "events", 7243 .is_visible = td_is_visible, 7244 }; 7245 7246 static struct attribute_group group_events_mem = { 7247 .name = "events", 7248 .is_visible = mem_is_visible, 7249 }; 7250 7251 static struct attribute_group group_events_tsx = { 7252 .name = "events", 7253 .is_visible = tsx_is_visible, 7254 }; 7255 7256 static struct attribute_group group_caps_gen = { 7257 .name = "caps", 7258 .attrs = intel_pmu_caps_attrs, 7259 }; 7260 7261 static struct attribute_group group_caps_lbr = { 7262 .name = "caps", 7263 .attrs = lbr_attrs, 7264 .is_visible = lbr_is_visible, 7265 }; 7266 7267 static struct attribute_group group_format_extra = { 7268 .name = "format", 7269 .is_visible = exra_is_visible, 7270 }; 7271 7272 static struct attribute_group group_format_extra_skl = { 7273 .name = "format", 7274 .is_visible = exra_is_visible, 7275 }; 7276 7277 static struct attribute_group group_format_evtsel_ext = { 7278 .name = "format", 7279 .attrs = format_evtsel_ext_attrs, 7280 .is_visible = evtsel_ext_is_visible, 7281 }; 7282 7283 static struct attribute_group group_format_acr = { 7284 .name = "format", 7285 .attrs = format_acr_attrs, 7286 .is_visible = 
acr_is_visible, 7287 }; 7288 7289 static struct attribute_group group_default = { 7290 .attrs = intel_pmu_attrs, 7291 .is_visible = default_is_visible, 7292 }; 7293 7294 static const struct attribute_group *attr_update[] = { 7295 &group_events_td, 7296 &group_events_mem, 7297 &group_events_tsx, 7298 &group_caps_gen, 7299 &group_caps_lbr, 7300 &group_format_extra, 7301 &group_format_extra_skl, 7302 &group_format_evtsel_ext, 7303 &group_format_acr, 7304 &group_default, 7305 NULL, 7306 }; 7307 7308 EVENT_ATTR_STR_HYBRID(slots, slots_adl, "event=0x00,umask=0x4", hybrid_big); 7309 EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_adl, "event=0xc2,umask=0x0;event=0x00,umask=0x80", hybrid_big_small); 7310 EVENT_ATTR_STR_HYBRID(topdown-bad-spec, td_bad_spec_adl, "event=0x73,umask=0x0;event=0x00,umask=0x81", hybrid_big_small); 7311 EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_adl, "event=0x71,umask=0x0;event=0x00,umask=0x82", hybrid_big_small); 7312 EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_adl, "event=0x74,umask=0x0;event=0x00,umask=0x83", hybrid_big_small); 7313 EVENT_ATTR_STR_HYBRID(topdown-heavy-ops, td_heavy_ops_adl, "event=0x00,umask=0x84", hybrid_big); 7314 EVENT_ATTR_STR_HYBRID(topdown-br-mispredict, td_br_mis_adl, "event=0x00,umask=0x85", hybrid_big); 7315 EVENT_ATTR_STR_HYBRID(topdown-fetch-lat, td_fetch_lat_adl, "event=0x00,umask=0x86", hybrid_big); 7316 EVENT_ATTR_STR_HYBRID(topdown-mem-bound, td_mem_bound_adl, "event=0x00,umask=0x87", hybrid_big); 7317 7318 static struct attribute *adl_hybrid_events_attrs[] = { 7319 EVENT_PTR(slots_adl), 7320 EVENT_PTR(td_retiring_adl), 7321 EVENT_PTR(td_bad_spec_adl), 7322 EVENT_PTR(td_fe_bound_adl), 7323 EVENT_PTR(td_be_bound_adl), 7324 EVENT_PTR(td_heavy_ops_adl), 7325 EVENT_PTR(td_br_mis_adl), 7326 EVENT_PTR(td_fetch_lat_adl), 7327 EVENT_PTR(td_mem_bound_adl), 7328 NULL, 7329 }; 7330 7331 EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", 
hybrid_big_small); 7332 EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small); 7333 EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small); 7334 7335 static struct attribute *lnl_hybrid_events_attrs[] = { 7336 EVENT_PTR(slots_adl), 7337 EVENT_PTR(td_retiring_lnl), 7338 EVENT_PTR(td_bad_spec_adl), 7339 EVENT_PTR(td_fe_bound_lnl), 7340 EVENT_PTR(td_be_bound_lnl), 7341 EVENT_PTR(td_heavy_ops_adl), 7342 EVENT_PTR(td_br_mis_adl), 7343 EVENT_PTR(td_fetch_lat_adl), 7344 EVENT_PTR(td_mem_bound_adl), 7345 NULL 7346 }; 7347 7348 /* The event string must be in PMU IDX order. */ 7349 EVENT_ATTR_STR_HYBRID(topdown-retiring, 7350 td_retiring_arl_h, 7351 "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0", 7352 hybrid_big_small_tiny); 7353 EVENT_ATTR_STR_HYBRID(topdown-bad-spec, 7354 td_bad_spec_arl_h, 7355 "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0", 7356 hybrid_big_small_tiny); 7357 EVENT_ATTR_STR_HYBRID(topdown-fe-bound, 7358 td_fe_bound_arl_h, 7359 "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0", 7360 hybrid_big_small_tiny); 7361 EVENT_ATTR_STR_HYBRID(topdown-be-bound, 7362 td_be_bound_arl_h, 7363 "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0", 7364 hybrid_big_small_tiny); 7365 7366 static struct attribute *arl_h_hybrid_events_attrs[] = { 7367 EVENT_PTR(slots_adl), 7368 EVENT_PTR(td_retiring_arl_h), 7369 EVENT_PTR(td_bad_spec_arl_h), 7370 EVENT_PTR(td_fe_bound_arl_h), 7371 EVENT_PTR(td_be_bound_arl_h), 7372 EVENT_PTR(td_heavy_ops_adl), 7373 EVENT_PTR(td_br_mis_adl), 7374 EVENT_PTR(td_fetch_lat_adl), 7375 EVENT_PTR(td_mem_bound_adl), 7376 NULL, 7377 }; 7378 7379 /* Must be in IDX order */ 7380 EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); 7381 EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, 
"event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); 7382 EVENT_ATTR_STR_HYBRID(mem-loads-aux, mem_ld_aux_adl, "event=0x03,umask=0x82", hybrid_big); 7383 7384 static struct attribute *adl_hybrid_mem_attrs[] = { 7385 EVENT_PTR(mem_ld_adl), 7386 EVENT_PTR(mem_st_adl), 7387 EVENT_PTR(mem_ld_aux_adl), 7388 NULL, 7389 }; 7390 7391 static struct attribute *mtl_hybrid_mem_attrs[] = { 7392 EVENT_PTR(mem_ld_adl), 7393 EVENT_PTR(mem_st_adl), 7394 NULL 7395 }; 7396 7397 EVENT_ATTR_STR_HYBRID(mem-loads, 7398 mem_ld_arl_h, 7399 "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3", 7400 hybrid_big_small_tiny); 7401 EVENT_ATTR_STR_HYBRID(mem-stores, 7402 mem_st_arl_h, 7403 "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6", 7404 hybrid_big_small_tiny); 7405 7406 static struct attribute *arl_h_hybrid_mem_attrs[] = { 7407 EVENT_PTR(mem_ld_arl_h), 7408 EVENT_PTR(mem_st_arl_h), 7409 NULL, 7410 }; 7411 7412 EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); 7413 EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); 7414 EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); 7415 EVENT_ATTR_STR_HYBRID(tx-conflict, tx_conflict_adl, "event=0x54,umask=0x1", hybrid_big); 7416 EVENT_ATTR_STR_HYBRID(cycles-t, cycles_t_adl, "event=0x3c,in_tx=1", hybrid_big); 7417 EVENT_ATTR_STR_HYBRID(cycles-ct, cycles_ct_adl, "event=0x3c,in_tx=1,in_tx_cp=1", hybrid_big); 7418 EVENT_ATTR_STR_HYBRID(tx-capacity-read, tx_capacity_read_adl, "event=0x54,umask=0x80", hybrid_big); 7419 EVENT_ATTR_STR_HYBRID(tx-capacity-write, tx_capacity_write_adl, "event=0x54,umask=0x2", hybrid_big); 7420 7421 static struct attribute *adl_hybrid_tsx_attrs[] = { 7422 EVENT_PTR(tx_start_adl), 7423 EVENT_PTR(tx_abort_adl), 7424 EVENT_PTR(tx_commit_adl), 7425 EVENT_PTR(tx_capacity_read_adl), 7426 EVENT_PTR(tx_capacity_write_adl), 7427 EVENT_PTR(tx_conflict_adl), 7428 
EVENT_PTR(cycles_t_adl), 7429 EVENT_PTR(cycles_ct_adl), 7430 NULL, 7431 }; 7432 7433 FORMAT_ATTR_HYBRID(in_tx, hybrid_big); 7434 FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); 7435 FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny); 7436 FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny); 7437 FORMAT_ATTR_HYBRID(frontend, hybrid_big); 7438 7439 #define ADL_HYBRID_RTM_FORMAT_ATTR \ 7440 FORMAT_HYBRID_PTR(in_tx), \ 7441 FORMAT_HYBRID_PTR(in_tx_cp) 7442 7443 #define ADL_HYBRID_FORMAT_ATTR \ 7444 FORMAT_HYBRID_PTR(offcore_rsp), \ 7445 FORMAT_HYBRID_PTR(ldlat), \ 7446 FORMAT_HYBRID_PTR(frontend) 7447 7448 static struct attribute *adl_hybrid_extra_attr_rtm[] = { 7449 ADL_HYBRID_RTM_FORMAT_ATTR, 7450 ADL_HYBRID_FORMAT_ATTR, 7451 NULL 7452 }; 7453 7454 static struct attribute *adl_hybrid_extra_attr[] = { 7455 ADL_HYBRID_FORMAT_ATTR, 7456 NULL 7457 }; 7458 7459 FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny); 7460 7461 static struct attribute *mtl_hybrid_extra_attr_rtm[] = { 7462 ADL_HYBRID_RTM_FORMAT_ATTR, 7463 ADL_HYBRID_FORMAT_ATTR, 7464 FORMAT_HYBRID_PTR(snoop_rsp), 7465 NULL 7466 }; 7467 7468 static struct attribute *mtl_hybrid_extra_attr[] = { 7469 ADL_HYBRID_FORMAT_ATTR, 7470 FORMAT_HYBRID_PTR(snoop_rsp), 7471 NULL 7472 }; 7473 7474 static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) 7475 { 7476 struct device *dev = kobj_to_dev(kobj); 7477 struct x86_hybrid_pmu *pmu = 7478 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); 7479 struct perf_pmu_events_hybrid_attr *pmu_attr = 7480 container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); 7481 7482 return pmu->pmu_type & pmu_attr->pmu_type; 7483 } 7484 7485 static umode_t hybrid_events_is_visible(struct kobject *kobj, 7486 struct attribute *attr, int i) 7487 { 7488 return is_attr_for_this_pmu(kobj, attr) ? 
attr->mode : 0; 7489 } 7490 7491 static inline int hybrid_find_supported_cpu(struct x86_hybrid_pmu *pmu) 7492 { 7493 int cpu = cpumask_first(&pmu->supported_cpus); 7494 7495 return (cpu >= nr_cpu_ids) ? -1 : cpu; 7496 } 7497 7498 static umode_t hybrid_tsx_is_visible(struct kobject *kobj, 7499 struct attribute *attr, int i) 7500 { 7501 struct device *dev = kobj_to_dev(kobj); 7502 struct x86_hybrid_pmu *pmu = 7503 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); 7504 int cpu = hybrid_find_supported_cpu(pmu); 7505 7506 return (cpu >= 0) && is_attr_for_this_pmu(kobj, attr) && cpu_has(&cpu_data(cpu), X86_FEATURE_RTM) ? attr->mode : 0; 7507 } 7508 7509 static umode_t hybrid_format_is_visible(struct kobject *kobj, 7510 struct attribute *attr, int i) 7511 { 7512 struct device *dev = kobj_to_dev(kobj); 7513 struct x86_hybrid_pmu *pmu = 7514 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); 7515 struct perf_pmu_format_hybrid_attr *pmu_attr = 7516 container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); 7517 int cpu = hybrid_find_supported_cpu(pmu); 7518 7519 return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; 7520 } 7521 7522 static umode_t hybrid_td_is_visible(struct kobject *kobj, 7523 struct attribute *attr, int i) 7524 { 7525 struct device *dev = kobj_to_dev(kobj); 7526 struct x86_hybrid_pmu *pmu = 7527 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); 7528 7529 if (!is_attr_for_this_pmu(kobj, attr)) 7530 return 0; 7531 7532 7533 /* Only the big core supports perf metrics */ 7534 if (pmu->pmu_type == hybrid_big) 7535 return pmu->intel_cap.perf_metrics ? 
attr->mode : 0; 7536 7537 return attr->mode; 7538 } 7539 7540 static struct attribute_group hybrid_group_events_td = { 7541 .name = "events", 7542 .is_visible = hybrid_td_is_visible, 7543 }; 7544 7545 static struct attribute_group hybrid_group_events_mem = { 7546 .name = "events", 7547 .is_visible = hybrid_events_is_visible, 7548 }; 7549 7550 static struct attribute_group hybrid_group_events_tsx = { 7551 .name = "events", 7552 .is_visible = hybrid_tsx_is_visible, 7553 }; 7554 7555 static struct attribute_group hybrid_group_format_extra = { 7556 .name = "format", 7557 .is_visible = hybrid_format_is_visible, 7558 }; 7559 7560 static ssize_t intel_hybrid_get_attr_cpus(struct device *dev, 7561 struct device_attribute *attr, 7562 char *buf) 7563 { 7564 struct x86_hybrid_pmu *pmu = 7565 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); 7566 7567 return cpumap_print_to_pagebuf(true, buf, &pmu->supported_cpus); 7568 } 7569 7570 static DEVICE_ATTR(cpus, S_IRUGO, intel_hybrid_get_attr_cpus, NULL); 7571 static struct attribute *intel_hybrid_cpus_attrs[] = { 7572 &dev_attr_cpus.attr, 7573 NULL, 7574 }; 7575 7576 static struct attribute_group hybrid_group_cpus = { 7577 .attrs = intel_hybrid_cpus_attrs, 7578 }; 7579 7580 static const struct attribute_group *hybrid_attr_update[] = { 7581 &hybrid_group_events_td, 7582 &hybrid_group_events_mem, 7583 &hybrid_group_events_tsx, 7584 &group_caps_gen, 7585 &group_caps_lbr, 7586 &hybrid_group_format_extra, 7587 &group_format_evtsel_ext, 7588 &group_format_acr, 7589 &group_default, 7590 &hybrid_group_cpus, 7591 NULL, 7592 }; 7593 7594 static struct attribute *empty_attrs; 7595 7596 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, 7597 u64 cntr_mask, 7598 u64 fixed_cntr_mask, 7599 u64 intel_ctrl) 7600 { 7601 struct event_constraint *c; 7602 7603 if (!event_constraints) 7604 return; 7605 7606 /* 7607 * event on fixed counter2 (REF_CYCLES) only works on this 7608 * counter, so do not 
extend mask to generic counters 7609 */ 7610 for_each_event_constraint(c, event_constraints) { 7611 /* 7612 * Don't extend the topdown slots and metrics 7613 * events to the generic counters. 7614 */ 7615 if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) { 7616 /* 7617 * Disable topdown slots and metrics events, 7618 * if slots event is not in CPUID. 7619 */ 7620 if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl)) 7621 c->idxmsk64 = 0; 7622 c->weight = hweight64(c->idxmsk64); 7623 continue; 7624 } 7625 7626 if (c->cmask == FIXED_EVENT_FLAGS) { 7627 /* Disabled fixed counters which are not in CPUID */ 7628 c->idxmsk64 &= intel_ctrl; 7629 7630 /* 7631 * Don't extend the pseudo-encoding to the 7632 * generic counters 7633 */ 7634 if (!use_fixed_pseudo_encoding(c->code)) 7635 c->idxmsk64 |= cntr_mask; 7636 } 7637 c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED); 7638 c->weight = hweight64(c->idxmsk64); 7639 } 7640 } 7641 7642 static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) 7643 { 7644 struct extra_reg *er; 7645 7646 /* 7647 * Access extra MSR may cause #GP under certain circumstances. 7648 * E.g. KVM doesn't support offcore event 7649 * Check all extra_regs here. 
7650 */ 7651 if (!extra_regs) 7652 return; 7653 7654 for (er = extra_regs; er->msr; er++) { 7655 er->extra_msr_access = check_msr(er->msr, 0x11UL); 7656 /* Disable LBR select mapping */ 7657 if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) 7658 x86_pmu.lbr_sel_map = NULL; 7659 } 7660 } 7661 7662 static inline int intel_pmu_v6_addr_offset(int index, bool eventsel) 7663 { 7664 return MSR_IA32_PMC_V6_STEP * index; 7665 } 7666 7667 static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { 7668 { hybrid_small, "cpu_atom" }, 7669 { hybrid_big, "cpu_core" }, 7670 { hybrid_tiny, "cpu_lowpower" }, 7671 }; 7672 7673 static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) 7674 { 7675 unsigned long pmus_mask = pmus; 7676 struct x86_hybrid_pmu *pmu; 7677 int idx = 0, bit; 7678 7679 x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); 7680 x86_pmu.hybrid_pmu = kzalloc_objs(struct x86_hybrid_pmu, 7681 x86_pmu.num_hybrid_pmus); 7682 if (!x86_pmu.hybrid_pmu) 7683 return -ENOMEM; 7684 7685 static_branch_enable(&perf_is_hybrid); 7686 x86_pmu.filter = intel_pmu_filter; 7687 7688 for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { 7689 pmu = &x86_pmu.hybrid_pmu[idx++]; 7690 pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; 7691 pmu->name = intel_hybrid_pmu_type_map[bit].name; 7692 7693 pmu->cntr_mask64 = x86_pmu.cntr_mask64; 7694 pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; 7695 pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64); 7696 pmu->config_mask = X86_RAW_EVENT_MASK; 7697 pmu->unconstrained = (struct event_constraint) 7698 __EVENT_CONSTRAINT(0, pmu->cntr_mask64, 7699 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); 7700 7701 pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; 7702 if (pmu->pmu_type & hybrid_small_tiny) { 7703 pmu->intel_cap.perf_metrics = 0; 7704 pmu->mid_ack = true; 7705 } else if (pmu->pmu_type & hybrid_big) { 7706 pmu->intel_cap.perf_metrics = 
1; 7707 pmu->late_ack = true; 7708 } 7709 } 7710 7711 return 0; 7712 } 7713 7714 static __always_inline void intel_pmu_ref_cycles_ext(void) 7715 { 7716 if (!(x86_pmu.events_maskl & (INTEL_PMC_MSK_FIXED_REF_CYCLES >> INTEL_PMC_IDX_FIXED))) 7717 intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c; 7718 } 7719 7720 static __always_inline void intel_pmu_init_glc(struct pmu *pmu) 7721 { 7722 x86_pmu.late_ack = true; 7723 x86_pmu.limit_period = glc_limit_period; 7724 x86_pmu.pebs_aliases = NULL; 7725 x86_pmu.pebs_prec_dist = true; 7726 x86_pmu.pebs_block = true; 7727 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 7728 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 7729 x86_pmu.flags |= PMU_FL_INSTR_LATENCY; 7730 x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); 7731 x86_pmu.lbr_pt_coexist = true; 7732 x86_pmu.num_topdown_events = 8; 7733 static_call_update(intel_pmu_update_topdown_event, 7734 &icl_update_topdown_event); 7735 static_call_update(intel_pmu_set_topdown_event_period, 7736 &icl_set_topdown_event_period); 7737 7738 memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7739 memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7740 hybrid(pmu, event_constraints) = intel_glc_event_constraints; 7741 hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; 7742 7743 intel_pmu_ref_cycles_ext(); 7744 } 7745 7746 static __always_inline void intel_pmu_init_glc_hybrid(struct pmu *pmu) 7747 { 7748 intel_pmu_init_glc(pmu); 7749 7750 /* ADL has different extra MSR values from Server for the L3 or node OCR/OMR events. 
*/ 7751 memcpy(hybrid_var(pmu, hw_cache_event_ids), adl_glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7752 memcpy(hybrid_var(pmu, hw_cache_extra_regs), adl_glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7753 } 7754 7755 static __always_inline void intel_pmu_init_grt(struct pmu *pmu) 7756 { 7757 x86_pmu.mid_ack = true; 7758 x86_pmu.limit_period = glc_limit_period; 7759 x86_pmu.pebs_aliases = NULL; 7760 x86_pmu.pebs_prec_dist = true; 7761 x86_pmu.pebs_block = true; 7762 x86_pmu.lbr_pt_coexist = true; 7763 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 7764 x86_pmu.flags |= PMU_FL_INSTR_LATENCY; 7765 7766 memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7767 memcpy(hybrid_var(pmu, hw_cache_extra_regs), grt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7768 hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; 7769 hybrid(pmu, event_constraints) = intel_grt_event_constraints; 7770 hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; 7771 hybrid(pmu, extra_regs) = intel_grt_extra_regs; 7772 7773 intel_pmu_ref_cycles_ext(); 7774 } 7775 7776 static __always_inline void intel_pmu_init_cmt(struct pmu *pmu) 7777 { 7778 intel_pmu_init_grt(pmu); 7779 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7780 cmt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7781 hybrid(pmu, pebs_constraints) = intel_cmt_pebs_event_constraints; 7782 hybrid(pmu, extra_regs) = intel_cmt_extra_regs; 7783 } 7784 7785 static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) 7786 { 7787 intel_pmu_init_glc(pmu); 7788 hybrid(pmu, event_constraints) = intel_lnc_event_constraints; 7789 hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; 7790 hybrid(pmu, extra_regs) = intel_lnc_extra_regs; 7791 7792 memcpy(hybrid_var(pmu, hw_cache_event_ids), adl_glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7793 memcpy(hybrid_var(pmu, hw_cache_extra_regs), lnc_hw_cache_extra_regs, 
sizeof(hw_cache_extra_regs)); 7794 } 7795 7796 static __always_inline void intel_pmu_init_pnc(struct pmu *pmu) 7797 { 7798 intel_pmu_init_glc(pmu); 7799 x86_pmu.flags &= ~PMU_FL_HAS_RSP_1; 7800 x86_pmu.flags |= PMU_FL_HAS_OMR; 7801 memcpy(hybrid_var(pmu, hw_cache_event_ids), 7802 pnc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7803 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7804 pnc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7805 hybrid(pmu, event_constraints) = intel_pnc_event_constraints; 7806 hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; 7807 hybrid(pmu, extra_regs) = intel_pnc_extra_regs; 7808 static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); 7809 } 7810 7811 static __always_inline void intel_pmu_init_cyc(struct pmu *pmu) 7812 { 7813 intel_pmu_init_pnc(pmu); 7814 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7815 cyc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7816 } 7817 7818 static __always_inline void intel_pmu_init_skt(struct pmu *pmu) 7819 { 7820 intel_pmu_init_cmt(pmu); 7821 hybrid(pmu, event_constraints) = intel_skt_event_constraints; 7822 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7823 skt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7824 static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); 7825 } 7826 7827 /* Hybrid client variant. */ 7828 static __always_inline void intel_pmu_init_dkt_hybrid(struct pmu *pmu) 7829 { 7830 intel_pmu_init_skt(pmu); 7831 hybrid(pmu, pebs_constraints) = intel_dkt_pebs_event_constraints; 7832 } 7833 7834 /* 7835 * Darkmont is used by the CWF and PTL E-cores, but their L3 OCR 7836 * events require different extra MSR values. Keep a separate init 7837 * function for the non-hybrid server variant. 
7838 */ 7839 static __always_inline void intel_pmu_init_dkt(struct pmu *pmu) 7840 { 7841 intel_pmu_init_dkt_hybrid(pmu); 7842 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7843 dkt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7844 } 7845 7846 static __always_inline void intel_pmu_init_arw(struct pmu *pmu) 7847 { 7848 intel_pmu_init_grt(pmu); 7849 x86_pmu.flags &= ~PMU_FL_HAS_RSP_1; 7850 x86_pmu.flags |= PMU_FL_HAS_OMR; 7851 memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7852 arw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7853 hybrid(pmu, event_constraints) = intel_arw_event_constraints; 7854 hybrid(pmu, pebs_constraints) = intel_dkt_pebs_event_constraints; 7855 hybrid(pmu, extra_regs) = intel_arw_extra_regs; 7856 static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); 7857 } 7858 7859 __init int intel_pmu_init(void) 7860 { 7861 struct attribute **extra_skl_attr = &empty_attrs; 7862 struct attribute **extra_attr = &empty_attrs; 7863 struct attribute **td_attr = &empty_attrs; 7864 struct attribute **mem_attr = &empty_attrs; 7865 struct attribute **tsx_attr = &empty_attrs; 7866 union cpuid10_edx edx; 7867 union cpuid10_eax eax; 7868 union cpuid10_ebx ebx; 7869 unsigned int fixed_mask; 7870 bool pmem = false; 7871 int version, i; 7872 char *name; 7873 struct x86_hybrid_pmu *pmu; 7874 7875 /* Architectural Perfmon was introduced starting with Core "Yonah" */ 7876 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 7877 switch (boot_cpu_data.x86) { 7878 case 6: 7879 if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) 7880 return p6_pmu_init(); 7881 break; 7882 case 11: 7883 return knc_pmu_init(); 7884 case 15: 7885 return p4_pmu_init(); 7886 } 7887 7888 pr_cont("unsupported CPU family %d model %d ", 7889 boot_cpu_data.x86, boot_cpu_data.x86_model); 7890 return -ENODEV; 7891 } 7892 7893 /* 7894 * Check whether the Architectural PerfMon supports 7895 * Branch Misses Retired hw_event or not. 
7896 */ 7897 cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full); 7898 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 7899 return -ENODEV; 7900 7901 version = eax.split.version_id; 7902 if (version < 2) 7903 x86_pmu = core_pmu; 7904 else 7905 x86_pmu = intel_pmu; 7906 7907 x86_pmu.version = version; 7908 x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0); 7909 x86_pmu.cntval_bits = eax.split.bit_width; 7910 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 7911 7912 x86_pmu.events_maskl = ebx.full; 7913 x86_pmu.events_mask_len = eax.split.mask_length; 7914 7915 x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64); 7916 x86_pmu.pebs_capable = PEBS_COUNTER_MASK; 7917 x86_pmu.config_mask = X86_RAW_EVENT_MASK; 7918 7919 /* 7920 * Quirk: v2 perfmon does not report fixed-purpose events, so 7921 * assume at least 3 events, when not running in a hypervisor: 7922 */ 7923 if (version > 1 && version < 5) { 7924 int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); 7925 7926 x86_pmu.fixed_cntr_mask64 = 7927 GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0); 7928 } else if (version >= 5) 7929 x86_pmu.fixed_cntr_mask64 = fixed_mask; 7930 7931 if (boot_cpu_has(X86_FEATURE_PDCM)) { 7932 u64 capabilities; 7933 7934 rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities); 7935 x86_pmu.intel_cap.capabilities = capabilities; 7936 } 7937 7938 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) { 7939 x86_pmu.lbr_reset = intel_pmu_lbr_reset_32; 7940 x86_pmu.lbr_read = intel_pmu_lbr_read_32; 7941 } 7942 7943 if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) 7944 intel_pmu_arch_lbr_init(); 7945 7946 intel_pebs_init(); 7947 7948 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ 7949 7950 if (version >= 5) { 7951 x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; 7952 if (x86_pmu.intel_cap.anythread_deprecated) 7953 pr_cont(" AnyThread deprecated, "); 7954 } 7955 7956 /* The perf side of core 
PMU is ready to support the mediated vPMU. */ 7957 x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_MEDIATED_VPMU; 7958 7959 /* 7960 * Many features on and after V6 require dynamic constraint, 7961 * e.g., Arch PEBS, ACR. 7962 */ 7963 if (version >= 6) { 7964 x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; 7965 x86_pmu.late_setup = intel_pmu_late_setup; 7966 } 7967 7968 /* 7969 * Install the hw-cache-events table: 7970 */ 7971 switch (boot_cpu_data.x86_vfm) { 7972 case INTEL_CORE_YONAH: 7973 pr_cont("Core events, "); 7974 name = "core"; 7975 break; 7976 7977 case INTEL_CORE2_MEROM: 7978 x86_add_quirk(intel_clovertown_quirk); 7979 fallthrough; 7980 7981 case INTEL_CORE2_MEROM_L: 7982 case INTEL_CORE2_PENRYN: 7983 case INTEL_CORE2_DUNNINGTON: 7984 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 7985 sizeof(hw_cache_event_ids)); 7986 7987 intel_pmu_lbr_init_core(); 7988 7989 x86_pmu.event_constraints = intel_core2_event_constraints; 7990 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; 7991 pr_cont("Core2 events, "); 7992 name = "core2"; 7993 break; 7994 7995 case INTEL_NEHALEM: 7996 case INTEL_NEHALEM_EP: 7997 case INTEL_NEHALEM_EX: 7998 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 7999 sizeof(hw_cache_event_ids)); 8000 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 8001 sizeof(hw_cache_extra_regs)); 8002 8003 intel_pmu_lbr_init_nhm(); 8004 8005 x86_pmu.event_constraints = intel_nehalem_event_constraints; 8006 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 8007 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 8008 x86_pmu.extra_regs = intel_nehalem_extra_regs; 8009 x86_pmu.limit_period = nhm_limit_period; 8010 8011 mem_attr = nhm_mem_events_attrs; 8012 8013 /* UOPS_ISSUED.STALLED_CYCLES */ 8014 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 8015 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 8016 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 8017 
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 8018 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 8019 8020 intel_pmu_pebs_data_source_nhm(); 8021 x86_add_quirk(intel_nehalem_quirk); 8022 x86_pmu.pebs_no_tlb = 1; 8023 extra_attr = nhm_format_attr; 8024 8025 pr_cont("Nehalem events, "); 8026 name = "nehalem"; 8027 break; 8028 8029 case INTEL_ATOM_BONNELL: 8030 case INTEL_ATOM_BONNELL_MID: 8031 case INTEL_ATOM_SALTWELL: 8032 case INTEL_ATOM_SALTWELL_MID: 8033 case INTEL_ATOM_SALTWELL_TABLET: 8034 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 8035 sizeof(hw_cache_event_ids)); 8036 8037 intel_pmu_lbr_init_atom(); 8038 8039 x86_pmu.event_constraints = intel_gen_event_constraints; 8040 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; 8041 x86_pmu.pebs_aliases = intel_pebs_aliases_core2; 8042 pr_cont("Atom events, "); 8043 name = "bonnell"; 8044 break; 8045 8046 case INTEL_ATOM_SILVERMONT: 8047 case INTEL_ATOM_SILVERMONT_D: 8048 case INTEL_ATOM_SILVERMONT_MID: 8049 case INTEL_ATOM_AIRMONT: 8050 case INTEL_ATOM_AIRMONT_NP: 8051 case INTEL_ATOM_SILVERMONT_MID2: 8052 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 8053 sizeof(hw_cache_event_ids)); 8054 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, 8055 sizeof(hw_cache_extra_regs)); 8056 8057 intel_pmu_lbr_init_slm(); 8058 8059 x86_pmu.event_constraints = intel_slm_event_constraints; 8060 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 8061 x86_pmu.extra_regs = intel_slm_extra_regs; 8062 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8063 td_attr = slm_events_attrs; 8064 extra_attr = slm_format_attr; 8065 pr_cont("Silvermont events, "); 8066 name = "silvermont"; 8067 break; 8068 8069 case INTEL_ATOM_GOLDMONT: 8070 case INTEL_ATOM_GOLDMONT_D: 8071 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, 8072 sizeof(hw_cache_event_ids)); 8073 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, 8074 sizeof(hw_cache_extra_regs)); 8075 8076 intel_pmu_lbr_init_skl(); 8077 8078 
x86_pmu.event_constraints = intel_slm_event_constraints; 8079 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; 8080 x86_pmu.extra_regs = intel_glm_extra_regs; 8081 /* 8082 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS 8083 * for precise cycles. 8084 * :pp is identical to :ppp 8085 */ 8086 x86_pmu.pebs_aliases = NULL; 8087 x86_pmu.pebs_prec_dist = true; 8088 x86_pmu.lbr_pt_coexist = true; 8089 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8090 td_attr = glm_events_attrs; 8091 extra_attr = slm_format_attr; 8092 pr_cont("Goldmont events, "); 8093 name = "goldmont"; 8094 break; 8095 8096 case INTEL_ATOM_GOLDMONT_PLUS: 8097 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, 8098 sizeof(hw_cache_event_ids)); 8099 memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, 8100 sizeof(hw_cache_extra_regs)); 8101 8102 intel_pmu_lbr_init_skl(); 8103 8104 x86_pmu.event_constraints = intel_slm_event_constraints; 8105 x86_pmu.extra_regs = intel_glm_extra_regs; 8106 /* 8107 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS 8108 * for precise cycles. 
8109 */ 8110 x86_pmu.pebs_aliases = NULL; 8111 x86_pmu.pebs_prec_dist = true; 8112 x86_pmu.lbr_pt_coexist = true; 8113 x86_pmu.pebs_capable = ~0ULL; 8114 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8115 x86_pmu.flags |= PMU_FL_PEBS_ALL; 8116 x86_pmu.get_event_constraints = glp_get_event_constraints; 8117 td_attr = glm_events_attrs; 8118 /* Goldmont Plus has 4-wide pipeline */ 8119 event_attr_td_total_slots_scale_glm.event_str = "4"; 8120 extra_attr = slm_format_attr; 8121 pr_cont("Goldmont plus events, "); 8122 name = "goldmont_plus"; 8123 break; 8124 8125 case INTEL_ATOM_TREMONT_D: 8126 case INTEL_ATOM_TREMONT: 8127 case INTEL_ATOM_TREMONT_L: 8128 x86_pmu.late_ack = true; 8129 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, 8130 sizeof(hw_cache_event_ids)); 8131 memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, 8132 sizeof(hw_cache_extra_regs)); 8133 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; 8134 8135 intel_pmu_lbr_init_skl(); 8136 8137 x86_pmu.event_constraints = intel_slm_event_constraints; 8138 x86_pmu.extra_regs = intel_tnt_extra_regs; 8139 /* 8140 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS 8141 * for precise cycles. 
8142 */ 8143 x86_pmu.pebs_aliases = NULL; 8144 x86_pmu.pebs_prec_dist = true; 8145 x86_pmu.lbr_pt_coexist = true; 8146 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8147 x86_pmu.get_event_constraints = tnt_get_event_constraints; 8148 td_attr = tnt_events_attrs; 8149 extra_attr = slm_format_attr; 8150 pr_cont("Tremont events, "); 8151 name = "Tremont"; 8152 break; 8153 8154 case INTEL_ATOM_GRACEMONT: 8155 intel_pmu_init_grt(NULL); 8156 intel_pmu_pebs_data_source_grt(); 8157 x86_pmu.pebs_latency_data = grt_latency_data; 8158 x86_pmu.get_event_constraints = tnt_get_event_constraints; 8159 td_attr = tnt_events_attrs; 8160 mem_attr = grt_mem_attrs; 8161 extra_attr = nhm_format_attr; 8162 pr_cont("Gracemont events, "); 8163 name = "gracemont"; 8164 break; 8165 8166 case INTEL_ATOM_CRESTMONT: 8167 case INTEL_ATOM_CRESTMONT_X: 8168 intel_pmu_init_cmt(NULL); 8169 intel_pmu_pebs_data_source_cmt(); 8170 x86_pmu.pebs_latency_data = cmt_latency_data; 8171 x86_pmu.get_event_constraints = cmt_get_event_constraints; 8172 td_attr = cmt_events_attrs; 8173 mem_attr = grt_mem_attrs; 8174 extra_attr = cmt_format_attr; 8175 pr_cont("Crestmont events, "); 8176 name = "crestmont"; 8177 break; 8178 8179 case INTEL_ATOM_DARKMONT_X: 8180 intel_pmu_init_dkt(NULL); 8181 intel_pmu_pebs_data_source_cmt(); 8182 x86_pmu.pebs_latency_data = cmt_latency_data; 8183 x86_pmu.get_event_constraints = cmt_get_event_constraints; 8184 td_attr = skt_events_attrs; 8185 mem_attr = grt_mem_attrs; 8186 extra_attr = cmt_format_attr; 8187 pr_cont("Darkmont events, "); 8188 name = "darkmont"; 8189 break; 8190 8191 case INTEL_WESTMERE: 8192 case INTEL_WESTMERE_EP: 8193 case INTEL_WESTMERE_EX: 8194 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 8195 sizeof(hw_cache_event_ids)); 8196 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 8197 sizeof(hw_cache_extra_regs)); 8198 8199 intel_pmu_lbr_init_nhm(); 8200 8201 x86_pmu.event_constraints = intel_westmere_event_constraints; 8202 x86_pmu.enable_all = 
intel_pmu_nhm_enable_all; 8203 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 8204 x86_pmu.extra_regs = intel_westmere_extra_regs; 8205 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8206 8207 mem_attr = nhm_mem_events_attrs; 8208 8209 /* UOPS_ISSUED.STALLED_CYCLES */ 8210 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 8211 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 8212 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 8213 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 8214 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 8215 8216 intel_pmu_pebs_data_source_nhm(); 8217 extra_attr = nhm_format_attr; 8218 pr_cont("Westmere events, "); 8219 name = "westmere"; 8220 break; 8221 8222 case INTEL_SANDYBRIDGE: 8223 case INTEL_SANDYBRIDGE_X: 8224 x86_add_quirk(intel_sandybridge_quirk); 8225 x86_add_quirk(intel_ht_bug); 8226 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 8227 sizeof(hw_cache_event_ids)); 8228 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 8229 sizeof(hw_cache_extra_regs)); 8230 8231 intel_pmu_lbr_init_snb(); 8232 8233 x86_pmu.event_constraints = intel_snb_event_constraints; 8234 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 8235 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 8236 if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X) 8237 x86_pmu.extra_regs = intel_snbep_extra_regs; 8238 else 8239 x86_pmu.extra_regs = intel_snb_extra_regs; 8240 8241 8242 /* all extra regs are per-cpu when HT is on */ 8243 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8244 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8245 8246 td_attr = snb_events_attrs; 8247 mem_attr = snb_mem_events_attrs; 8248 8249 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 8250 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 8251 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 8252 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ 8253 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 
8254 X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); 8255 8256 extra_attr = nhm_format_attr; 8257 8258 pr_cont("SandyBridge events, "); 8259 name = "sandybridge"; 8260 break; 8261 8262 case INTEL_IVYBRIDGE: 8263 case INTEL_IVYBRIDGE_X: 8264 x86_add_quirk(intel_ht_bug); 8265 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 8266 sizeof(hw_cache_event_ids)); 8267 /* dTLB-load-misses on IVB is different than SNB */ 8268 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ 8269 8270 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 8271 sizeof(hw_cache_extra_regs)); 8272 8273 intel_pmu_lbr_init_snb(); 8274 8275 x86_pmu.event_constraints = intel_ivb_event_constraints; 8276 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; 8277 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; 8278 x86_pmu.pebs_prec_dist = true; 8279 if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X) 8280 x86_pmu.extra_regs = intel_snbep_extra_regs; 8281 else 8282 x86_pmu.extra_regs = intel_snb_extra_regs; 8283 /* all extra regs are per-cpu when HT is on */ 8284 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8285 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8286 8287 td_attr = snb_events_attrs; 8288 mem_attr = snb_mem_events_attrs; 8289 8290 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 8291 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 8292 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 8293 8294 extra_attr = nhm_format_attr; 8295 8296 pr_cont("IvyBridge events, "); 8297 name = "ivybridge"; 8298 break; 8299 8300 8301 case INTEL_HASWELL: 8302 case INTEL_HASWELL_X: 8303 case INTEL_HASWELL_L: 8304 case INTEL_HASWELL_G: 8305 x86_add_quirk(intel_ht_bug); 8306 x86_add_quirk(intel_pebs_isolation_quirk); 8307 x86_pmu.late_ack = true; 8308 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 8309 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8310 8311 
intel_pmu_lbr_init_hsw(); 8312 8313 x86_pmu.event_constraints = intel_hsw_event_constraints; 8314 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; 8315 x86_pmu.extra_regs = intel_snbep_extra_regs; 8316 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; 8317 x86_pmu.pebs_prec_dist = true; 8318 /* all extra regs are per-cpu when HT is on */ 8319 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8320 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8321 8322 x86_pmu.hw_config = hsw_hw_config; 8323 x86_pmu.get_event_constraints = hsw_get_event_constraints; 8324 x86_pmu.limit_period = hsw_limit_period; 8325 x86_pmu.lbr_double_abort = true; 8326 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8327 hsw_format_attr : nhm_format_attr; 8328 td_attr = hsw_events_attrs; 8329 mem_attr = hsw_mem_events_attrs; 8330 tsx_attr = hsw_tsx_events_attrs; 8331 pr_cont("Haswell events, "); 8332 name = "haswell"; 8333 break; 8334 8335 case INTEL_BROADWELL: 8336 case INTEL_BROADWELL_D: 8337 case INTEL_BROADWELL_G: 8338 case INTEL_BROADWELL_X: 8339 x86_add_quirk(intel_pebs_isolation_quirk); 8340 x86_pmu.late_ack = true; 8341 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 8342 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8343 8344 /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ 8345 hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | 8346 BDW_L3_MISS|HSW_SNOOP_DRAM; 8347 hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| 8348 HSW_SNOOP_DRAM; 8349 hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| 8350 BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; 8351 hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| 8352 BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; 8353 8354 intel_pmu_lbr_init_hsw(); 8355 8356 x86_pmu.event_constraints = intel_bdw_event_constraints; 8357 x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; 8358 x86_pmu.extra_regs = 
intel_snbep_extra_regs; 8359 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; 8360 x86_pmu.pebs_prec_dist = true; 8361 /* all extra regs are per-cpu when HT is on */ 8362 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8363 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8364 8365 x86_pmu.hw_config = hsw_hw_config; 8366 x86_pmu.get_event_constraints = hsw_get_event_constraints; 8367 x86_pmu.limit_period = bdw_limit_period; 8368 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8369 hsw_format_attr : nhm_format_attr; 8370 td_attr = hsw_events_attrs; 8371 mem_attr = hsw_mem_events_attrs; 8372 tsx_attr = hsw_tsx_events_attrs; 8373 pr_cont("Broadwell events, "); 8374 name = "broadwell"; 8375 break; 8376 8377 case INTEL_XEON_PHI_KNL: 8378 case INTEL_XEON_PHI_KNM: 8379 memcpy(hw_cache_event_ids, 8380 slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 8381 memcpy(hw_cache_extra_regs, 8382 knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8383 intel_pmu_lbr_init_knl(); 8384 8385 x86_pmu.event_constraints = intel_slm_event_constraints; 8386 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 8387 x86_pmu.extra_regs = intel_knl_extra_regs; 8388 8389 /* all extra regs are per-cpu when HT is on */ 8390 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8391 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8392 extra_attr = slm_format_attr; 8393 pr_cont("Knights Landing/Mill events, "); 8394 name = "knights-landing"; 8395 break; 8396 8397 case INTEL_SKYLAKE_X: 8398 pmem = true; 8399 fallthrough; 8400 case INTEL_SKYLAKE_L: 8401 case INTEL_SKYLAKE: 8402 case INTEL_KABYLAKE_L: 8403 case INTEL_KABYLAKE: 8404 case INTEL_COMETLAKE_L: 8405 case INTEL_COMETLAKE: 8406 x86_add_quirk(intel_pebs_isolation_quirk); 8407 x86_pmu.late_ack = true; 8408 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 8409 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8410 intel_pmu_lbr_init_skl(); 8411 8412 /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ 8413 
event_attr_td_recovery_bubbles.event_str_noht = 8414 "event=0xd,umask=0x1,cmask=1"; 8415 event_attr_td_recovery_bubbles.event_str_ht = 8416 "event=0xd,umask=0x1,cmask=1,any=1"; 8417 8418 x86_pmu.event_constraints = intel_skl_event_constraints; 8419 x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; 8420 x86_pmu.extra_regs = intel_skl_extra_regs; 8421 x86_pmu.pebs_aliases = intel_pebs_aliases_skl; 8422 x86_pmu.pebs_prec_dist = true; 8423 /* all extra regs are per-cpu when HT is on */ 8424 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8425 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8426 8427 x86_pmu.hw_config = hsw_hw_config; 8428 x86_pmu.get_event_constraints = hsw_get_event_constraints; 8429 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8430 hsw_format_attr : nhm_format_attr; 8431 extra_skl_attr = skl_format_attr; 8432 td_attr = hsw_events_attrs; 8433 mem_attr = hsw_mem_events_attrs; 8434 tsx_attr = hsw_tsx_events_attrs; 8435 intel_pmu_pebs_data_source_skl(pmem); 8436 8437 /* 8438 * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default. 8439 * TSX force abort hooks are not required on these systems. Only deploy 8440 * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT. 
8441 */ 8442 if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) && 8443 !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { 8444 x86_pmu.flags |= PMU_FL_TFA; 8445 x86_pmu.get_event_constraints = tfa_get_event_constraints; 8446 x86_pmu.enable_all = intel_tfa_pmu_enable_all; 8447 x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; 8448 } 8449 8450 pr_cont("Skylake events, "); 8451 name = "skylake"; 8452 break; 8453 8454 case INTEL_ICELAKE_X: 8455 case INTEL_ICELAKE_D: 8456 memcpy(hw_cache_extra_regs, snc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8457 x86_pmu.pebs_ept = 1; 8458 pmem = true; 8459 goto snc_common; 8460 case INTEL_ICELAKE_L: 8461 case INTEL_ICELAKE: 8462 case INTEL_TIGERLAKE_L: 8463 case INTEL_TIGERLAKE: 8464 case INTEL_ROCKETLAKE: 8465 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8466 snc_common: 8467 x86_pmu.late_ack = true; 8468 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 8469 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; 8470 intel_pmu_lbr_init_skl(); 8471 8472 x86_pmu.event_constraints = intel_icl_event_constraints; 8473 x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints; 8474 x86_pmu.extra_regs = intel_icl_extra_regs; 8475 x86_pmu.pebs_aliases = NULL; 8476 x86_pmu.pebs_prec_dist = true; 8477 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 8478 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 8479 8480 x86_pmu.hw_config = hsw_hw_config; 8481 x86_pmu.get_event_constraints = icl_get_event_constraints; 8482 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
8483 hsw_format_attr : nhm_format_attr; 8484 extra_skl_attr = skl_format_attr; 8485 mem_attr = icl_events_attrs; 8486 td_attr = icl_td_events_attrs; 8487 tsx_attr = icl_tsx_events_attrs; 8488 x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); 8489 x86_pmu.lbr_pt_coexist = true; 8490 intel_pmu_pebs_data_source_skl(pmem); 8491 x86_pmu.num_topdown_events = 4; 8492 static_call_update(intel_pmu_update_topdown_event, 8493 &icl_update_topdown_event); 8494 static_call_update(intel_pmu_set_topdown_event_period, 8495 &icl_set_topdown_event_period); 8496 pr_cont("Icelake events, "); 8497 name = "icelake"; 8498 break; 8499 8500 case INTEL_SAPPHIRERAPIDS_X: 8501 case INTEL_EMERALDRAPIDS_X: 8502 x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; 8503 x86_pmu.extra_regs = intel_glc_extra_regs; 8504 pr_cont("Sapphire Rapids events, "); 8505 name = "sapphire_rapids"; 8506 goto glc_common; 8507 8508 case INTEL_GRANITERAPIDS_X: 8509 case INTEL_GRANITERAPIDS_D: 8510 x86_pmu.extra_regs = intel_rwc_extra_regs; 8511 pr_cont("Granite Rapids events, "); 8512 name = "granite_rapids"; 8513 goto glc_common; 8514 8515 case INTEL_DIAMONDRAPIDS_X: 8516 intel_pmu_init_pnc(NULL); 8517 x86_pmu.pebs_latency_data = pnc_latency_data; 8518 8519 pr_cont("Panthercove events, "); 8520 name = "panthercove"; 8521 goto glc_base; 8522 8523 glc_common: 8524 intel_pmu_init_glc(NULL); 8525 intel_pmu_pebs_data_source_skl(true); 8526 8527 glc_base: 8528 x86_pmu.pebs_ept = 1; 8529 x86_pmu.hw_config = hsw_hw_config; 8530 x86_pmu.get_event_constraints = glc_get_event_constraints; 8531 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
8532 hsw_format_attr : nhm_format_attr; 8533 extra_skl_attr = skl_format_attr; 8534 mem_attr = glc_events_attrs; 8535 td_attr = glc_td_events_attrs; 8536 tsx_attr = glc_tsx_events_attrs; 8537 break; 8538 8539 case INTEL_ALDERLAKE: 8540 case INTEL_ALDERLAKE_L: 8541 case INTEL_RAPTORLAKE: 8542 case INTEL_RAPTORLAKE_P: 8543 case INTEL_RAPTORLAKE_S: 8544 /* 8545 * Alder Lake has 2 types of CPU, core and atom. 8546 * 8547 * Initialize the common PerfMon capabilities here. 8548 */ 8549 intel_pmu_init_hybrid(hybrid_big_small); 8550 8551 x86_pmu.pebs_latency_data = grt_latency_data; 8552 x86_pmu.get_event_constraints = adl_get_event_constraints; 8553 x86_pmu.hw_config = adl_hw_config; 8554 x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; 8555 8556 td_attr = adl_hybrid_events_attrs; 8557 mem_attr = adl_hybrid_mem_attrs; 8558 tsx_attr = adl_hybrid_tsx_attrs; 8559 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8560 adl_hybrid_extra_attr_rtm : adl_hybrid_extra_attr; 8561 8562 /* Initialize big core specific PerfMon capabilities.*/ 8563 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8564 intel_pmu_init_glc_hybrid(&pmu->pmu); 8565 if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { 8566 pmu->cntr_mask64 <<= 2; 8567 pmu->cntr_mask64 |= 0x3; 8568 pmu->fixed_cntr_mask64 <<= 1; 8569 pmu->fixed_cntr_mask64 |= 0x1; 8570 } else { 8571 pmu->cntr_mask64 = x86_pmu.cntr_mask64; 8572 pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; 8573 } 8574 8575 /* 8576 * Quirk: For some Alder Lake machine, when all E-cores are disabled in 8577 * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However, 8578 * the X86_FEATURE_HYBRID_CPU is still set. The above codes will 8579 * mistakenly add extra counters for P-cores. Correct the number of 8580 * counters here. 
8581 */ 8582 if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) { 8583 pmu->cntr_mask64 = x86_pmu.cntr_mask64; 8584 pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; 8585 } 8586 8587 pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64); 8588 pmu->unconstrained = (struct event_constraint) 8589 __EVENT_CONSTRAINT(0, pmu->cntr_mask64, 8590 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); 8591 8592 pmu->extra_regs = intel_glc_extra_regs; 8593 8594 /* Initialize Atom core specific PerfMon capabilities.*/ 8595 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8596 intel_pmu_init_grt(&pmu->pmu); 8597 8598 x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; 8599 intel_pmu_pebs_data_source_adl(); 8600 pr_cont("Alderlake Hybrid events, "); 8601 name = "alderlake_hybrid"; 8602 break; 8603 8604 case INTEL_METEORLAKE: 8605 case INTEL_METEORLAKE_L: 8606 case INTEL_ARROWLAKE_U: 8607 intel_pmu_init_hybrid(hybrid_big_small); 8608 8609 x86_pmu.pebs_latency_data = cmt_latency_data; 8610 x86_pmu.get_event_constraints = mtl_get_event_constraints; 8611 x86_pmu.hw_config = adl_hw_config; 8612 8613 td_attr = adl_hybrid_events_attrs; 8614 mem_attr = mtl_hybrid_mem_attrs; 8615 tsx_attr = adl_hybrid_tsx_attrs; 8616 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
8617 mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; 8618 8619 /* Initialize big core specific PerfMon capabilities.*/ 8620 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8621 intel_pmu_init_glc_hybrid(&pmu->pmu); 8622 pmu->extra_regs = intel_rwc_extra_regs; 8623 8624 /* Initialize Atom core specific PerfMon capabilities.*/ 8625 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8626 intel_pmu_init_cmt(&pmu->pmu); 8627 8628 intel_pmu_pebs_data_source_mtl(); 8629 pr_cont("Meteorlake Hybrid events, "); 8630 name = "meteorlake_hybrid"; 8631 break; 8632 8633 case INTEL_PANTHERLAKE_L: 8634 case INTEL_WILDCATLAKE_L: 8635 pr_cont("Pantherlake Hybrid events, "); 8636 name = "pantherlake_hybrid"; 8637 8638 intel_pmu_init_hybrid(hybrid_big_small); 8639 8640 /* Initialize big core specific PerfMon capabilities.*/ 8641 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8642 intel_pmu_init_lnc(&pmu->pmu); 8643 /* Initialize Atom core specific PerfMon capabilities.*/ 8644 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8645 intel_pmu_init_dkt_hybrid(&pmu->pmu); 8646 8647 goto lnl_common; 8648 8649 case INTEL_ARROWLAKE: 8650 pr_cont("Arrowlake Hybrid events, "); 8651 name = "arrowlake_hybrid"; 8652 8653 intel_pmu_init_hybrid(hybrid_big_small); 8654 8655 /* Initialize big core specific PerfMon capabilities.*/ 8656 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8657 intel_pmu_init_lnc(&pmu->pmu); 8658 memcpy(hybrid_var(&pmu->pmu, hw_cache_extra_regs), 8659 arl_lnc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8660 /* Initialize Atom core specific PerfMon capabilities.*/ 8661 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8662 intel_pmu_init_skt(&pmu->pmu); 8663 8664 goto lnl_common; 8665 8666 case INTEL_LUNARLAKE_M: 8667 pr_cont("Lunarlake Hybrid events, "); 8668 name = "lunarlake_hybrid"; 8669 8670 intel_pmu_init_hybrid(hybrid_big_small); 8671 8672 /* Initialize big core specific PerfMon capabilities.*/ 8673 pmu = 
&x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8674 intel_pmu_init_lnc(&pmu->pmu); 8675 /* Initialize Atom core specific PerfMon capabilities.*/ 8676 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8677 intel_pmu_init_skt(&pmu->pmu); 8678 8679 lnl_common: 8680 8681 x86_pmu.pebs_latency_data = lnl_latency_data; 8682 x86_pmu.get_event_constraints = mtl_get_event_constraints; 8683 x86_pmu.hw_config = adl_hw_config; 8684 8685 td_attr = lnl_hybrid_events_attrs; 8686 mem_attr = mtl_hybrid_mem_attrs; 8687 tsx_attr = adl_hybrid_tsx_attrs; 8688 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8689 mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; 8690 8691 intel_pmu_pebs_data_source_lnl(); 8692 break; 8693 8694 case INTEL_ARROWLAKE_H: 8695 intel_pmu_init_hybrid(hybrid_big_small_tiny); 8696 8697 x86_pmu.pebs_latency_data = arl_h_latency_data; 8698 x86_pmu.get_event_constraints = arl_h_get_event_constraints; 8699 x86_pmu.hw_config = arl_h_hw_config; 8700 8701 td_attr = arl_h_hybrid_events_attrs; 8702 mem_attr = arl_h_hybrid_mem_attrs; 8703 tsx_attr = adl_hybrid_tsx_attrs; 8704 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8705 mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; 8706 8707 /* Initialize big core specific PerfMon capabilities. */ 8708 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8709 intel_pmu_init_lnc(&pmu->pmu); 8710 memcpy(hybrid_var(&pmu->pmu, hw_cache_extra_regs), 8711 arl_lnc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 8712 8713 /* Initialize Atom core specific PerfMon capabilities. */ 8714 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8715 intel_pmu_init_skt(&pmu->pmu); 8716 8717 /* Initialize Lower Power Atom specific PerfMon capabilities. 
*/ 8718 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX]; 8719 intel_pmu_init_cmt(&pmu->pmu); 8720 8721 intel_pmu_pebs_data_source_arl_h(); 8722 pr_cont("ArrowLake-H Hybrid events, "); 8723 name = "arrowlake_h_hybrid"; 8724 break; 8725 8726 case INTEL_NOVALAKE: 8727 case INTEL_NOVALAKE_L: 8728 pr_cont("Novalake Hybrid events, "); 8729 name = "novalake_hybrid"; 8730 intel_pmu_init_hybrid(hybrid_big_small); 8731 8732 x86_pmu.pebs_latency_data = nvl_latency_data; 8733 x86_pmu.get_event_constraints = mtl_get_event_constraints; 8734 x86_pmu.hw_config = adl_hw_config; 8735 8736 td_attr = lnl_hybrid_events_attrs; 8737 mem_attr = mtl_hybrid_mem_attrs; 8738 tsx_attr = adl_hybrid_tsx_attrs; 8739 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 8740 mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; 8741 8742 /* Initialize big core specific PerfMon capabilities.*/ 8743 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; 8744 intel_pmu_init_cyc(&pmu->pmu); 8745 8746 /* Initialize Atom core specific PerfMon capabilities.*/ 8747 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; 8748 intel_pmu_init_arw(&pmu->pmu); 8749 8750 intel_pmu_pebs_data_source_lnl(); 8751 break; 8752 8753 default: 8754 switch (x86_pmu.version) { 8755 case 1: 8756 x86_pmu.event_constraints = intel_v1_event_constraints; 8757 pr_cont("generic architected perfmon v1, "); 8758 name = "generic_arch_v1"; 8759 break; 8760 case 2: 8761 case 3: 8762 case 4: 8763 /* 8764 * default constraints for v2 and up 8765 */ 8766 x86_pmu.event_constraints = intel_gen_event_constraints; 8767 pr_cont("generic architected perfmon, "); 8768 name = "generic_arch_v2+"; 8769 break; 8770 default: 8771 /* 8772 * The default constraints for v5 and up can support up to 8773 * 16 fixed counters. For the fixed counters 4 and later, 8774 * the pseudo-encoding is applied. 8775 * The constraints may be cut according to the CPUID enumeration 8776 * by inserting the EVENT_CONSTRAINT_END. 
8777 */ 8778 if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED) 8779 x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0); 8780 intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1; 8781 x86_pmu.event_constraints = intel_v5_gen_event_constraints; 8782 pr_cont("generic architected perfmon, "); 8783 name = "generic_arch_v5+"; 8784 break; 8785 } 8786 } 8787 8788 snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); 8789 8790 if (!is_hybrid()) { 8791 group_events_td.attrs = td_attr; 8792 group_events_mem.attrs = mem_attr; 8793 group_events_tsx.attrs = tsx_attr; 8794 group_format_extra.attrs = extra_attr; 8795 group_format_extra_skl.attrs = extra_skl_attr; 8796 8797 x86_pmu.attr_update = attr_update; 8798 } else { 8799 hybrid_group_events_td.attrs = td_attr; 8800 hybrid_group_events_mem.attrs = mem_attr; 8801 hybrid_group_events_tsx.attrs = tsx_attr; 8802 hybrid_group_format_extra.attrs = extra_attr; 8803 8804 x86_pmu.attr_update = hybrid_attr_update; 8805 } 8806 8807 /* 8808 * The archPerfmonExt (0x23) includes an enhanced enumeration of 8809 * PMU architectural features with a per-core view. For non-hybrid, 8810 * each core has the same PMU capabilities. It's good enough to 8811 * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu 8812 * is used to keep the common capabilities. Still keep the values 8813 * from the leaf 0xa. The core specific update will be done later 8814 * when a new type is online. 
8815 */ 8816 if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) 8817 update_pmu_cap(NULL); 8818 8819 if (x86_pmu.arch_pebs) { 8820 static_call_update(intel_pmu_disable_event_ext, 8821 intel_pmu_disable_event_ext); 8822 static_call_update(intel_pmu_enable_event_ext, 8823 intel_pmu_enable_event_ext); 8824 pr_cont("Architectural PEBS, "); 8825 } 8826 8827 intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, 8828 &x86_pmu.fixed_cntr_mask64, 8829 &x86_pmu.intel_ctrl); 8830 8831 /* AnyThread may be deprecated on arch perfmon v5 or later */ 8832 if (x86_pmu.intel_cap.anythread_deprecated) 8833 x86_pmu.format_attrs = intel_arch_formats_attr; 8834 8835 intel_pmu_check_event_constraints_all(NULL); 8836 8837 /* 8838 * Access LBR MSR may cause #GP under certain circumstances. 8839 * Check all LBR MSR here. 8840 * Disable LBR access if any LBR MSRs can not be accessed. 8841 */ 8842 if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) 8843 x86_pmu.lbr_nr = 0; 8844 for (i = 0; i < x86_pmu.lbr_nr; i++) { 8845 if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && 8846 check_msr(x86_pmu.lbr_to + i, 0xffffUL))) 8847 x86_pmu.lbr_nr = 0; 8848 } 8849 8850 if (x86_pmu.lbr_nr) { 8851 intel_pmu_lbr_init(); 8852 8853 pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); 8854 8855 /* only support branch_stack snapshot for perfmon >= v2 */ 8856 if (x86_pmu.disable_all == intel_pmu_disable_all) { 8857 if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) { 8858 static_call_update(perf_snapshot_branch_stack, 8859 intel_pmu_snapshot_arch_branch_stack); 8860 } else { 8861 static_call_update(perf_snapshot_branch_stack, 8862 intel_pmu_snapshot_branch_stack); 8863 } 8864 } 8865 } 8866 8867 intel_pmu_check_extra_regs(x86_pmu.extra_regs); 8868 8869 /* Support full width counters using alternative MSR range */ 8870 if (x86_pmu.intel_cap.full_width_write) { 8871 x86_pmu.max_period = x86_pmu.cntval_mask >> 1; 8872 x86_pmu.perfctr = MSR_IA32_PMC0; 8873 pr_cont("full-width counters, "); 8874 } 8875 8876 /* Support 
V6+ MSR Aliasing */ 8877 if (x86_pmu.version >= 6) { 8878 x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR; 8879 x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A; 8880 x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR; 8881 x86_pmu.addr_offset = intel_pmu_v6_addr_offset; 8882 } 8883 8884 if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) 8885 x86_pmu.intel_ctrl |= GLOBAL_CTRL_EN_PERF_METRICS; 8886 8887 if (x86_pmu.intel_cap.pebs_timing_info) 8888 x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; 8889 8890 intel_aux_output_init(); 8891 8892 return 0; 8893 } 8894 8895 /* 8896 * HT bug: phase 2 init 8897 * Called once we have valid topology information to check 8898 * whether or not HT is enabled 8899 * If HT is off, then we disable the workaround 8900 */ 8901 static __init int fixup_ht_bug(void) 8902 { 8903 int c; 8904 /* 8905 * problem not present on this CPU model, nothing to do 8906 */ 8907 if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) 8908 return 0; 8909 8910 if (topology_max_smt_threads() > 1) { 8911 pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); 8912 return 0; 8913 } 8914 8915 cpus_read_lock(); 8916 8917 hardlockup_detector_perf_stop(); 8918 8919 x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); 8920 8921 x86_pmu.start_scheduling = NULL; 8922 x86_pmu.commit_scheduling = NULL; 8923 x86_pmu.stop_scheduling = NULL; 8924 8925 hardlockup_detector_perf_restart(); 8926 8927 for_each_online_cpu(c) 8928 free_excl_cntrs(&per_cpu(cpu_hw_events, c)); 8929 8930 cpus_read_unlock(); 8931 pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); 8932 return 0; 8933 } 8934 subsys_initcall(fixup_ht_bug) 8935