/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"

/*
 * Intel PerfMon, used on Core and later.
 */
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
};

static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
	EVENT_EXTRA_END
};

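/*
 * A quick legend for the tables in this file (summary of the scheme used
 * throughout): the second argument of INTEL_EVENT_CONSTRAINT() is the
 * bitmask of general-purpose counters the event may be scheduled on
 * (0x1 = counter 0, 0x2 = counter 1, 0x3 = counters 0-1, ...), while
 * FIXED_EVENT_CONSTRAINT(code, n) binds the event code to fixed counter n.
 * The extra_reg tables tie an event+umask (e.g. 0x01b7 = OFFCORE_RESPONSE_0)
 * to the MSR that supplies its qualifying bits; the third argument is the
 * mask of bits valid in that MSR.
 */
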
static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */

	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
	INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */

	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
	EVENT_EXTRA_END
};
static struct event_constraint intel_v1_event_constraints[] __read_mostly =
{
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_skl_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
	EVENT_EXTRA_END
};

static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};

static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};

static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	/*
	 * Note: the low 8 bits of the eventsel code are not a continuous
	 * field and contain some bits that #GP if set; those are masked out.
	 */
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
	EVENT_EXTRA_END
};

EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");

static struct attribute *nhm_events_attrs[] = {
	EVENT_PTR(mem_ld_nhm),
	NULL,
};

/*
 * topdown events for Intel Core CPUs.
 *
 * The events are all in slots, which is a free slot in a 4-wide
 * pipeline. Some events are already reported in slots, for cycle
 * events we multiply by the pipeline width (4).
 *
 * With Hyper Threading on, topdown metrics are either summed or averaged
 * between the threads of a core: (count_t0 + count_t1).
 *
 * For the average case the metric is always scaled to pipeline width,
 * so we use factor 2 ((count_t0 + count_t1) / 2 * 4).
 */

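/*
 * Worked example of the scaling above (assuming the any=1 event reports
 * the same core-wide count on both siblings): with HT off, 1000 unhalted
 * thread cycles scale by 4 to 4000 slots; with HT on, each sibling reports
 * ~1000 core cycles, the summed 2000 is scaled by 2, giving 4000 again.
 */
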
EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
	"event=0x3c,umask=0x0",			/* cpu_clk_unhalted.thread */
	"event=0x3c,umask=0x0,any=1");		/* cpu_clk_unhalted.thread_any */
EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
	"event=0xe,umask=0x1");			/* uops_issued.any */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
	"event=0xc2,umask=0x2");		/* uops_retired.retire_slots */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
	"event=0x9c,umask=0x1");		/* idq_uops_not_delivered_core */
EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
	"event=0xd,umask=0x3,cmask=1",		/* int_misc.recovery_cycles */
	"event=0xd,umask=0x3,cmask=1,any=1");	/* int_misc.recovery_cycles_any */
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
	"4", "2");

static struct attribute *snb_events_attrs[] = {
	EVENT_PTR(mem_ld_snb),
	EVENT_PTR(mem_st_snb),
	EVENT_PTR(td_slots_issued),
	EVENT_PTR(td_slots_retired),
	EVENT_PTR(td_fetch_bubbles),
	EVENT_PTR(td_total_slots),
	EVENT_PTR(td_total_slots_scale),
	EVENT_PTR(td_recovery_bubbles),
	EVENT_PTR(td_recovery_bubbles_scale),
	NULL,
};

static struct event_constraint intel_hsw_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),

	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_bdw_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}

/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
 * - data counts include speculative execution (except L1 write, dtlb, bpu)
 * - remote node access includes remote memory, remote cache, remote mmio.
 * - prefetches are not included in the counts.
 * - icache miss does not include decoded icache
 */

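/*
 * How to read the hw_cache_event_ids tables below: each non-zero value is
 * a raw (umask << 8 | event) encoding, e.g. 0x0280 is event 0x80 with
 * umask 0x02; 0 means no suitable event exists and -1 marks op/result
 * combinations that are rejected as invalid.  Entries of 0x01b7 select
 * OFFCORE_RESPONSE, whose qualifying bits come from the matching
 * *_hw_cache_extra_regs table via an MSR_OFFCORE_RSP_* extra register.
 */
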
#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
#define SKL_DEMAND_RFO			BIT_ULL(1)
#define SKL_ANY_RESPONSE		BIT_ULL(16)
#define SKL_SUPPLIER_NONE		BIT_ULL(17)
#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
#define SKL_L3_MISS			(SKL_L3_MISS_LOCAL_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
#define SKL_SPL_HIT			BIT_ULL(30)
#define SKL_SNOOP_NONE			BIT_ULL(31)
#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
#define SKL_SNOOP_MISS			BIT_ULL(33)
#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
#define SKL_SNOOP_HITM			BIT_ULL(36)
#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
#define SKL_ANY_SNOOP			(SKL_SPL_HIT|SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
#define SKL_SNOOP_DRAM			(SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SPL_HIT)
#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
#define SKL_L3_MISS_REMOTE		(SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)

static __initconst const u64 skl_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

static __initconst const u64 skl_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
		[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
				     SKL_L3_MISS|SKL_ANY_SNOOP|
				     SKL_SUPPLIER_NONE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
		[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
				     SKL_L3_MISS|SKL_ANY_SNOOP|
				     SKL_SUPPLIER_NONE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
				     SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
				     SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

#define SNB_DMND_DATA_RD	(1ULL << 0)
#define SNB_DMND_RFO		(1ULL << 1)
#define SNB_DMND_IFETCH		(1ULL << 2)
#define SNB_DMND_WB		(1ULL << 3)
#define SNB_PF_DATA_RD		(1ULL << 4)
#define SNB_PF_RFO		(1ULL << 5)
#define SNB_PF_IFETCH		(1ULL << 6)
#define SNB_LLC_DATA_RD		(1ULL << 7)
#define SNB_LLC_RFO		(1ULL << 8)
#define SNB_LLC_IFETCH		(1ULL << 9)
#define SNB_BUS_LOCKS		(1ULL << 10)
#define SNB_STRM_ST		(1ULL << 11)
#define SNB_OTHER		(1ULL << 15)
#define SNB_RESP_ANY		(1ULL << 16)
#define SNB_NO_SUPP		(1ULL << 17)
#define SNB_LLC_HITM		(1ULL << 18)
#define SNB_LLC_HITE		(1ULL << 19)
#define SNB_LLC_HITS		(1ULL << 20)
#define SNB_LLC_HITF		(1ULL << 21)
#define SNB_LOCAL		(1ULL << 22)
#define SNB_REMOTE		(0xffULL << 23)
#define SNB_SNP_NONE		(1ULL << 31)
#define SNB_SNP_NOT_NEEDED	(1ULL << 32)
#define SNB_SNP_MISS		(1ULL << 33)
#define SNB_NO_FWD		(1ULL << 34)
#define SNB_SNP_FWD		(1ULL << 35)
#define SNB_HITM		(1ULL << 36)
#define SNB_NON_DRAM		(1ULL << 37)

#define SNB_DMND_READ		(SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
#define SNB_DMND_WRITE		(SNB_DMND_RFO|SNB_LLC_RFO)
#define SNB_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)

#define SNB_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
				 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
				 SNB_HITM)

#define SNB_DRAM_ANY		(SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
#define SNB_DRAM_REMOTE		(SNB_REMOTE|SNB_SNP_ANY)

#define SNB_L3_ACCESS		SNB_RESP_ANY
#define SNB_L3_MISS		(SNB_DRAM_ANY|SNB_NON_DRAM)

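/*
 * For illustration, the LL read entries below expand as follows: the
 * ACCESS encoding SNB_DMND_READ|SNB_L3_ACCESS is bits 0, 7 and 16, i.e.
 * 0x10081, while the MISS encoding SNB_DMND_READ|SNB_L3_MISS ORs in the
 * local/remote DRAM, snoop-response and non-DRAM bits instead of RESP_ANY.
 */
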
static __initconst const u64 snb_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
	},
 },
};

static __initconst const u64 snb_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },

};

/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
 * - data counts include speculative execution (except L1 write, dtlb, bpu)
 * - remote node access includes remote memory, remote cache, remote mmio.
 * - prefetches are not included in the counts because they are not
 *   reliably counted.
 */

#define HSW_DEMAND_DATA_RD		BIT_ULL(0)
#define HSW_DEMAND_RFO			BIT_ULL(1)
#define HSW_ANY_RESPONSE		BIT_ULL(16)
#define HSW_SUPPLIER_NONE		BIT_ULL(17)
#define HSW_L3_MISS_LOCAL_DRAM		BIT_ULL(22)
#define HSW_L3_MISS_REMOTE_HOP0		BIT_ULL(27)
#define HSW_L3_MISS_REMOTE_HOP1		BIT_ULL(28)
#define HSW_L3_MISS_REMOTE_HOP2P	BIT_ULL(29)
#define HSW_L3_MISS			(HSW_L3_MISS_LOCAL_DRAM| \
					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
					 HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_SNOOP_NONE			BIT_ULL(31)
#define HSW_SNOOP_NOT_NEEDED		BIT_ULL(32)
#define HSW_SNOOP_MISS			BIT_ULL(33)
#define HSW_SNOOP_HIT_NO_FWD		BIT_ULL(34)
#define HSW_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
#define HSW_SNOOP_HITM			BIT_ULL(36)
#define HSW_SNOOP_NON_DRAM		BIT_ULL(37)
#define HSW_ANY_SNOOP			(HSW_SNOOP_NONE| \
					 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
					 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
					 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
#define HSW_SNOOP_DRAM			(HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
#define HSW_DEMAND_READ			HSW_DEMAND_DATA_RD
#define HSW_DEMAND_WRITE		HSW_DEMAND_RFO
#define HSW_L3_MISS_REMOTE		(HSW_L3_MISS_REMOTE_HOP0|\
					 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_LLC_ACCESS			HSW_ANY_RESPONSE

#define BDW_L3_MISS_LOCAL		BIT(26)
#define BDW_L3_MISS			(BDW_L3_MISS_LOCAL| \
					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
					 HSW_L3_MISS_REMOTE_HOP2P)


static __initconst const u64 hsw_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

static __initconst const u64 hsw_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
				       HSW_LLC_ACCESS,
		[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
				     HSW_L3_MISS|HSW_ANY_SNOOP,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
				       HSW_LLC_ACCESS,
		[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
				     HSW_L3_MISS|HSW_ANY_SNOOP,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
				       HSW_L3_MISS_LOCAL_DRAM|
				       HSW_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
				     HSW_L3_MISS_REMOTE|
				     HSW_SNOOP_DRAM,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
				       HSW_L3_MISS_LOCAL_DRAM|
				       HSW_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
				     HSW_L3_MISS_REMOTE|
				     HSW_SNOOP_DRAM,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

static __initconst const u64 westmere_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
};

/*
 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
 * See IA32 SDM Vol 3B 30.6.1.3
 */

#define NHM_DMND_DATA_RD	(1 << 0)
#define NHM_DMND_RFO		(1 << 1)
#define NHM_DMND_IFETCH		(1 << 2)
#define NHM_DMND_WB		(1 << 3)
#define NHM_PF_DATA_RD		(1 << 4)
#define NHM_PF_DATA_RFO		(1 << 5)
#define NHM_PF_IFETCH		(1 << 6)
#define NHM_OFFCORE_OTHER	(1 << 7)
#define NHM_UNCORE_HIT		(1 << 8)
#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
#define NHM_OTHER_CORE_HITM	(1 << 10)
				/* reserved */
#define NHM_REMOTE_CACHE_FWD	(1 << 12)
#define NHM_REMOTE_DRAM		(1 << 13)
#define NHM_LOCAL_DRAM		(1 << 14)
#define NHM_NON_DRAM		(1 << 15)

#define NHM_LOCAL		(NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_REMOTE		(NHM_REMOTE_DRAM)

#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)

#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

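/*
 * Worked example from the definitions above: the LL read-miss entry below
 * is NHM_DMND_READ|NHM_L3_MISS, i.e. bit 0 plus bits 12-15 (0xf001):
 * demand data reads serviced by local DRAM, remote DRAM, a remote cache
 * forward, or a non-DRAM source.
 */
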
static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
	},
 },
};

static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
};

static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

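/*
 * Silvermont topdown events.  The scale of 2 below corresponds to a 2-wide
 * pipeline (vs. the factor 4 used for the big cores above); note that
 * topdown-slots-issued and topdown-slots-retired are both mapped to
 * uops_retired.all here.
 */
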
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
/* no_alloc_cycles.not_delivered */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
	"event=0xca,umask=0x50");
EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
/* uops_retired.all */
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
	"event=0xc2,umask=0x10");
/* uops_retired.all */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
	"event=0xc2,umask=0x10");

static struct attribute *slm_events_attrs[] = {
	EVENT_PTR(td_total_slots_slm),
	EVENT_PTR(td_total_slots_scale_slm),
	EVENT_PTR(td_fetch_bubbles_slm),
	EVENT_PTR(td_fetch_bubbles_scale_slm),
	EVENT_PTR(td_slots_issued_slm),
	EVENT_PTR(td_slots_retired_slm),
	NULL
};

static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
	EVENT_EXTRA_END
};

#define SLM_DMND_READ		SNB_DMND_DATA_RD
#define SLM_DMND_WRITE		SNB_DMND_RFO
#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)

#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
#define SLM_LLC_ACCESS		SNB_RESP_ANY
#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)

static __initconst const u64 slm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
	},
 },
};

static __initconst const u64 slm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
	EVENT_EXTRA_END
};

#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
#define GLM_DEMAND_RFO			BIT_ULL(1)
#define GLM_ANY_RESPONSE		BIT_ULL(16)
#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)

static __initconst const u64 glm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
			[C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
			[C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
			[C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
	},
};

static __initconst const u64 glm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_READ|
					   GLM_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
					   GLM_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
					   GLM_LLC_MISS,
		},
	},
};

#define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL	BIT_ULL(21)
#define KNL_MCDRAM_FAR		BIT_ULL(22)
#define KNL_DDR_LOCAL		BIT_ULL(23)
#define KNL_DDR_FAR		BIT_ULL(24)
#define KNL_DRAM_ANY		(KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
				 KNL_DDR_LOCAL | KNL_DDR_FAR)
#define KNL_L2_READ		SLM_DMND_READ
#define KNL_L2_WRITE		SLM_DMND_WRITE
#define KNL_L2_PREFETCH		SLM_DMND_PREFETCH
#define KNL_L2_ACCESS		SLM_LLC_ACCESS
#define KNL_L2_MISS		(KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
				 KNL_DRAM_ANY | SNB_SNP_ANY | \
				 SNB_NON_DRAM)

static __initconst const u64 knl_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = 0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
		},
	},
};

/*
 * Used from PMIs where the LBRs are already disabled.
 *
 * This function could be called consecutively. It is required to remain in
 * disabled state if called consecutively.
 *
 * During consecutive calls, the same disable value will be written to related
 * registers, so the PMU state remains unchanged. hw.state in
 * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
 * calls.
 */
static void __intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
	else
		intel_bts_disable_local();

	intel_pmu_pebs_disable_all();
}

static void intel_pmu_disable_all(void)
{
	__intel_pmu_disable_all();
	intel_pmu_lbr_disable_all();
}

static void __intel_pmu_enable_all(int added, bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all(pmi);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	} else
		intel_bts_enable_local();
}

static void intel_pmu_enable_all(int added)
{
	__intel_pmu_enable_all(added, false);
}

/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 *   These chips need to be 'reset' when adding counters by programming the
 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 *   in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The errata requires the following steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
1797 * D) Call x86_perf_event_set_period to restore PMCx; 1798 */ 1799 1800 /* We always operate 4 pairs of PERF Counters */ 1801 for (i = 0; i < 4; i++) { 1802 event = cpuc->events[i]; 1803 if (event) 1804 x86_perf_event_update(event); 1805 } 1806 1807 for (i = 0; i < 4; i++) { 1808 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); 1809 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); 1810 } 1811 1812 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); 1813 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); 1814 1815 for (i = 0; i < 4; i++) { 1816 event = cpuc->events[i]; 1817 1818 if (event) { 1819 x86_perf_event_set_period(event); 1820 __x86_pmu_enable_event(&event->hw, 1821 ARCH_PERFMON_EVENTSEL_ENABLE); 1822 } else 1823 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); 1824 } 1825 } 1826 1827 static void intel_pmu_nhm_enable_all(int added) 1828 { 1829 if (added) 1830 intel_pmu_nhm_workaround(); 1831 intel_pmu_enable_all(added); 1832 } 1833 1834 static inline u64 intel_pmu_get_status(void) 1835 { 1836 u64 status; 1837 1838 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 1839 1840 return status; 1841 } 1842 1843 static inline void intel_pmu_ack_status(u64 ack) 1844 { 1845 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 1846 } 1847 1848 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) 1849 { 1850 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1851 u64 ctrl_val, mask; 1852 1853 mask = 0xfULL << (idx * 4); 1854 1855 rdmsrl(hwc->config_base, ctrl_val); 1856 ctrl_val &= ~mask; 1857 wrmsrl(hwc->config_base, ctrl_val); 1858 } 1859 1860 static inline bool event_is_checkpointed(struct perf_event *event) 1861 { 1862 return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; 1863 } 1864 1865 static void intel_pmu_disable_event(struct perf_event *event) 1866 { 1867 struct hw_perf_event *hwc = &event->hw; 1868 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1869 1870 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 1871 intel_pmu_disable_bts(); 1872 intel_pmu_drain_bts_buffer(); 1873 return; 1874 } 1875 1876 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 1877 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 1878 cpuc->intel_cp_status &= ~(1ull << hwc->idx); 1879 1880 /* 1881 * must disable before any actual event 1882 * because any event may be combined with LBR 1883 */ 1884 if (needs_branch_stack(event)) 1885 intel_pmu_lbr_disable(event); 1886 1887 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1888 intel_pmu_disable_fixed(hwc); 1889 return; 1890 } 1891 1892 x86_pmu_disable_event(event); 1893 1894 if (unlikely(event->attr.precise_ip)) 1895 intel_pmu_pebs_disable(event); 1896 } 1897 1898 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) 1899 { 1900 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1901 u64 ctrl_val, bits, mask; 1902 1903 /* 1904 * Enable IRQ generation (0x8), 1905 * and enable ring-3 counting (0x2) and ring-0 counting (0x1) 1906 * if requested: 1907 */ 1908 bits = 0x8ULL; 1909 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) 1910 bits |= 0x2; 1911 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 1912 bits |= 0x1; 1913 1914 /* 1915 * ANY bit is supported in v3 and up 1916 */ 1917 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) 1918 bits |= 0x4; 1919 1920 bits <<= (idx * 4); 1921 mask = 0xfULL << (idx * 4); 1922 1923 rdmsrl(hwc->config_base, ctrl_val); 1924 ctrl_val &= ~mask; 1925 ctrl_val |= bits; 1926 wrmsrl(hwc->config_base, ctrl_val); 1927 } 1928 1929 static void intel_pmu_enable_event(struct perf_event *event) 1930 { 1931 struct hw_perf_event *hwc = 
&event->hw; 1932 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1933 1934 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 1935 if (!__this_cpu_read(cpu_hw_events.enabled)) 1936 return; 1937 1938 intel_pmu_enable_bts(hwc->config); 1939 return; 1940 } 1941 /* 1942 * must enabled before any actual event 1943 * because any event may be combined with LBR 1944 */ 1945 if (needs_branch_stack(event)) 1946 intel_pmu_lbr_enable(event); 1947 1948 if (event->attr.exclude_host) 1949 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); 1950 if (event->attr.exclude_guest) 1951 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); 1952 1953 if (unlikely(event_is_checkpointed(event))) 1954 cpuc->intel_cp_status |= (1ull << hwc->idx); 1955 1956 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1957 intel_pmu_enable_fixed(hwc); 1958 return; 1959 } 1960 1961 if (unlikely(event->attr.precise_ip)) 1962 intel_pmu_pebs_enable(event); 1963 1964 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 1965 } 1966 1967 /* 1968 * Save and restart an expired event. Called by NMI contexts, 1969 * so it has to be careful about preempting normal event ops: 1970 */ 1971 int intel_pmu_save_and_restart(struct perf_event *event) 1972 { 1973 x86_perf_event_update(event); 1974 /* 1975 * For a checkpointed counter always reset back to 0. This 1976 * avoids a situation where the counter overflows, aborts the 1977 * transaction and is then set back to shortly before the 1978 * overflow, and overflows and aborts again. 1979 */ 1980 if (unlikely(event_is_checkpointed(event))) { 1981 /* No race with NMIs because the counter should not be armed */ 1982 wrmsrl(event->hw.event_base, 0); 1983 local64_set(&event->hw.prev_count, 0); 1984 } 1985 return x86_perf_event_set_period(event); 1986 } 1987 1988 static void intel_pmu_reset(void) 1989 { 1990 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); 1991 unsigned long flags; 1992 int idx; 1993 1994 if (!x86_pmu.num_counters) 1995 return; 1996 1997 local_irq_save(flags); 1998 1999 pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); 2000 2001 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 2002 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); 2003 wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); 2004 } 2005 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 2006 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 2007 2008 if (ds) 2009 ds->bts_index = ds->bts_buffer_base; 2010 2011 /* Ack all overflows and disable fixed counters */ 2012 if (x86_pmu.version >= 2) { 2013 intel_pmu_ack_status(intel_pmu_get_status()); 2014 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 2015 } 2016 2017 /* Reset LBRs and LBR freezing */ 2018 if (x86_pmu.lbr_nr) { 2019 update_debugctlmsr(get_debugctlmsr() & 2020 ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); 2021 } 2022 2023 local_irq_restore(flags); 2024 } 2025 2026 /* 2027 * This handler is triggered by the local APIC, so the APIC IRQ handling 2028 * rules apply: 2029 */ 2030 static int intel_pmu_handle_irq(struct pt_regs *regs) 2031 { 2032 struct perf_sample_data data; 2033 struct cpu_hw_events *cpuc; 2034 int bit, loops; 2035 u64 status; 2036 int handled; 2037 2038 cpuc = this_cpu_ptr(&cpu_hw_events); 2039 2040 /* 2041 * No known reason to not always do late ACK, 2042 * but just in case do it opt-in. 
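 *
 * Illustrative sketch of the two orderings selected by x86_pmu.late_ack
 * (both appear in this handler):
 *
 *   late_ack == false:   apic_write(APIC_LVTPC, APIC_DM_NMI);
 *                        ... handle overflows ...
 *   late_ack == true:    ... handle overflows ...
 *                        apic_write(APIC_LVTPC, APIC_DM_NMI);
 *
 * The late variant only unmasks the PMI once the overflowed counters have
 * been reset, see the comment at the tail of this function.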
2043 */ 2044 if (!x86_pmu.late_ack) 2045 apic_write(APIC_LVTPC, APIC_DM_NMI); 2046 __intel_pmu_disable_all(); 2047 handled = intel_pmu_drain_bts_buffer(); 2048 handled += intel_bts_interrupt(); 2049 status = intel_pmu_get_status(); 2050 if (!status) 2051 goto done; 2052 2053 loops = 0; 2054 again: 2055 intel_pmu_lbr_read(); 2056 intel_pmu_ack_status(status); 2057 if (++loops > 100) { 2058 static bool warned = false; 2059 if (!warned) { 2060 WARN(1, "perfevents: irq loop stuck!\n"); 2061 perf_event_print_debug(); 2062 warned = true; 2063 } 2064 intel_pmu_reset(); 2065 goto done; 2066 } 2067 2068 inc_irq_stat(apic_perf_irqs); 2069 2070 2071 /* 2072 * Ignore a range of extra bits in status that do not indicate 2073 * overflow by themselves. 2074 */ 2075 status &= ~(GLOBAL_STATUS_COND_CHG | 2076 GLOBAL_STATUS_ASIF | 2077 GLOBAL_STATUS_LBRS_FROZEN); 2078 if (!status) 2079 goto done; 2080 2081 /* 2082 * PEBS overflow sets bit 62 in the global status register 2083 */ 2084 if (__test_and_clear_bit(62, (unsigned long *)&status)) { 2085 handled++; 2086 x86_pmu.drain_pebs(regs); 2087 /* 2088 * There are cases where, even though, the PEBS ovfl bit is set 2089 * in GLOBAL_OVF_STATUS, the PEBS events may also have their 2090 * overflow bits set for their counters. We must clear them 2091 * here because they have been processed as exact samples in 2092 * the drain_pebs() routine. They must not be processed again 2093 * in the for_each_bit_set() loop for regular samples below. 2094 */ 2095 status &= ~cpuc->pebs_enabled; 2096 status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; 2097 } 2098 2099 /* 2100 * Intel PT 2101 */ 2102 if (__test_and_clear_bit(55, (unsigned long *)&status)) { 2103 handled++; 2104 intel_pt_interrupt(); 2105 } 2106 2107 /* 2108 * Checkpointed counters can lead to 'spurious' PMIs because the 2109 * rollback caused by the PMI will have cleared the overflow status 2110 * bit. Therefore always force probe these counters. 2111 */ 2112 status |= cpuc->intel_cp_status; 2113 2114 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 2115 struct perf_event *event = cpuc->events[bit]; 2116 2117 handled++; 2118 2119 if (!test_bit(bit, cpuc->active_mask)) 2120 continue; 2121 2122 if (!intel_pmu_save_and_restart(event)) 2123 continue; 2124 2125 perf_sample_data_init(&data, 0, event->hw.last_period); 2126 2127 if (has_branch_stack(event)) 2128 data.br_stack = &cpuc->lbr_stack; 2129 2130 if (perf_event_overflow(event, &data, regs)) 2131 x86_pmu_stop(event, 0); 2132 } 2133 2134 /* 2135 * Repeat if there is more work to be done: 2136 */ 2137 status = intel_pmu_get_status(); 2138 if (status) 2139 goto again; 2140 2141 done: 2142 /* Only restore PMU state when it's active. See x86_pmu_disable(). */ 2143 if (cpuc->enabled) 2144 __intel_pmu_enable_all(0, true); 2145 2146 /* 2147 * Only unmask the NMI after the overflow counters 2148 * have been reset. This avoids spurious NMIs on 2149 * Haswell CPUs. 
2150 */ 2151 if (x86_pmu.late_ack) 2152 apic_write(APIC_LVTPC, APIC_DM_NMI); 2153 return handled; 2154 } 2155 2156 static struct event_constraint * 2157 intel_bts_constraints(struct perf_event *event) 2158 { 2159 struct hw_perf_event *hwc = &event->hw; 2160 unsigned int hw_event, bts_event; 2161 2162 if (event->attr.freq) 2163 return NULL; 2164 2165 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; 2166 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 2167 2168 if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) 2169 return &bts_constraint; 2170 2171 return NULL; 2172 } 2173 2174 static int intel_alt_er(int idx, u64 config) 2175 { 2176 int alt_idx = idx; 2177 2178 if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) 2179 return idx; 2180 2181 if (idx == EXTRA_REG_RSP_0) 2182 alt_idx = EXTRA_REG_RSP_1; 2183 2184 if (idx == EXTRA_REG_RSP_1) 2185 alt_idx = EXTRA_REG_RSP_0; 2186 2187 if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask) 2188 return idx; 2189 2190 return alt_idx; 2191 } 2192 2193 static void intel_fixup_er(struct perf_event *event, int idx) 2194 { 2195 event->hw.extra_reg.idx = idx; 2196 2197 if (idx == EXTRA_REG_RSP_0) { 2198 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 2199 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; 2200 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; 2201 } else if (idx == EXTRA_REG_RSP_1) { 2202 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 2203 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; 2204 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 2205 } 2206 } 2207 2208 /* 2209 * manage allocation of shared extra msr for certain events 2210 * 2211 * sharing can be: 2212 * per-cpu: to be shared between the various events on a single PMU 2213 * per-core: per-cpu + shared by HT threads 2214 */ 2215 static struct event_constraint * 2216 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 2217 struct perf_event *event, 2218 struct hw_perf_event_extra *reg) 2219 { 2220 struct event_constraint *c = &emptyconstraint; 2221 struct er_account *era; 2222 unsigned long flags; 2223 int idx = reg->idx; 2224 2225 /* 2226 * reg->alloc can be set due to existing state, so for fake cpuc we 2227 * need to ignore this, otherwise we might fail to allocate proper fake 2228 * state for this extra reg constraint. Also see the comment below. 2229 */ 2230 if (reg->alloc && !cpuc->is_fake) 2231 return NULL; /* call x86_get_event_constraint() */ 2232 2233 again: 2234 era = &cpuc->shared_regs->regs[idx]; 2235 /* 2236 * we use spin_lock_irqsave() to avoid lockdep issues when 2237 * passing a fake cpuc 2238 */ 2239 raw_spin_lock_irqsave(&era->lock, flags); 2240 2241 if (!atomic_read(&era->ref) || era->config == reg->config) { 2242 2243 /* 2244 * If its a fake cpuc -- as per validate_{group,event}() we 2245 * shouldn't touch event state and we can avoid doing so 2246 * since both will only call get_event_constraints() once 2247 * on each event, this avoids the need for reg->alloc. 2248 * 2249 * Not doing the ER fixup will only result in era->reg being 2250 * wrong, but since we won't actually try and program hardware 2251 * this isn't a problem either. 2252 */ 2253 if (!cpuc->is_fake) { 2254 if (idx != reg->idx) 2255 intel_fixup_er(event, idx); 2256 2257 /* 2258 * x86_schedule_events() can call get_event_constraints() 2259 * multiple times on events in the case of incremental 2260 * scheduling(). reg->alloc ensures we only do the ER 2261 * allocation once. 
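 *
 * Rough sketch of the repeated-call case this guards against
 * (illustrative only, based on the checks in this function):
 *
 *   1st call: era reference taken, era->config locked in, reg->alloc = 1
 *   2nd call: reg->alloc is set and !cpuc->is_fake, so the early return
 *             at the top of this function fires and no second era
 *             reference is taken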
2262 */ 2263 reg->alloc = 1; 2264 } 2265 2266 /* lock in msr value */ 2267 era->config = reg->config; 2268 era->reg = reg->reg; 2269 2270 /* one more user */ 2271 atomic_inc(&era->ref); 2272 2273 /* 2274 * need to call x86_get_event_constraint() 2275 * to check if associated event has constraints 2276 */ 2277 c = NULL; 2278 } else { 2279 idx = intel_alt_er(idx, reg->config); 2280 if (idx != reg->idx) { 2281 raw_spin_unlock_irqrestore(&era->lock, flags); 2282 goto again; 2283 } 2284 } 2285 raw_spin_unlock_irqrestore(&era->lock, flags); 2286 2287 return c; 2288 } 2289 2290 static void 2291 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, 2292 struct hw_perf_event_extra *reg) 2293 { 2294 struct er_account *era; 2295 2296 /* 2297 * Only put constraint if extra reg was actually allocated. Also takes 2298 * care of event which do not use an extra shared reg. 2299 * 2300 * Also, if this is a fake cpuc we shouldn't touch any event state 2301 * (reg->alloc) and we don't care about leaving inconsistent cpuc state 2302 * either since it'll be thrown out. 2303 */ 2304 if (!reg->alloc || cpuc->is_fake) 2305 return; 2306 2307 era = &cpuc->shared_regs->regs[reg->idx]; 2308 2309 /* one fewer user */ 2310 atomic_dec(&era->ref); 2311 2312 /* allocate again next time */ 2313 reg->alloc = 0; 2314 } 2315 2316 static struct event_constraint * 2317 intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 2318 struct perf_event *event) 2319 { 2320 struct event_constraint *c = NULL, *d; 2321 struct hw_perf_event_extra *xreg, *breg; 2322 2323 xreg = &event->hw.extra_reg; 2324 if (xreg->idx != EXTRA_REG_NONE) { 2325 c = __intel_shared_reg_get_constraints(cpuc, event, xreg); 2326 if (c == &emptyconstraint) 2327 return c; 2328 } 2329 breg = &event->hw.branch_reg; 2330 if (breg->idx != EXTRA_REG_NONE) { 2331 d = __intel_shared_reg_get_constraints(cpuc, event, breg); 2332 if (d == &emptyconstraint) { 2333 __intel_shared_reg_put_constraints(cpuc, xreg); 2334 c = d; 2335 } 2336 } 2337 return c; 2338 } 2339 2340 struct event_constraint * 2341 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2342 struct perf_event *event) 2343 { 2344 struct event_constraint *c; 2345 2346 if (x86_pmu.event_constraints) { 2347 for_each_event_constraint(c, x86_pmu.event_constraints) { 2348 if ((event->hw.config & c->cmask) == c->code) { 2349 event->hw.flags |= c->flags; 2350 return c; 2351 } 2352 } 2353 } 2354 2355 return &unconstrained; 2356 } 2357 2358 static struct event_constraint * 2359 __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2360 struct perf_event *event) 2361 { 2362 struct event_constraint *c; 2363 2364 c = intel_bts_constraints(event); 2365 if (c) 2366 return c; 2367 2368 c = intel_shared_regs_constraints(cpuc, event); 2369 if (c) 2370 return c; 2371 2372 c = intel_pebs_constraints(event); 2373 if (c) 2374 return c; 2375 2376 return x86_get_event_constraints(cpuc, idx, event); 2377 } 2378 2379 static void 2380 intel_start_scheduling(struct cpu_hw_events *cpuc) 2381 { 2382 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2383 struct intel_excl_states *xl; 2384 int tid = cpuc->excl_thread_id; 2385 2386 /* 2387 * nothing needed if in group validation mode 2388 */ 2389 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2390 return; 2391 2392 /* 2393 * no exclusion needed 2394 */ 2395 if (WARN_ON_ONCE(!excl_cntrs)) 2396 return; 2397 2398 xl = &excl_cntrs->states[tid]; 2399 2400 xl->sched_started = true; 2401 /* 2402 * lock shared state until we are done scheduling 2403 * in 
stop_event_scheduling() 2404 * makes scheduling appear as a transaction 2405 */ 2406 raw_spin_lock(&excl_cntrs->lock); 2407 } 2408 2409 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) 2410 { 2411 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2412 struct event_constraint *c = cpuc->event_constraint[idx]; 2413 struct intel_excl_states *xl; 2414 int tid = cpuc->excl_thread_id; 2415 2416 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2417 return; 2418 2419 if (WARN_ON_ONCE(!excl_cntrs)) 2420 return; 2421 2422 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) 2423 return; 2424 2425 xl = &excl_cntrs->states[tid]; 2426 2427 lockdep_assert_held(&excl_cntrs->lock); 2428 2429 if (c->flags & PERF_X86_EVENT_EXCL) 2430 xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; 2431 else 2432 xl->state[cntr] = INTEL_EXCL_SHARED; 2433 } 2434 2435 static void 2436 intel_stop_scheduling(struct cpu_hw_events *cpuc) 2437 { 2438 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2439 struct intel_excl_states *xl; 2440 int tid = cpuc->excl_thread_id; 2441 2442 /* 2443 * nothing needed if in group validation mode 2444 */ 2445 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2446 return; 2447 /* 2448 * no exclusion needed 2449 */ 2450 if (WARN_ON_ONCE(!excl_cntrs)) 2451 return; 2452 2453 xl = &excl_cntrs->states[tid]; 2454 2455 xl->sched_started = false; 2456 /* 2457 * release shared state lock (acquired in intel_start_scheduling()) 2458 */ 2459 raw_spin_unlock(&excl_cntrs->lock); 2460 } 2461 2462 static struct event_constraint * 2463 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 2464 int idx, struct event_constraint *c) 2465 { 2466 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2467 struct intel_excl_states *xlo; 2468 int tid = cpuc->excl_thread_id; 2469 int is_excl, i; 2470 2471 /* 2472 * validating a group does not require 2473 * enforcing cross-thread exclusion 2474 */ 2475 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2476 return c; 2477 2478 /* 2479 * no exclusion needed 2480 */ 2481 if (WARN_ON_ONCE(!excl_cntrs)) 2482 return c; 2483 2484 /* 2485 * because we modify the constraint, we need 2486 * to make a copy. Static constraints come 2487 * from static const tables. 2488 * 2489 * only needed when constraint has not yet 2490 * been cloned (marked dynamic) 2491 */ 2492 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { 2493 struct event_constraint *cx; 2494 2495 /* 2496 * grab pre-allocated constraint entry 2497 */ 2498 cx = &cpuc->constraint_list[idx]; 2499 2500 /* 2501 * initialize dynamic constraint 2502 * with static constraint 2503 */ 2504 *cx = *c; 2505 2506 /* 2507 * mark constraint as dynamic, so we 2508 * can free it later on 2509 */ 2510 cx->flags |= PERF_X86_EVENT_DYNAMIC; 2511 c = cx; 2512 } 2513 2514 /* 2515 * From here on, the constraint is dynamic. 
2516 * Either it was just allocated above, or it 2517 * was allocated during a earlier invocation 2518 * of this function 2519 */ 2520 2521 /* 2522 * state of sibling HT 2523 */ 2524 xlo = &excl_cntrs->states[tid ^ 1]; 2525 2526 /* 2527 * event requires exclusive counter access 2528 * across HT threads 2529 */ 2530 is_excl = c->flags & PERF_X86_EVENT_EXCL; 2531 if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { 2532 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; 2533 if (!cpuc->n_excl++) 2534 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); 2535 } 2536 2537 /* 2538 * Modify static constraint with current dynamic 2539 * state of thread 2540 * 2541 * EXCLUSIVE: sibling counter measuring exclusive event 2542 * SHARED : sibling counter measuring non-exclusive event 2543 * UNUSED : sibling counter unused 2544 */ 2545 for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { 2546 /* 2547 * exclusive event in sibling counter 2548 * our corresponding counter cannot be used 2549 * regardless of our event 2550 */ 2551 if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) 2552 __clear_bit(i, c->idxmsk); 2553 /* 2554 * if measuring an exclusive event, sibling 2555 * measuring non-exclusive, then counter cannot 2556 * be used 2557 */ 2558 if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) 2559 __clear_bit(i, c->idxmsk); 2560 } 2561 2562 /* 2563 * recompute actual bit weight for scheduling algorithm 2564 */ 2565 c->weight = hweight64(c->idxmsk64); 2566 2567 /* 2568 * if we return an empty mask, then switch 2569 * back to static empty constraint to avoid 2570 * the cost of freeing later on 2571 */ 2572 if (c->weight == 0) 2573 c = &emptyconstraint; 2574 2575 return c; 2576 } 2577 2578 static struct event_constraint * 2579 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2580 struct perf_event *event) 2581 { 2582 struct event_constraint *c1 = NULL; 2583 struct event_constraint *c2; 2584 2585 if (idx >= 0) /* fake does < 0 */ 2586 c1 = cpuc->event_constraint[idx]; 2587 2588 /* 2589 * first time only 2590 * - static constraint: no change across incremental scheduling calls 2591 * - dynamic constraint: handled by intel_get_excl_constraints() 2592 */ 2593 c2 = __intel_get_event_constraints(cpuc, idx, event); 2594 if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { 2595 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); 2596 c1->weight = c2->weight; 2597 c2 = c1; 2598 } 2599 2600 if (cpuc->excl_cntrs) 2601 return intel_get_excl_constraints(cpuc, event, idx, c2); 2602 2603 return c2; 2604 } 2605 2606 static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, 2607 struct perf_event *event) 2608 { 2609 struct hw_perf_event *hwc = &event->hw; 2610 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2611 int tid = cpuc->excl_thread_id; 2612 struct intel_excl_states *xl; 2613 2614 /* 2615 * nothing needed if in group validation mode 2616 */ 2617 if (cpuc->is_fake) 2618 return; 2619 2620 if (WARN_ON_ONCE(!excl_cntrs)) 2621 return; 2622 2623 if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { 2624 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; 2625 if (!--cpuc->n_excl) 2626 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); 2627 } 2628 2629 /* 2630 * If event was actually assigned, then mark the counter state as 2631 * unused now. 2632 */ 2633 if (hwc->idx >= 0) { 2634 xl = &excl_cntrs->states[tid]; 2635 2636 /* 2637 * put_constraint may be called from x86_schedule_events() 2638 * which already has the lock held so here make locking 2639 * conditional. 
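 *
 * The two cases, roughly (an illustrative sketch, not verbatim call
 * chains):
 *
 *   from x86_schedule_events():   xl->sched_started == true,
 *                                 excl_cntrs->lock already held,
 *                                 don't take it again below
 *   from normal event teardown:   xl->sched_started == false,
 *                                 take and release the lock here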
2640 */ 2641 if (!xl->sched_started) 2642 raw_spin_lock(&excl_cntrs->lock); 2643 2644 xl->state[hwc->idx] = INTEL_EXCL_UNUSED; 2645 2646 if (!xl->sched_started) 2647 raw_spin_unlock(&excl_cntrs->lock); 2648 } 2649 } 2650 2651 static void 2652 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, 2653 struct perf_event *event) 2654 { 2655 struct hw_perf_event_extra *reg; 2656 2657 reg = &event->hw.extra_reg; 2658 if (reg->idx != EXTRA_REG_NONE) 2659 __intel_shared_reg_put_constraints(cpuc, reg); 2660 2661 reg = &event->hw.branch_reg; 2662 if (reg->idx != EXTRA_REG_NONE) 2663 __intel_shared_reg_put_constraints(cpuc, reg); 2664 } 2665 2666 static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 2667 struct perf_event *event) 2668 { 2669 intel_put_shared_regs_event_constraints(cpuc, event); 2670 2671 /* 2672 * is PMU has exclusive counter restrictions, then 2673 * all events are subject to and must call the 2674 * put_excl_constraints() routine 2675 */ 2676 if (cpuc->excl_cntrs) 2677 intel_put_excl_constraints(cpuc, event); 2678 } 2679 2680 static void intel_pebs_aliases_core2(struct perf_event *event) 2681 { 2682 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2683 /* 2684 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2685 * (0x003c) so that we can use it with PEBS. 2686 * 2687 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2688 * PEBS capable. However we can use INST_RETIRED.ANY_P 2689 * (0x00c0), which is a PEBS capable event, to get the same 2690 * count. 2691 * 2692 * INST_RETIRED.ANY_P counts the number of cycles that retires 2693 * CNTMASK instructions. By setting CNTMASK to a value (16) 2694 * larger than the maximum number of instructions that can be 2695 * retired per cycle (4) and then inverting the condition, we 2696 * count all cycles that retire 16 or less instructions, which 2697 * is every cycle. 2698 * 2699 * Thereby we gain a PEBS capable cycle counter. 2700 */ 2701 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 2702 2703 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2704 event->hw.config = alt_config; 2705 } 2706 } 2707 2708 static void intel_pebs_aliases_snb(struct perf_event *event) 2709 { 2710 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2711 /* 2712 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2713 * (0x003c) so that we can use it with PEBS. 2714 * 2715 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2716 * PEBS capable. However we can use UOPS_RETIRED.ALL 2717 * (0x01c2), which is a PEBS capable event, to get the same 2718 * count. 2719 * 2720 * UOPS_RETIRED.ALL counts the number of cycles that retires 2721 * CNTMASK micro-ops. By setting CNTMASK to a value (16) 2722 * larger than the maximum number of micro-ops that can be 2723 * retired per cycle (4) and then inverting the condition, we 2724 * count all cycles that retire 16 or less micro-ops, which 2725 * is every cycle. 2726 * 2727 * Thereby we gain a PEBS capable cycle counter. 2728 */ 2729 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); 2730 2731 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2732 event->hw.config = alt_config; 2733 } 2734 } 2735 2736 static void intel_pebs_aliases_precdist(struct perf_event *event) 2737 { 2738 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2739 /* 2740 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2741 * (0x003c) so that we can use it with PEBS. 
2742 * 2743 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2744 * PEBS capable. However we can use INST_RETIRED.PREC_DIST 2745 * (0x01c0), which is a PEBS capable event, to get the same 2746 * count. 2747 * 2748 * The PREC_DIST event has special support to minimize sample 2749 * shadowing effects. One drawback is that it can be 2750 * only programmed on counter 1, but that seems like an 2751 * acceptable trade off. 2752 */ 2753 u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); 2754 2755 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2756 event->hw.config = alt_config; 2757 } 2758 } 2759 2760 static void intel_pebs_aliases_ivb(struct perf_event *event) 2761 { 2762 if (event->attr.precise_ip < 3) 2763 return intel_pebs_aliases_snb(event); 2764 return intel_pebs_aliases_precdist(event); 2765 } 2766 2767 static void intel_pebs_aliases_skl(struct perf_event *event) 2768 { 2769 if (event->attr.precise_ip < 3) 2770 return intel_pebs_aliases_core2(event); 2771 return intel_pebs_aliases_precdist(event); 2772 } 2773 2774 static unsigned long intel_pmu_free_running_flags(struct perf_event *event) 2775 { 2776 unsigned long flags = x86_pmu.free_running_flags; 2777 2778 if (event->attr.use_clockid) 2779 flags &= ~PERF_SAMPLE_TIME; 2780 return flags; 2781 } 2782 2783 static int intel_pmu_hw_config(struct perf_event *event) 2784 { 2785 int ret = x86_pmu_hw_config(event); 2786 2787 if (ret) 2788 return ret; 2789 2790 if (event->attr.precise_ip) { 2791 if (!event->attr.freq) { 2792 event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; 2793 if (!(event->attr.sample_type & 2794 ~intel_pmu_free_running_flags(event))) 2795 event->hw.flags |= PERF_X86_EVENT_FREERUNNING; 2796 } 2797 if (x86_pmu.pebs_aliases) 2798 x86_pmu.pebs_aliases(event); 2799 } 2800 2801 if (needs_branch_stack(event)) { 2802 ret = intel_pmu_setup_lbr_filter(event); 2803 if (ret) 2804 return ret; 2805 2806 /* 2807 * BTS is set up earlier in this path, so don't account twice 2808 */ 2809 if (!intel_pmu_has_bts(event)) { 2810 /* disallow lbr if conflicting events are present */ 2811 if (x86_add_exclusive(x86_lbr_exclusive_lbr)) 2812 return -EBUSY; 2813 2814 event->destroy = hw_perf_lbr_event_destroy; 2815 } 2816 } 2817 2818 if (event->attr.type != PERF_TYPE_RAW) 2819 return 0; 2820 2821 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) 2822 return 0; 2823 2824 if (x86_pmu.version < 3) 2825 return -EINVAL; 2826 2827 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 2828 return -EACCES; 2829 2830 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; 2831 2832 return 0; 2833 } 2834 2835 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 2836 { 2837 if (x86_pmu.guest_get_msrs) 2838 return x86_pmu.guest_get_msrs(nr); 2839 *nr = 0; 2840 return NULL; 2841 } 2842 EXPORT_SYMBOL_GPL(perf_guest_get_msrs); 2843 2844 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) 2845 { 2846 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2847 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; 2848 2849 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; 2850 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; 2851 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; 2852 /* 2853 * If PMU counter has PEBS enabled it is not enough to disable counter 2854 * on a guest entry since PEBS memory write can overshoot guest entry 2855 * and corrupt guest memory. Disabling PEBS solves the problem. 
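         *
         * The resulting guest/host switch list is therefore (a summary of
         * the assignments above and below, values computed at runtime):
         *
         *   arr[0] MSR_CORE_PERF_GLOBAL_CTRL: host  = intel_ctrl & ~guest_mask
         *                                     guest = intel_ctrl & ~host_mask
         *   arr[1] MSR_IA32_PEBS_ENABLE:      host  = cpuc->pebs_enabled
         *                                     guest = 0 (PEBS off in guests)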
         */
        arr[1].msr = MSR_IA32_PEBS_ENABLE;
        arr[1].host = cpuc->pebs_enabled;
        arr[1].guest = 0;

        *nr = 2;
        return arr;
}

static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
        int idx;

        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                struct perf_event *event = cpuc->events[idx];

                arr[idx].msr = x86_pmu_config_addr(idx);
                arr[idx].host = arr[idx].guest = 0;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;

                arr[idx].host = arr[idx].guest =
                        event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;

                if (event->attr.exclude_host)
                        arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
                else if (event->attr.exclude_guest)
                        arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
        }

        *nr = x86_pmu.num_counters;
        return arr;
}

static void core_pmu_enable_event(struct perf_event *event)
{
        if (!event->attr.exclude_host)
                x86_pmu_enable_event(event);
}

static void core_pmu_enable_all(int added)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int idx;

        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

                if (!test_bit(idx, cpuc->active_mask) ||
                    cpuc->events[idx]->attr.exclude_host)
                        continue;

                __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
        }
}

static int hsw_hw_config(struct perf_event *event)
{
        int ret = intel_pmu_hw_config(event);

        if (ret)
                return ret;
        if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
                return 0;
        event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);

        /*
         * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
         * PEBS or in ANY thread mode. Since the results are nonsensical,
         * forbid this combination.
         */
        if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
             event->attr.precise_ip > 0))
                return -EOPNOTSUPP;

        if (event_is_checkpointed(event)) {
                /*
                 * Sampling of checkpointed events can cause situations where
                 * the CPU constantly aborts because of an overflow, which is
                 * then checkpointed back and ignored. Forbid checkpointing
                 * for sampling.
                 *
                 * But still allow a long sampling period, so that perf stat
                 * from KVM works.
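                 *
                 * Concretely, for a checkpointed event (matching the check
                 * below):
                 *
                 *   sample_period == 0               counting, allowed
                 *   0 < sample_period < 0x7fffffff   real sampling, -EOPNOTSUPP
                 *   sample_period >= 0x7fffffff      effectively counting, allowed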
2944 */ 2945 if (event->attr.sample_period > 0 && 2946 event->attr.sample_period < 0x7fffffff) 2947 return -EOPNOTSUPP; 2948 } 2949 return 0; 2950 } 2951 2952 static struct event_constraint counter2_constraint = 2953 EVENT_CONSTRAINT(0, 0x4, 0); 2954 2955 static struct event_constraint * 2956 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2957 struct perf_event *event) 2958 { 2959 struct event_constraint *c; 2960 2961 c = intel_get_event_constraints(cpuc, idx, event); 2962 2963 /* Handle special quirk on in_tx_checkpointed only in counter 2 */ 2964 if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { 2965 if (c->idxmsk64 & (1U << 2)) 2966 return &counter2_constraint; 2967 return &emptyconstraint; 2968 } 2969 2970 return c; 2971 } 2972 2973 /* 2974 * Broadwell: 2975 * 2976 * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared 2977 * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine 2978 * the two to enforce a minimum period of 128 (the smallest value that has bits 2979 * 0-5 cleared and >= 100). 2980 * 2981 * Because of how the code in x86_perf_event_set_period() works, the truncation 2982 * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period 2983 * to make up for the 'lost' events due to carrying the 'error' in period_left. 2984 * 2985 * Therefore the effective (average) period matches the requested period, 2986 * despite coarser hardware granularity. 2987 */ 2988 static unsigned bdw_limit_period(struct perf_event *event, unsigned left) 2989 { 2990 if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == 2991 X86_CONFIG(.event=0xc0, .umask=0x01)) { 2992 if (left < 128) 2993 left = 128; 2994 left &= ~0x3fu; 2995 } 2996 return left; 2997 } 2998 2999 PMU_FORMAT_ATTR(event, "config:0-7" ); 3000 PMU_FORMAT_ATTR(umask, "config:8-15" ); 3001 PMU_FORMAT_ATTR(edge, "config:18" ); 3002 PMU_FORMAT_ATTR(pc, "config:19" ); 3003 PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ 3004 PMU_FORMAT_ATTR(inv, "config:23" ); 3005 PMU_FORMAT_ATTR(cmask, "config:24-31" ); 3006 PMU_FORMAT_ATTR(in_tx, "config:32"); 3007 PMU_FORMAT_ATTR(in_tx_cp, "config:33"); 3008 3009 static struct attribute *intel_arch_formats_attr[] = { 3010 &format_attr_event.attr, 3011 &format_attr_umask.attr, 3012 &format_attr_edge.attr, 3013 &format_attr_pc.attr, 3014 &format_attr_inv.attr, 3015 &format_attr_cmask.attr, 3016 NULL, 3017 }; 3018 3019 ssize_t intel_event_sysfs_show(char *page, u64 config) 3020 { 3021 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); 3022 3023 return x86_event_sysfs_show(page, config, event); 3024 } 3025 3026 struct intel_shared_regs *allocate_shared_regs(int cpu) 3027 { 3028 struct intel_shared_regs *regs; 3029 int i; 3030 3031 regs = kzalloc_node(sizeof(struct intel_shared_regs), 3032 GFP_KERNEL, cpu_to_node(cpu)); 3033 if (regs) { 3034 /* 3035 * initialize the locks to keep lockdep happy 3036 */ 3037 for (i = 0; i < EXTRA_REG_MAX; i++) 3038 raw_spin_lock_init(®s->regs[i].lock); 3039 3040 regs->core_id = -1; 3041 } 3042 return regs; 3043 } 3044 3045 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) 3046 { 3047 struct intel_excl_cntrs *c; 3048 3049 c = kzalloc_node(sizeof(struct intel_excl_cntrs), 3050 GFP_KERNEL, cpu_to_node(cpu)); 3051 if (c) { 3052 raw_spin_lock_init(&c->lock); 3053 c->core_id = -1; 3054 } 3055 return c; 3056 } 3057 3058 static int intel_pmu_cpu_prepare(int cpu) 3059 { 3060 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3061 3062 if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { 3063 
cpuc->shared_regs = allocate_shared_regs(cpu); 3064 if (!cpuc->shared_regs) 3065 goto err; 3066 } 3067 3068 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 3069 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); 3070 3071 cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); 3072 if (!cpuc->constraint_list) 3073 goto err_shared_regs; 3074 3075 cpuc->excl_cntrs = allocate_excl_cntrs(cpu); 3076 if (!cpuc->excl_cntrs) 3077 goto err_constraint_list; 3078 3079 cpuc->excl_thread_id = 0; 3080 } 3081 3082 return NOTIFY_OK; 3083 3084 err_constraint_list: 3085 kfree(cpuc->constraint_list); 3086 cpuc->constraint_list = NULL; 3087 3088 err_shared_regs: 3089 kfree(cpuc->shared_regs); 3090 cpuc->shared_regs = NULL; 3091 3092 err: 3093 return NOTIFY_BAD; 3094 } 3095 3096 static void intel_pmu_cpu_starting(int cpu) 3097 { 3098 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3099 int core_id = topology_core_id(cpu); 3100 int i; 3101 3102 init_debug_store_on_cpu(cpu); 3103 /* 3104 * Deal with CPUs that don't clear their LBRs on power-up. 3105 */ 3106 intel_pmu_lbr_reset(); 3107 3108 cpuc->lbr_sel = NULL; 3109 3110 if (!cpuc->shared_regs) 3111 return; 3112 3113 if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { 3114 for_each_cpu(i, topology_sibling_cpumask(cpu)) { 3115 struct intel_shared_regs *pc; 3116 3117 pc = per_cpu(cpu_hw_events, i).shared_regs; 3118 if (pc && pc->core_id == core_id) { 3119 cpuc->kfree_on_online[0] = cpuc->shared_regs; 3120 cpuc->shared_regs = pc; 3121 break; 3122 } 3123 } 3124 cpuc->shared_regs->core_id = core_id; 3125 cpuc->shared_regs->refcnt++; 3126 } 3127 3128 if (x86_pmu.lbr_sel_map) 3129 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; 3130 3131 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 3132 for_each_cpu(i, topology_sibling_cpumask(cpu)) { 3133 struct intel_excl_cntrs *c; 3134 3135 c = per_cpu(cpu_hw_events, i).excl_cntrs; 3136 if (c && c->core_id == core_id) { 3137 cpuc->kfree_on_online[1] = cpuc->excl_cntrs; 3138 cpuc->excl_cntrs = c; 3139 cpuc->excl_thread_id = 1; 3140 break; 3141 } 3142 } 3143 cpuc->excl_cntrs->core_id = core_id; 3144 cpuc->excl_cntrs->refcnt++; 3145 } 3146 } 3147 3148 static void free_excl_cntrs(int cpu) 3149 { 3150 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3151 struct intel_excl_cntrs *c; 3152 3153 c = cpuc->excl_cntrs; 3154 if (c) { 3155 if (c->core_id == -1 || --c->refcnt == 0) 3156 kfree(c); 3157 cpuc->excl_cntrs = NULL; 3158 kfree(cpuc->constraint_list); 3159 cpuc->constraint_list = NULL; 3160 } 3161 } 3162 3163 static void intel_pmu_cpu_dying(int cpu) 3164 { 3165 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3166 struct intel_shared_regs *pc; 3167 3168 pc = cpuc->shared_regs; 3169 if (pc) { 3170 if (pc->core_id == -1 || --pc->refcnt == 0) 3171 kfree(pc); 3172 cpuc->shared_regs = NULL; 3173 } 3174 3175 free_excl_cntrs(cpu); 3176 3177 fini_debug_store_on_cpu(cpu); 3178 } 3179 3180 static void intel_pmu_sched_task(struct perf_event_context *ctx, 3181 bool sched_in) 3182 { 3183 if (x86_pmu.pebs_active) 3184 intel_pmu_pebs_sched_task(ctx, sched_in); 3185 if (x86_pmu.lbr_nr) 3186 intel_pmu_lbr_sched_task(ctx, sched_in); 3187 } 3188 3189 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 3190 3191 PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 3192 3193 PMU_FORMAT_ATTR(frontend, "config1:0-23"); 3194 3195 static struct attribute *intel_arch3_formats_attr[] = { 3196 &format_attr_event.attr, 3197 &format_attr_umask.attr, 3198 &format_attr_edge.attr, 3199 &format_attr_pc.attr, 3200 &format_attr_any.attr, 3201 
&format_attr_inv.attr, 3202 &format_attr_cmask.attr, 3203 &format_attr_in_tx.attr, 3204 &format_attr_in_tx_cp.attr, 3205 3206 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 3207 &format_attr_ldlat.attr, /* PEBS load latency */ 3208 NULL, 3209 }; 3210 3211 static struct attribute *skl_format_attr[] = { 3212 &format_attr_frontend.attr, 3213 NULL, 3214 }; 3215 3216 static __initconst const struct x86_pmu core_pmu = { 3217 .name = "core", 3218 .handle_irq = x86_pmu_handle_irq, 3219 .disable_all = x86_pmu_disable_all, 3220 .enable_all = core_pmu_enable_all, 3221 .enable = core_pmu_enable_event, 3222 .disable = x86_pmu_disable_event, 3223 .hw_config = x86_pmu_hw_config, 3224 .schedule_events = x86_schedule_events, 3225 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 3226 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 3227 .event_map = intel_pmu_event_map, 3228 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 3229 .apic = 1, 3230 .free_running_flags = PEBS_FREERUNNING_FLAGS, 3231 3232 /* 3233 * Intel PMCs cannot be accessed sanely above 32-bit width, 3234 * so we install an artificial 1<<31 period regardless of 3235 * the generic event period: 3236 */ 3237 .max_period = (1ULL<<31) - 1, 3238 .get_event_constraints = intel_get_event_constraints, 3239 .put_event_constraints = intel_put_event_constraints, 3240 .event_constraints = intel_core_event_constraints, 3241 .guest_get_msrs = core_guest_get_msrs, 3242 .format_attrs = intel_arch_formats_attr, 3243 .events_sysfs_show = intel_event_sysfs_show, 3244 3245 /* 3246 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs 3247 * together with PMU version 1 and thus be using core_pmu with 3248 * shared_regs. We need following callbacks here to allocate 3249 * it properly. 3250 */ 3251 .cpu_prepare = intel_pmu_cpu_prepare, 3252 .cpu_starting = intel_pmu_cpu_starting, 3253 .cpu_dying = intel_pmu_cpu_dying, 3254 }; 3255 3256 static __initconst const struct x86_pmu intel_pmu = { 3257 .name = "Intel", 3258 .handle_irq = intel_pmu_handle_irq, 3259 .disable_all = intel_pmu_disable_all, 3260 .enable_all = intel_pmu_enable_all, 3261 .enable = intel_pmu_enable_event, 3262 .disable = intel_pmu_disable_event, 3263 .hw_config = intel_pmu_hw_config, 3264 .schedule_events = x86_schedule_events, 3265 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 3266 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 3267 .event_map = intel_pmu_event_map, 3268 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 3269 .apic = 1, 3270 .free_running_flags = PEBS_FREERUNNING_FLAGS, 3271 /* 3272 * Intel PMCs cannot be accessed sanely above 32 bit width, 3273 * so we install an artificial 1<<31 period regardless of 3274 * the generic event period: 3275 */ 3276 .max_period = (1ULL << 31) - 1, 3277 .get_event_constraints = intel_get_event_constraints, 3278 .put_event_constraints = intel_put_event_constraints, 3279 .pebs_aliases = intel_pebs_aliases_core2, 3280 3281 .format_attrs = intel_arch3_formats_attr, 3282 .events_sysfs_show = intel_event_sysfs_show, 3283 3284 .cpu_prepare = intel_pmu_cpu_prepare, 3285 .cpu_starting = intel_pmu_cpu_starting, 3286 .cpu_dying = intel_pmu_cpu_dying, 3287 .guest_get_msrs = intel_guest_get_msrs, 3288 .sched_task = intel_pmu_sched_task, 3289 }; 3290 3291 static __init void intel_clovertown_quirk(void) 3292 { 3293 /* 3294 * PEBS is unreliable due to: 3295 * 3296 * AJ67 - PEBS may experience CPL leaks 3297 * AJ68 - PEBS PMI may be delayed by one event 3298 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] 3299 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in 
combination with PEBS 3300 * 3301 * AJ67 could be worked around by restricting the OS/USR flags. 3302 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. 3303 * 3304 * AJ106 could possibly be worked around by not allowing LBR 3305 * usage from PEBS, including the fixup. 3306 * AJ68 could possibly be worked around by always programming 3307 * a pebs_event_reset[0] value and coping with the lost events. 3308 * 3309 * But taken together it might just make sense to not enable PEBS on 3310 * these chips. 3311 */ 3312 pr_warn("PEBS disabled due to CPU errata\n"); 3313 x86_pmu.pebs = 0; 3314 x86_pmu.pebs_constraints = NULL; 3315 } 3316 3317 static int intel_snb_pebs_broken(int cpu) 3318 { 3319 u32 rev = UINT_MAX; /* default to broken for unknown models */ 3320 3321 switch (cpu_data(cpu).x86_model) { 3322 case 42: /* SNB */ 3323 rev = 0x28; 3324 break; 3325 3326 case 45: /* SNB-EP */ 3327 switch (cpu_data(cpu).x86_mask) { 3328 case 6: rev = 0x618; break; 3329 case 7: rev = 0x70c; break; 3330 } 3331 } 3332 3333 return (cpu_data(cpu).microcode < rev); 3334 } 3335 3336 static void intel_snb_check_microcode(void) 3337 { 3338 int pebs_broken = 0; 3339 int cpu; 3340 3341 get_online_cpus(); 3342 for_each_online_cpu(cpu) { 3343 if ((pebs_broken = intel_snb_pebs_broken(cpu))) 3344 break; 3345 } 3346 put_online_cpus(); 3347 3348 if (pebs_broken == x86_pmu.pebs_broken) 3349 return; 3350 3351 /* 3352 * Serialized by the microcode lock.. 3353 */ 3354 if (x86_pmu.pebs_broken) { 3355 pr_info("PEBS enabled due to microcode update\n"); 3356 x86_pmu.pebs_broken = 0; 3357 } else { 3358 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); 3359 x86_pmu.pebs_broken = 1; 3360 } 3361 } 3362 3363 /* 3364 * Under certain circumstances, access certain MSR may cause #GP. 3365 * The function tests if the input MSR can be safely accessed. 3366 */ 3367 static bool check_msr(unsigned long msr, u64 mask) 3368 { 3369 u64 val_old, val_new, val_tmp; 3370 3371 /* 3372 * Read the current value, change it and read it back to see if it 3373 * matches, this is needed to detect certain hardware emulators 3374 * (qemu/kvm) that don't trap on the MSR access and always return 0s. 3375 */ 3376 if (rdmsrl_safe(msr, &val_old)) 3377 return false; 3378 3379 /* 3380 * Only change the bits which can be updated by wrmsrl. 3381 */ 3382 val_tmp = val_old ^ mask; 3383 if (wrmsrl_safe(msr, val_tmp) || 3384 rdmsrl_safe(msr, &val_new)) 3385 return false; 3386 3387 if (val_new != val_tmp) 3388 return false; 3389 3390 /* Here it's sure that the MSR can be safely accessed. 3391 * Restore the old value and return. 
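         *
         * The whole probe therefore amounts to (a sketch of the calls made
         * above):
         *
         *   rdmsrl_safe(msr, &val_old)          must not fault
         *   wrmsrl_safe(msr, val_old ^ mask)    must not fault
         *   rdmsrl_safe(msr, &val_new)          must read back val_old ^ mask
         *   wrmsrl(msr, val_old)                restore the original value
         *
         * An emulator that silently ignores the write is caught by the
         * read-back comparison.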
         */
        wrmsrl(msr, val_old);

        return true;
}

static __init void intel_sandybridge_quirk(void)
{
        x86_pmu.check_microcode = intel_snb_check_microcode;
        intel_snb_check_microcode();
}

static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
        { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
        { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
        { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
        { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
        { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
        { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
        { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void intel_arch_events_quirk(void)
{
        int bit;

        /* disable events that are reported as not present by cpuid */
        for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
                intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
                pr_warn("CPUID marked event: \'%s\' unavailable\n",
                        intel_arch_events_map[bit].name);
        }
}

static __init void intel_nehalem_quirk(void)
{
        union cpuid10_ebx ebx;

        ebx.full = x86_pmu.events_maskl;
        if (ebx.split.no_branch_misses_retired) {
                /*
                 * Erratum AAJ80 detected, we work around it by using
                 * the BR_MISP_EXEC.ANY event. This will over-count
                 * branch-misses, but it's still much better than the
                 * architectural event which is often completely bogus:
                 */
                intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
                ebx.split.no_branch_misses_retired = 0;
                x86_pmu.events_maskl = ebx.full;
                pr_info("CPU erratum AAJ80 worked around\n");
        }
}

/*
 * enable software workaround for errata:
 * SNB: BJ122
 * IVB: BV98
 * HSW: HSD29
 *
 * Only needed when HT is enabled. However detecting
 * if HT is enabled is difficult (model specific).
So instead, 3453 * we enable the workaround in the early boot, and verify if 3454 * it is needed in a later initcall phase once we have valid 3455 * topology information to check if HT is actually enabled 3456 */ 3457 static __init void intel_ht_bug(void) 3458 { 3459 x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; 3460 3461 x86_pmu.start_scheduling = intel_start_scheduling; 3462 x86_pmu.commit_scheduling = intel_commit_scheduling; 3463 x86_pmu.stop_scheduling = intel_stop_scheduling; 3464 } 3465 3466 EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); 3467 EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") 3468 3469 /* Haswell special events */ 3470 EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); 3471 EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); 3472 EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); 3473 EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); 3474 EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); 3475 EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); 3476 EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); 3477 EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); 3478 EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); 3479 EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); 3480 EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); 3481 EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); 3482 3483 static struct attribute *hsw_events_attrs[] = { 3484 EVENT_PTR(tx_start), 3485 EVENT_PTR(tx_commit), 3486 EVENT_PTR(tx_abort), 3487 EVENT_PTR(tx_capacity), 3488 EVENT_PTR(tx_conflict), 3489 EVENT_PTR(el_start), 3490 EVENT_PTR(el_commit), 3491 EVENT_PTR(el_abort), 3492 EVENT_PTR(el_capacity), 3493 EVENT_PTR(el_conflict), 3494 EVENT_PTR(cycles_t), 3495 EVENT_PTR(cycles_ct), 3496 EVENT_PTR(mem_ld_hsw), 3497 EVENT_PTR(mem_st_hsw), 3498 EVENT_PTR(td_slots_issued), 3499 EVENT_PTR(td_slots_retired), 3500 EVENT_PTR(td_fetch_bubbles), 3501 EVENT_PTR(td_total_slots), 3502 EVENT_PTR(td_total_slots_scale), 3503 EVENT_PTR(td_recovery_bubbles), 3504 EVENT_PTR(td_recovery_bubbles_scale), 3505 NULL 3506 }; 3507 3508 __init int intel_pmu_init(void) 3509 { 3510 union cpuid10_edx edx; 3511 union cpuid10_eax eax; 3512 union cpuid10_ebx ebx; 3513 struct event_constraint *c; 3514 unsigned int unused; 3515 struct extra_reg *er; 3516 int version, i; 3517 3518 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 3519 switch (boot_cpu_data.x86) { 3520 case 0x6: 3521 return p6_pmu_init(); 3522 case 0xb: 3523 return knc_pmu_init(); 3524 case 0xf: 3525 return p4_pmu_init(); 3526 } 3527 return -ENODEV; 3528 } 3529 3530 /* 3531 * Check whether the Architectural PerfMon supports 3532 * Branch Misses Retired hw_event or not. 
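 *
 * For reference, the architectural perfmon CPUID leaf 0xA consumed below
 * (simplified field layout, as mapped by the cpuid10_eax/ebx/edx unions):
 *
 *   EAX: version id, number of GP counters, counter width, EBX mask length
 *   EBX: a set bit means the corresponding architectural event is
 *        NOT available
 *   EDX: number and width of fixed-function counters (version 2 and later)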
3533 */ 3534 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); 3535 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 3536 return -ENODEV; 3537 3538 version = eax.split.version_id; 3539 if (version < 2) 3540 x86_pmu = core_pmu; 3541 else 3542 x86_pmu = intel_pmu; 3543 3544 x86_pmu.version = version; 3545 x86_pmu.num_counters = eax.split.num_counters; 3546 x86_pmu.cntval_bits = eax.split.bit_width; 3547 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 3548 3549 x86_pmu.events_maskl = ebx.full; 3550 x86_pmu.events_mask_len = eax.split.mask_length; 3551 3552 x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); 3553 3554 /* 3555 * Quirk: v2 perfmon does not report fixed-purpose events, so 3556 * assume at least 3 events: 3557 */ 3558 if (version > 1) 3559 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 3560 3561 if (boot_cpu_has(X86_FEATURE_PDCM)) { 3562 u64 capabilities; 3563 3564 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); 3565 x86_pmu.intel_cap.capabilities = capabilities; 3566 } 3567 3568 intel_ds_init(); 3569 3570 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ 3571 3572 /* 3573 * Install the hw-cache-events table: 3574 */ 3575 switch (boot_cpu_data.x86_model) { 3576 case 14: /* 65nm Core "Yonah" */ 3577 pr_cont("Core events, "); 3578 break; 3579 3580 case 15: /* 65nm Core2 "Merom" */ 3581 x86_add_quirk(intel_clovertown_quirk); 3582 case 22: /* 65nm Core2 "Merom-L" */ 3583 case 23: /* 45nm Core2 "Penryn" */ 3584 case 29: /* 45nm Core2 "Dunnington (MP) */ 3585 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 3586 sizeof(hw_cache_event_ids)); 3587 3588 intel_pmu_lbr_init_core(); 3589 3590 x86_pmu.event_constraints = intel_core2_event_constraints; 3591 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; 3592 pr_cont("Core2 events, "); 3593 break; 3594 3595 case 30: /* 45nm Nehalem */ 3596 case 26: /* 45nm Nehalem-EP */ 3597 case 46: /* 45nm Nehalem-EX */ 3598 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 3599 sizeof(hw_cache_event_ids)); 3600 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3601 sizeof(hw_cache_extra_regs)); 3602 3603 intel_pmu_lbr_init_nhm(); 3604 3605 x86_pmu.event_constraints = intel_nehalem_event_constraints; 3606 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 3607 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3608 x86_pmu.extra_regs = intel_nehalem_extra_regs; 3609 3610 x86_pmu.cpu_events = nhm_events_attrs; 3611 3612 /* UOPS_ISSUED.STALLED_CYCLES */ 3613 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3614 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3615 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3616 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3617 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3618 3619 intel_pmu_pebs_data_source_nhm(); 3620 x86_add_quirk(intel_nehalem_quirk); 3621 3622 pr_cont("Nehalem events, "); 3623 break; 3624 3625 case 28: /* 45nm Atom "Pineview" */ 3626 case 38: /* 45nm Atom "Lincroft" */ 3627 case 39: /* 32nm Atom "Penwell" */ 3628 case 53: /* 32nm Atom "Cloverview" */ 3629 case 54: /* 32nm Atom "Cedarview" */ 3630 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 3631 sizeof(hw_cache_event_ids)); 3632 3633 intel_pmu_lbr_init_atom(); 3634 3635 x86_pmu.event_constraints = intel_gen_event_constraints; 3636 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; 3637 x86_pmu.pebs_aliases = intel_pebs_aliases_core2; 3638 
pr_cont("Atom events, "); 3639 break; 3640 3641 case 55: /* 22nm Atom "Silvermont" */ 3642 case 76: /* 14nm Atom "Airmont" */ 3643 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ 3644 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 3645 sizeof(hw_cache_event_ids)); 3646 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, 3647 sizeof(hw_cache_extra_regs)); 3648 3649 intel_pmu_lbr_init_slm(); 3650 3651 x86_pmu.event_constraints = intel_slm_event_constraints; 3652 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 3653 x86_pmu.extra_regs = intel_slm_extra_regs; 3654 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3655 x86_pmu.cpu_events = slm_events_attrs; 3656 pr_cont("Silvermont events, "); 3657 break; 3658 3659 case 92: /* 14nm Atom "Goldmont" */ 3660 case 95: /* 14nm Atom "Goldmont Denverton" */ 3661 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, 3662 sizeof(hw_cache_event_ids)); 3663 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, 3664 sizeof(hw_cache_extra_regs)); 3665 3666 intel_pmu_lbr_init_skl(); 3667 3668 x86_pmu.event_constraints = intel_slm_event_constraints; 3669 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; 3670 x86_pmu.extra_regs = intel_glm_extra_regs; 3671 /* 3672 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS 3673 * for precise cycles. 3674 * :pp is identical to :ppp 3675 */ 3676 x86_pmu.pebs_aliases = NULL; 3677 x86_pmu.pebs_prec_dist = true; 3678 x86_pmu.lbr_pt_coexist = true; 3679 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3680 pr_cont("Goldmont events, "); 3681 break; 3682 3683 case 37: /* 32nm Westmere */ 3684 case 44: /* 32nm Westmere-EP */ 3685 case 47: /* 32nm Westmere-EX */ 3686 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 3687 sizeof(hw_cache_event_ids)); 3688 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3689 sizeof(hw_cache_extra_regs)); 3690 3691 intel_pmu_lbr_init_nhm(); 3692 3693 x86_pmu.event_constraints = intel_westmere_event_constraints; 3694 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3695 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 3696 x86_pmu.extra_regs = intel_westmere_extra_regs; 3697 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3698 3699 x86_pmu.cpu_events = nhm_events_attrs; 3700 3701 /* UOPS_ISSUED.STALLED_CYCLES */ 3702 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3703 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3704 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3705 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3706 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3707 3708 intel_pmu_pebs_data_source_nhm(); 3709 pr_cont("Westmere events, "); 3710 break; 3711 3712 case 42: /* 32nm SandyBridge */ 3713 case 45: /* 32nm SandyBridge-E/EN/EP */ 3714 x86_add_quirk(intel_sandybridge_quirk); 3715 x86_add_quirk(intel_ht_bug); 3716 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 3717 sizeof(hw_cache_event_ids)); 3718 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 3719 sizeof(hw_cache_extra_regs)); 3720 3721 intel_pmu_lbr_init_snb(); 3722 3723 x86_pmu.event_constraints = intel_snb_event_constraints; 3724 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 3725 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3726 if (boot_cpu_data.x86_model == 45) 3727 x86_pmu.extra_regs = intel_snbep_extra_regs; 3728 else 3729 x86_pmu.extra_regs = intel_snb_extra_regs; 3730 3731 3732 /* all extra regs are per-cpu when HT is on */ 3733 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3734 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3735 3736 
		x86_pmu.cpu_events = snb_events_attrs;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);

		pr_cont("SandyBridge events, ");
		break;

	case 58: /* 22nm IvyBridge       */
	case 62: /* 22nm IvyBridge-EP/EX */
		x86_add_quirk(intel_ht_bug);
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		/* dTLB-load-misses on IVB is different from SNB */
		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */

		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_snb();

		x86_pmu.event_constraints = intel_ivb_event_constraints;
		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		if (boot_cpu_data.x86_model == 62)
			x86_pmu.extra_regs = intel_snbep_extra_regs;
		else
			x86_pmu.extra_regs = intel_snb_extra_regs;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.cpu_events = snb_events_attrs;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);

		pr_cont("IvyBridge events, ");
		break;

	case 60: /* 22nm Haswell Core */
	case 63: /* 22nm Haswell Server */
	case 69: /* 22nm Haswell ULT */
	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
		x86_add_quirk(intel_ht_bug);
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_hsw();

		x86_pmu.event_constraints = intel_hsw_event_constraints;
		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
		x86_pmu.extra_regs = intel_snbep_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.cpu_events = hsw_events_attrs;
		x86_pmu.lbr_double_abort = true;
		pr_cont("Haswell events, ");
		break;

	case 61: /* 14nm Broadwell Core-M */
	case 86: /* 14nm Broadwell Xeon D */
	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
	case 79: /* 14nm Broadwell Server */
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
		hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] =
			HSW_DEMAND_READ|BDW_L3_MISS|HSW_SNOOP_DRAM;
		hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] =
			HSW_DEMAND_WRITE|BDW_L3_MISS|HSW_SNOOP_DRAM;
		hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] =
			HSW_DEMAND_READ|BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] =
			HSW_DEMAND_WRITE|BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;

		intel_pmu_lbr_init_hsw();

		x86_pmu.event_constraints = intel_bdw_event_constraints;
		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
		x86_pmu.extra_regs = intel_snbep_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.cpu_events = hsw_events_attrs;
		x86_pmu.limit_period = bdw_limit_period;
		pr_cont("Broadwell events, ");
		break;

	case 87: /* Knights Landing Xeon Phi */
		memcpy(hw_cache_event_ids,
		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs,
		       knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
		intel_pmu_lbr_init_knl();

		x86_pmu.event_constraints = intel_slm_event_constraints;
		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
		x86_pmu.extra_regs = intel_knl_extra_regs;

		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		pr_cont("Knights Landing events, ");
		break;

	case 142: /* 14nm Kabylake Mobile */
	case 158: /* 14nm Kabylake Desktop */
	case 78: /* 14nm Skylake Mobile */
	case 94: /* 14nm Skylake Desktop */
	case 85: /* 14nm Skylake Server */
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
		intel_pmu_lbr_init_skl();

		/* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
		event_attr_td_recovery_bubbles.event_str_noht =
			"event=0xd,umask=0x1,cmask=1";
		event_attr_td_recovery_bubbles.event_str_ht =
			"event=0xd,umask=0x1,cmask=1,any=1";

		x86_pmu.event_constraints = intel_skl_event_constraints;
		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
		x86_pmu.extra_regs = intel_skl_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
						  skl_format_attr);
		WARN_ON(!x86_pmu.format_attrs);
		x86_pmu.cpu_events = hsw_events_attrs;
		pr_cont("Skylake events, ");
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
			x86_pmu.event_constraints = intel_v1_event_constraints;
			pr_cont("generic architected perfmon v1, ");
			break;
		default:
			/*
			 * default constraints for v2 and up
			 */
			x86_pmu.event_constraints = intel_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			break;
		}
	}

	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;

	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		/*
		 * event on fixed counter2 (REF_CYCLES) only works on this
		 * counter, so do not extend mask to generic counters
		 */
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask == FIXED_EVENT_FLAGS &&
			    c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
				c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			}
			c->idxmsk64 &=
				~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
			c->weight = hweight64(c->idxmsk64);
		}
	}

	/*
	 * Accessing LBR MSRs may cause a #GP under certain circumstances,
	 * e.g. KVM doesn't support the LBR MSRs.
	 * Check all LBR MSRs here and disable LBR access if any of them
	 * cannot be accessed.
	 */
	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
		x86_pmu.lbr_nr = 0;
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
			x86_pmu.lbr_nr = 0;
	}

	/*
	 * Accessing extra MSRs may cause a #GP under certain circumstances,
	 * e.g. KVM doesn't support offcore response events.
	 * Check all extra_regs here.
	 */
	if (x86_pmu.extra_regs) {
		for (er = x86_pmu.extra_regs; er->msr; er++) {
			er->extra_msr_access = check_msr(er->msr, 0x11UL);
			/* Disable LBR select mapping */
			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
				x86_pmu.lbr_sel_map = NULL;
		}
	}

	/* Support full width counters using alternative MSR range */
	if (x86_pmu.intel_cap.full_width_write) {
		x86_pmu.max_period = x86_pmu.cntval_mask;
		x86_pmu.perfctr = MSR_IA32_PMC0;
		pr_cont("full-width counters, ");
	}

	return 0;
}

/*
 * HT bug: phase 2 init
 * Called once we have valid topology information to check
 * whether or not HT is enabled.
 * If HT is off, then we disable the workaround.
 */
static __init int fixup_ht_bug(void)
{
	int c;
	/*
	 * problem not present on this CPU model, nothing to do
	 */
	if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
		return 0;

	if (topology_max_smt_threads() > 1) {
		pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
		return 0;
	}

	if (lockup_detector_suspend() != 0) {
		pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
		return 0;
	}

	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

	x86_pmu.start_scheduling = NULL;
	x86_pmu.commit_scheduling = NULL;
	x86_pmu.stop_scheduling = NULL;

	lockup_detector_resume();

	get_online_cpus();

	for_each_online_cpu(c) {
		free_excl_cntrs(c);
	}

	put_online_cpus();
	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
	return 0;
}

subsys_initcall(fixup_ht_bug);