xref: /linux/arch/x86/events/intel/ds.c (revision ebfd9b7af2fb1e4bbc97a8b33845e7402c3defa9)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/bitops.h>
3 #include <linux/types.h>
4 #include <linux/slab.h>
5 #include <linux/sched/clock.h>
6 
7 #include <asm/cpu_entry_area.h>
8 #include <asm/debugreg.h>
9 #include <asm/perf_event.h>
10 #include <asm/tlbflush.h>
11 #include <asm/insn.h>
12 #include <asm/io.h>
13 #include <asm/msr.h>
14 #include <asm/timer.h>
15 
16 #include "../perf_event.h"
17 
18 /* Waste a full page so it can be mapped into the cpu_entry_area */
19 DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
20 
21 /* Size of a single BTS record, in bytes. */
22 #define BTS_RECORD_SIZE		24
23 
/*
 * Size of the per-CPU instruction buffer (insn_buffer) that
 * alloc_pebs_buffer() allocates when the PEBS format is older than 2.
 */
24 #define PEBS_FIXUP_SIZE		PAGE_SIZE
25 
26 /*
27  * pebs_record_32 for p4 and core not supported
28 
29 struct pebs_record_32 {
30 	u32 flags, ip;
31 	u32 ax, bx, cx, dx;
32 	u32 si, di, bp, sp;
33 };
34 
35  */
36 
37 union omr_encoding {
38 	struct {
39 		u8 omr_source : 4;
40 		u8 omr_remote : 1;
41 		u8 omr_hitm : 1;
42 		u8 omr_snoop : 1;
43 		u8 omr_promoted : 1;
44 	};
45 	u8 omr_full;
46 };
47 
48 union intel_x86_pebs_dse {
49 	u64 val;
50 	struct {
51 		unsigned int ld_dse:4;
52 		unsigned int ld_stlb_miss:1;
53 		unsigned int ld_locked:1;
54 		unsigned int ld_data_blk:1;
55 		unsigned int ld_addr_blk:1;
56 		unsigned int ld_reserved:24;
57 	};
58 	struct {
59 		unsigned int st_l1d_hit:1;
60 		unsigned int st_reserved1:3;
61 		unsigned int st_stlb_miss:1;
62 		unsigned int st_locked:1;
63 		unsigned int st_reserved2:26;
64 	};
65 	struct {
66 		unsigned int st_lat_dse:4;
67 		unsigned int st_lat_stlb_miss:1;
68 		unsigned int st_lat_locked:1;
69 		unsigned int ld_reserved3:26;
70 	};
71 	struct {
72 		unsigned int mtl_dse:5;
73 		unsigned int mtl_locked:1;
74 		unsigned int mtl_stlb_miss:1;
75 		unsigned int mtl_fwd_blk:1;
76 		unsigned int ld_reserved4:24;
77 	};
78 	struct {
79 		unsigned int lnc_dse:8;
80 		unsigned int ld_reserved5:2;
81 		unsigned int lnc_stlb_miss:1;
82 		unsigned int lnc_locked:1;
83 		unsigned int lnc_data_blk:1;
84 		unsigned int lnc_addr_blk:1;
85 		unsigned int ld_reserved6:18;
86 	};
87 	struct {
88 		unsigned int pnc_dse: 8;
89 		unsigned int pnc_l2_miss:1;
90 		unsigned int pnc_stlb_clean_hit:1;
91 		unsigned int pnc_stlb_any_hit:1;
92 		unsigned int pnc_stlb_miss:1;
93 		unsigned int pnc_locked:1;
94 		unsigned int pnc_data_blk:1;
95 		unsigned int pnc_addr_blk:1;
96 		unsigned int pnc_fb_full:1;
97 		unsigned int ld_reserved8:16;
98 	};
99 	struct {
100 		unsigned int arw_dse:8;
101 		unsigned int arw_l2_miss:1;
102 		unsigned int arw_xq_promotion:1;
103 		unsigned int arw_reissue:1;
104 		unsigned int arw_stlb_miss:1;
105 		unsigned int arw_locked:1;
106 		unsigned int arw_data_blk:1;
107 		unsigned int arw_addr_blk:1;
108 		unsigned int arw_fb_full:1;
109 		unsigned int ld_reserved9:16;
110 	};
111 };
112 
113 
114 /*
115  * Map PEBS Load Latency Data Source encodings to generic
116  * memory data source information
117  */
/* Shorthands used to compose perf memory data source values. */
#define P(a, b)			PERF_MEM_S(a, b)
#define LEVEL(x)		P(LVLNUM, x)
#define REM			P(REMOTE, REMOTE)
/* Load operation that hit at some level */
#define OP_LH			(P(LVL, HIT) | P(OP, LOAD))
/* Both the "none" and the "miss" snoop bits */
#define SNOOP_NONE_MISS		(P(SNOOP, MISS) | P(SNOOP, NONE))
123 
124 /* Version for Sandy Bridge and later */
125 static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
126 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
127 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
128 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
129 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
130 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
131 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
132 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
133 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
134 	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
135 	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
136 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
137 	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
138 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
139 	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
140 	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
141 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
142 };
143 
144 /* Patch up minor differences in the bits */
intel_pmu_pebs_data_source_nhm(void)145 void __init intel_pmu_pebs_data_source_nhm(void)
146 {
147 	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
148 	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
149 	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
150 }
151 
__intel_pmu_pebs_data_source_skl(bool pmem,u64 * data_source)152 static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
153 {
154 	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
155 
156 	data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
157 	data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
158 	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
159 	data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
160 	data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
161 }
162 
intel_pmu_pebs_data_source_skl(bool pmem)163 void __init intel_pmu_pebs_data_source_skl(bool pmem)
164 {
165 	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
166 }
167 
__intel_pmu_pebs_data_source_grt(u64 * data_source)168 static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
169 {
170 	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
171 	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
172 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
173 }
174 
intel_pmu_pebs_data_source_grt(void)175 void __init intel_pmu_pebs_data_source_grt(void)
176 {
177 	__intel_pmu_pebs_data_source_grt(pebs_data_source);
178 }
179 
intel_pmu_pebs_data_source_adl(void)180 void __init intel_pmu_pebs_data_source_adl(void)
181 {
182 	u64 *data_source;
183 
184 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
185 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
186 	__intel_pmu_pebs_data_source_skl(false, data_source);
187 
188 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
189 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
190 	__intel_pmu_pebs_data_source_grt(data_source);
191 }
192 
__intel_pmu_pebs_data_source_cmt(u64 * data_source)193 static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
194 {
195 	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
196 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
197 	data_source[0x0a] = OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE);
198 	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
199 	data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
200 	data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
201 }
202 
intel_pmu_pebs_data_source_mtl(void)203 void __init intel_pmu_pebs_data_source_mtl(void)
204 {
205 	u64 *data_source;
206 
207 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
208 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
209 	__intel_pmu_pebs_data_source_skl(false, data_source);
210 
211 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
212 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
213 	__intel_pmu_pebs_data_source_cmt(data_source);
214 }
215 
intel_pmu_pebs_data_source_arl_h(void)216 void __init intel_pmu_pebs_data_source_arl_h(void)
217 {
218 	u64 *data_source;
219 
220 	intel_pmu_pebs_data_source_lnl();
221 
222 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
223 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
224 	__intel_pmu_pebs_data_source_cmt(data_source);
225 }
226 
intel_pmu_pebs_data_source_cmt(void)227 void __init intel_pmu_pebs_data_source_cmt(void)
228 {
229 	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
230 }
231 
232 /* Version for Lion Cove and later */
233 static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
234 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),	/* 0x00: ukn L3 */
235 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
236 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
237 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: LFB/L1 Miss Handling Buffer hit */
238 	0,							/* 0x04: Reserved */
239 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x05: L2 Hit */
240 	OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE),			/* 0x06: L2 Miss Handling Buffer Hit */
241 	0,							/* 0x07: Reserved */
242 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),	/* 0x08: L3 Hit */
243 	0,							/* 0x09: Reserved */
244 	0,							/* 0x0a: Reserved */
245 	0,							/* 0x0b: Reserved */
246 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOPX, FWD),	/* 0x0c: L3 Hit Snoop Fwd */
247 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0d: L3 Hit Snoop HitM */
248 	0,							/* 0x0e: Reserved */
249 	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0f: L3 Miss Snoop HitM */
250 	OP_LH | LEVEL(MSC) | P(SNOOP, NONE),			/* 0x10: Memory-side Cache Hit */
251 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
252 };
253 
intel_pmu_pebs_data_source_lnl(void)254 void __init intel_pmu_pebs_data_source_lnl(void)
255 {
256 	u64 *data_source;
257 
258 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
259 	memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
260 
261 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
262 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
263 	__intel_pmu_pebs_data_source_cmt(data_source);
264 }
265 
266 /* Version for Panther Cove and later */
267 
268 /* L2 hit */
269 #define PNC_PEBS_DATA_SOURCE_MAX	16
270 static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
271 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: non-cache access */
272 	OP_LH               | LEVEL(L0) | P(SNOOP, NONE),	/* 0x01: L0 hit */
273 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
274 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: L1 Miss Handling Buffer hit */
275 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x04: L2 Hit Clean */
276 	0,							/* 0x05: Reserved */
277 	0,							/* 0x06: Reserved */
278 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),	/* 0x07: L2 Hit Snoop HIT */
279 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),	/* 0x08: L2 Hit Snoop Hit Modified */
280 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),	/* 0x09: Prefetch Promotion */
281 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),	/* 0x0a: Cross Core Prefetch Promotion */
282 	0,							/* 0x0b: Reserved */
283 	0,							/* 0x0c: Reserved */
284 	0,							/* 0x0d: Reserved */
285 	0,							/* 0x0e: Reserved */
286 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),	/* 0x0f: uncached */
287 };
288 
289 /* Version for Arctic Wolf and later */
290 
291 /* L2 hit */
292 #define ARW_PEBS_DATA_SOURCE_MAX	16
293 static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = {
294 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: non-cache access */
295 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
296 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x02: WCB Hit */
297 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x03: L2 Hit Clean */
298 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),	/* 0x04: L2 Hit Snoop HIT */
299 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),	/* 0x05: L2 Hit Snoop Hit Modified */
300 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),	/* 0x06: uncached */
301 	0,							/* 0x07: Reserved */
302 	0,							/* 0x08: Reserved */
303 	0,							/* 0x09: Reserved */
304 	0,							/* 0x0a: Reserved */
305 	0,							/* 0x0b: Reserved */
306 	0,							/* 0x0c: Reserved */
307 	0,							/* 0x0d: Reserved */
308 	0,							/* 0x0e: Reserved */
309 	0,							/* 0x0f: Reserved */
310 };
311 
312 /* L2 miss */
313 #define OMR_DATA_SOURCE_MAX		16
314 static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
315 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: invalid */
316 	0,							/* 0x01: Reserved */
317 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE),	/* 0x02: local CA shared cache */
318 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */
319 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO),	/* 0x04: other CA IO agent */
320 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE),	/* 0x05: other CA shared cache */
321 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */
322 	OP_LH | LEVEL(RAM) | P(REGION, MMIO),			/* 0x07: MMIO */
323 	OP_LH | LEVEL(RAM) | P(REGION, MEM0),			/* 0x08: Memory region 0 */
324 	OP_LH | LEVEL(RAM) | P(REGION, MEM1),			/* 0x09: Memory region 1 */
325 	OP_LH | LEVEL(RAM) | P(REGION, MEM2),			/* 0x0a: Memory region 2 */
326 	OP_LH | LEVEL(RAM) | P(REGION, MEM3),			/* 0x0b: Memory region 3 */
327 	OP_LH | LEVEL(RAM) | P(REGION, MEM4),			/* 0x0c: Memory region 4 */
328 	OP_LH | LEVEL(RAM) | P(REGION, MEM5),			/* 0x0d: Memory region 5 */
329 	OP_LH | LEVEL(RAM) | P(REGION, MEM6),			/* 0x0e: Memory region 6 */
330 	OP_LH | LEVEL(RAM) | P(REGION, MEM7),			/* 0x0f: Memory region 7 */
331 };
332 
parse_omr_data_source(u8 dse)333 static u64 parse_omr_data_source(u8 dse)
334 {
335 	union omr_encoding omr;
336 	u64 val = 0;
337 
338 	omr.omr_full = dse;
339 	val = omr_data_source[omr.omr_source];
340 	if (omr.omr_source > 0x1 && omr.omr_source < 0x7)
341 		val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0;
342 	else if (omr.omr_source > 0x7)
343 		val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM);
344 
345 	if (omr.omr_remote)
346 		val |= REM;
347 
348 	if (omr.omr_source == 0x2) {
349 		u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1);
350 
351 		if (omr.omr_hitm)
352 			val |= P(SNOOP, HITM);
353 		else if (snoop == 0x0)
354 			val |= P(SNOOP, NA);
355 		else if (snoop == 0x1)
356 			val |= P(SNOOP, MISS);
357 		else if (snoop == 0x2)
358 			val |= P(SNOOP, HIT);
359 		else if (snoop == 0x3)
360 			val |= P(SNOOP, NONE);
361 	} else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
362 		val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
363 		val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
364 	} else {
365 		val |= P(SNOOP, NONE);
366 	}
367 
368 	return val;
369 }
370 
/*
 * Build the memory data source value for a precise store from @status.
 *
 * The result always carries OP=STORE, SNOOP=NA, LVL=L1 and TLB=L2; the
 * hit/miss and lock information is derived from three status bits.
 */
static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	u64 val;

	val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	/*
	 * bit 4: TLB access
	 * 1 = store missed the 2nd level TLB, so it either hit the walker
	 *     or the OS; 0 = it hit the 2nd level TLB
	 */
	val |= dse.st_stlb_miss ? P(TLB, MISS) : P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, all we know is that it missed L1D
	 */
	val |= dse.st_l1d_hit ? P(LVL, HIT) : P(LVL, MISS);

	/* bit 5: locked prefix */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}
408 
/*
 * Build a memory data source value from the event flags and @status.
 *
 * Starts from PERF_MEM_NA. The operation is set to STORE or LOAD when the
 * event carries the matching PERF_X86_EVENT_PEBS_{ST,LD}_HSW flag. For
 * store events, bit 0 of @status selects between L1 hit and L1 miss.
 */
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse = { .val = PERF_MEM_NA };
	const bool is_store = event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW;

	if (!is_store) {
		if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
			dse.mem_op = PERF_MEM_OP_LOAD;
		return dse.val;
	}

	dse.mem_op = PERF_MEM_OP_STORE;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	dse.mem_lvl = (status & 1) ? PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT
				   : PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;

	return dse.val;
}
436 
/*
 * OR the TLB and lock information into *@val.
 *
 * @val:  data source value to update in place
 * @tlb:  true if the access missed the 2nd level TLB
 * @lock: true if the access used a locked prefix
 */
static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
{
	/* TLB L2 is reported in both cases */
	u64 bits = P(TLB, L2);

	bits |= tlb ? P(TLB, MISS) : (P(TLB, HIT) | P(TLB, L1));

	if (lock)
		bits |= P(LOCK, LOCKED);

	*val |= bits;
}
453 
454 /* Retrieve the latency data for e-core of ADL */
/*
 * Common latency data decoding behind grt_latency_data() and
 * cmt_latency_data().
 *
 * @event:  event whose PMU provides the data source table
 * @status: not used by this helper
 * @dse:    data source encoding; masked with PERF_PEBS_DATA_SOURCE_GRT_MASK
 *          before it indexes the table
 * @tlb:    the access missed the 2nd level TLB
 * @lock:   the access was locked
 * @blk:    selects BLK=DATA (true) or BLK=NA (false)
 *
 * Warns once if it is called for the big core PMU of a hybrid system.
 */
static u64 __grt_latency_data(struct perf_event *event, u64 status,
			       u8 dse, bool tlb, bool lock, bool blk)
{
	u8 idx;
	u64 val;

	WARN_ON_ONCE(is_hybrid() &&
		     hybrid_pmu(event->pmu)->pmu_type == hybrid_big);

	idx = dse & PERF_PEBS_DATA_SOURCE_GRT_MASK;
	val = hybrid_var(event->pmu, pebs_data_source)[idx];

	pebs_set_tlb_lock(&val, tlb, lock);

	val |= blk ? P(BLK, DATA) : P(BLK, NA);

	return val;
}
475 
/*
 * Decode @status with the ld_* view and hand the fields to
 * __grt_latency_data().
 *
 * NOTE(review): ld_locked is passed as the "tlb" argument and ld_stlb_miss
 * as the "lock" argument. Presumably the two bits have swapped positions
 * on this core compared to the ld_* layout; confirm before "fixing" it.
 */
u64 grt_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	bool tlb = dse.ld_locked;
	bool lock = dse.ld_stlb_miss;

	return __grt_latency_data(event, status, dse.ld_dse, tlb, lock,
				  dse.ld_data_blk);
}
486 
487 /* Retrieve the latency data for e-core of MTL */
/*
 * Decode @status with the mtl_* view and hand the fields to
 * __grt_latency_data(): mtl_stlb_miss as TLB miss, mtl_locked as lock and
 * mtl_fwd_blk as the block indication.
 */
u64 cmt_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	bool tlb = dse.mtl_stlb_miss;
	bool lock = dse.mtl_locked;

	return __grt_latency_data(event, status, dse.mtl_dse, tlb, lock,
				  dse.mtl_fwd_blk);
}
498 
/*
 * Decode @status with the arw_* view into a memory data source value.
 *
 * The base value comes from arw_pebs_l2_hit_data_source[] when the
 * arw_l2_miss bit is clear and from parse_omr_data_source() otherwise; a
 * base value of 0 is replaced by a generic "load, level NA, snoop NA".
 * TLB, lock and block information is then added. For events flagged
 * PERF_X86_EVENT_PEBS_ST_HSW the operation is overridden to STORE.
 */
static u64 arw_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	union perf_mem_data_src src;

	src.val = dse.arw_l2_miss ?
		  parse_omr_data_source(dse.arw_dse) :
		  arw_pebs_l2_hit_data_source[dse.arw_dse & 0xf];

	if (!src.val)
		src.val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

	src.val |= dse.arw_stlb_miss ?
		   P(TLB, MISS) | P(TLB, L2) :
		   P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	if (dse.arw_locked)
		src.val |= P(LOCK, LOCKED);

	if (!dse.arw_data_blk && !dse.arw_addr_blk) {
		src.val |= P(BLK, NA);
	} else {
		if (dse.arw_data_blk)
			src.val |= P(BLK, DATA);
		if (dse.arw_addr_blk)
			src.val |= P(BLK, ADDR);
	}

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		src.mem_op = P(OP, STORE);

	return src.val;
}
536 
/*
 * Decode @status with the lnc_* view into a memory data source value.
 *
 * The base value is looked up in the PMU's pebs_data_source table with
 * (status & PERF_PEBS_DATA_SOURCE_MASK); a 0 entry is replaced by a
 * generic "load, level NA, snoop NA". TLB, lock and block information is
 * then added. For events flagged PERF_X86_EVENT_PEBS_ST_HSW the operation
 * is overridden to STORE.
 */
static u64 lnc_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	union perf_mem_data_src src;

	/* LNC core latency data */
	src.val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
	if (!src.val)
		src.val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

	src.val |= dse.lnc_stlb_miss ?
		   P(TLB, MISS) | P(TLB, L2) :
		   P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	if (dse.lnc_locked)
		src.val |= P(LOCK, LOCKED);

	if (!dse.lnc_data_blk && !dse.lnc_addr_blk) {
		src.val |= P(BLK, NA);
	} else {
		if (dse.lnc_data_blk)
			src.val |= P(BLK, DATA);
		if (dse.lnc_addr_blk)
			src.val |= P(BLK, ADDR);
	}

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		src.mem_op = P(OP, STORE);

	return src.val;
}
571 
/*
 * Dispatch on the PMU type of @event: hybrid_small PMUs are decoded by
 * cmt_latency_data(), everything else by lnc_latency_data().
 */
u64 lnl_latency_data(struct perf_event *event, u64 status)
{
	if (hybrid_pmu(event->pmu)->pmu_type != hybrid_small)
		return lnc_latency_data(event, status);

	return cmt_latency_data(event, status);
}
581 
/*
 * Dispatch on the PMU type of @event: hybrid_tiny PMUs are decoded by
 * cmt_latency_data(), everything else is passed on to lnl_latency_data().
 */
u64 arl_h_latency_data(struct perf_event *event, u64 status)
{
	if (hybrid_pmu(event->pmu)->pmu_type != hybrid_tiny)
		return lnl_latency_data(event, status);

	return cmt_latency_data(event, status);
}
591 
/*
 * Decode @status with the pnc_* view into a memory data source value.
 *
 * The base value comes from pnc_pebs_l2_hit_data_source[] when the
 * pnc_l2_miss bit is clear and from parse_omr_data_source() otherwise; a
 * base value of 0 is replaced by a generic "load, level NA, snoop NA".
 * TLB, lock and block information is then added. For events flagged
 * PERF_X86_EVENT_PEBS_ST_HSW the operation is overridden to STORE.
 */
u64 pnc_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	union perf_mem_data_src src;

	src.val = dse.pnc_l2_miss ?
		  parse_omr_data_source(dse.pnc_dse) :
		  pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf];

	if (!src.val)
		src.val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

	src.val |= dse.pnc_stlb_miss ?
		   P(TLB, MISS) | P(TLB, L2) :
		   P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	if (dse.pnc_locked)
		src.val |= P(LOCK, LOCKED);

	if (!dse.pnc_data_blk && !dse.pnc_addr_blk) {
		src.val |= P(BLK, NA);
	} else {
		if (dse.pnc_data_blk)
			src.val |= P(BLK, DATA);
		if (dse.pnc_addr_blk)
			src.val |= P(BLK, ADDR);
	}

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		src.mem_op = P(OP, STORE);

	return src.val;
}
629 
/*
 * Dispatch on the PMU type of @event: hybrid_small PMUs are decoded by
 * arw_latency_data(), everything else by pnc_latency_data().
 */
u64 nvl_latency_data(struct perf_event *event, u64 status)
{
	if (hybrid_pmu(event->pmu)->pmu_type != hybrid_small)
		return pnc_latency_data(event, status);

	return arw_latency_data(event, status);
}
639 
/*
 * Decode @status with the ld_* view into a memory data source value.
 *
 * The base value is taken from the PMU's pebs_data_source table, indexed
 * by bits 0-3. TLB/lock information is added unless x86_pmu.pebs_no_tlb is
 * set, and block information unless x86_pmu.pebs_block is clear; the
 * corresponding fields are reported as NA in those cases.
 */
static u64 load_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	/* use the mapping table for bit 0-3 */
	u64 val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

	/* Nehalem models do not support TLB and lock info */
	if (x86_pmu.pebs_no_tlb)
		return val | P(TLB, NA) | P(LOCK, NA);

	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);

	/*
	 * Ice Lake and earlier models do not support block info; later
	 * models report NA as well when neither block bit is set.
	 */
	if (!x86_pmu.pebs_block || (!dse.ld_data_blk && !dse.ld_addr_blk))
		return val | P(BLK, NA);

	/*
	 * bit 6: load was blocked since its data could not be forwarded
	 *        from a preceding store
	 */
	if (dse.ld_data_blk)
		val |= P(BLK, DATA);

	/*
	 * bit 7: load was blocked due to potential address conflict with
	 *        a preceding store
	 */
	if (dse.ld_addr_blk)
		val |= P(BLK, ADDR);

	return val;
}
688 
/*
 * Decode @status with the st_lat_* view into a memory data source value.
 *
 * The base value is taken from the PMU's pebs_data_source table, indexed
 * by bits 0-3; TLB and lock information is added and the block field is
 * always reported as NA.
 */
static u64 store_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	union perf_mem_data_src src;

	/* use the mapping table for bit 0-3 */
	src.val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];

	pebs_set_tlb_lock(&src.val, dse.st_lat_stlb_miss, dse.st_lat_locked);

	src.val |= P(BLK, NA);

	/*
	 * the pebs_data_source table is only for loads
	 * so override the mem_op to say STORE instead
	 */
	src.mem_op = P(OP,STORE);

	return src.val;
}
715 
716 struct pebs_record_core {
717 	u64 flags, ip;
718 	u64 ax, bx, cx, dx;
719 	u64 si, di, bp, sp;
720 	u64 r8,  r9,  r10, r11;
721 	u64 r12, r13, r14, r15;
722 };
723 
724 struct pebs_record_nhm {
725 	u64 flags, ip;
726 	u64 ax, bx, cx, dx;
727 	u64 si, di, bp, sp;
728 	u64 r8,  r9,  r10, r11;
729 	u64 r12, r13, r14, r15;
730 	u64 status, dla, dse, lat;
731 };
732 
733 /*
734  * Same as pebs_record_nhm, with two additional fields.
735  */
736 struct pebs_record_hsw {
737 	u64 flags, ip;
738 	u64 ax, bx, cx, dx;
739 	u64 si, di, bp, sp;
740 	u64 r8,  r9,  r10, r11;
741 	u64 r12, r13, r14, r15;
742 	u64 status, dla, dse, lat;
743 	u64 real_ip, tsx_tuning;
744 };
745 
746 union hsw_tsx_tuning {
747 	struct {
748 		u32 cycles_last_block     : 32,
749 		    hle_abort		  : 1,
750 		    rtm_abort		  : 1,
751 		    instruction_abort     : 1,
752 		    non_instruction_abort : 1,
753 		    retry		  : 1,
754 		    data_conflict	  : 1,
755 		    capacity_writes	  : 1,
756 		    capacity_reads	  : 1;
757 	};
758 	u64	    value;
759 };
760 
761 #define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
762 
763 /* Same as HSW, plus TSC */
764 
765 struct pebs_record_skl {
766 	u64 flags, ip;
767 	u64 ax, bx, cx, dx;
768 	u64 si, di, bp, sp;
769 	u64 r8,  r9,  r10, r11;
770 	u64 r12, r13, r14, r15;
771 	u64 status, dla, dse, lat;
772 	u64 real_ip, tsx_tuning;
773 	u64 tsc;
774 };
775 
init_debug_store_on_cpu(int cpu)776 void init_debug_store_on_cpu(int cpu)
777 {
778 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
779 
780 	if (!ds)
781 		return;
782 
783 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
784 		     (u32)((u64)(unsigned long)ds),
785 		     (u32)((u64)(unsigned long)ds >> 32));
786 }
787 
fini_debug_store_on_cpu(int cpu)788 void fini_debug_store_on_cpu(int cpu)
789 {
790 	if (!per_cpu(cpu_hw_events, cpu).ds)
791 		return;
792 
793 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
794 }
795 
/*
 * Per-CPU buffer of PEBS_FIXUP_SIZE bytes. alloc_pebs_buffer() allocates it
 * only when the PEBS format is older than 2; release_pebs_buffer() frees it.
 */
796 static DEFINE_PER_CPU(void *, insn_buffer);
797 
/*
 * Map the buffer at @addr into the cpu_entry_area, page by page.
 *
 * @cea:  first virtual address of the cpu_entry_area range to populate
 * @addr: kernel virtual address of the backing buffer
 * @size: number of bytes to map
 * @prot: page protection for the new mappings
 */
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
	const unsigned long va_start = (unsigned long)cea;
	phys_addr_t pa = virt_to_phys(addr);
	size_t off;

	preempt_disable();

	for (off = 0; off < size; off += PAGE_SIZE)
		cea_set_pte(cea + off, pa + off, prot);

	/*
	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
	 * all TLB entries for it.
	 */
	flush_tlb_kernel_range(va_start, va_start + size);

	preempt_enable();
}
817 
/*
 * Reset the @size bytes of cpu_entry_area mappings starting at @cea to
 * physical address 0 with PAGE_NONE protection, then flush the TLB
 * entries for that range.
 */
static void ds_clear_cea(void *cea, size_t size)
{
	const unsigned long va_start = (unsigned long)cea;
	size_t off;

	preempt_disable();

	for (off = 0; off < size; off += PAGE_SIZE)
		cea_set_pte(cea + off, 0, PAGE_NONE);

	flush_tlb_kernel_range(va_start, va_start + size);

	preempt_enable();
}
830 
/*
 * Allocate zeroed pages covering @size bytes on the memory node of @cpu.
 *
 * @flags: allocation flags; __GFP_ZERO is always added
 *
 * Returns the kernel address of the pages, or NULL on failure.
 */
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
	struct page *page;

	page = __alloc_pages_node(cpu_to_node(cpu), flags | __GFP_ZERO,
				  get_order(size));
	if (!page)
		return NULL;

	return page_address(page);
}
840 
/*
 * Free pages obtained from dsalloc_pages(). @size must be the size that
 * was used for the allocation; a NULL @buffer is ignored.
 */
static void dsfree_pages(const void *buffer, size_t size)
{
	if (!buffer)
		return;

	free_pages((unsigned long)buffer, get_order(size));
}
846 
alloc_pebs_buffer(int cpu)847 static int alloc_pebs_buffer(int cpu)
848 {
849 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
850 	struct debug_store *ds = hwev->ds;
851 	size_t bsiz = x86_pmu.pebs_buffer_size;
852 	int max, node = cpu_to_node(cpu);
853 	void *buffer, *insn_buff, *cea;
854 
855 	if (!intel_pmu_has_pebs())
856 		return 0;
857 
858 	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
859 	if (unlikely(!buffer))
860 		return -ENOMEM;
861 
862 	if (x86_pmu.arch_pebs) {
863 		hwev->pebs_vaddr = buffer;
864 		return 0;
865 	}
866 
867 	/*
868 	 * HSW+ already provides us the eventing ip; no need to allocate this
869 	 * buffer then.
870 	 */
871 	if (x86_pmu.intel_cap.pebs_format < 2) {
872 		insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
873 		if (!insn_buff) {
874 			dsfree_pages(buffer, bsiz);
875 			return -ENOMEM;
876 		}
877 		per_cpu(insn_buffer, cpu) = insn_buff;
878 	}
879 	hwev->pebs_vaddr = buffer;
880 	/* Update the cpu entry area mapping */
881 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
882 	ds->pebs_buffer_base = (unsigned long) cea;
883 	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
884 	ds->pebs_index = ds->pebs_buffer_base;
885 	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
886 	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
887 	return 0;
888 }
889 
release_pebs_buffer(int cpu)890 static void release_pebs_buffer(int cpu)
891 {
892 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
893 	void *cea;
894 
895 	if (!intel_pmu_has_pebs())
896 		return;
897 
898 	if (x86_pmu.ds_pebs) {
899 		kfree(per_cpu(insn_buffer, cpu));
900 		per_cpu(insn_buffer, cpu) = NULL;
901 
902 		/* Clear the fixmap */
903 		cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
904 		ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
905 	}
906 
907 	dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
908 	hwev->pebs_vaddr = NULL;
909 }
910 
alloc_bts_buffer(int cpu)911 static int alloc_bts_buffer(int cpu)
912 {
913 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
914 	struct debug_store *ds = hwev->ds;
915 	void *buffer, *cea;
916 	int max;
917 
918 	if (!x86_pmu.bts)
919 		return 0;
920 
921 	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
922 	if (unlikely(!buffer)) {
923 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
924 		return -ENOMEM;
925 	}
926 	hwev->ds_bts_vaddr = buffer;
927 	/* Update the fixmap */
928 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
929 	ds->bts_buffer_base = (unsigned long) cea;
930 	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
931 	ds->bts_index = ds->bts_buffer_base;
932 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
933 	ds->bts_absolute_maximum = ds->bts_buffer_base +
934 					max * BTS_RECORD_SIZE;
935 	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
936 					(max / 16) * BTS_RECORD_SIZE;
937 	return 0;
938 }
939 
release_bts_buffer(int cpu)940 static void release_bts_buffer(int cpu)
941 {
942 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
943 	void *cea;
944 
945 	if (!x86_pmu.bts)
946 		return;
947 
948 	/* Clear the fixmap */
949 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
950 	ds_clear_cea(cea, BTS_BUFFER_SIZE);
951 	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
952 	hwev->ds_bts_vaddr = NULL;
953 }
954 
alloc_ds_buffer(int cpu)955 static int alloc_ds_buffer(int cpu)
956 {
957 	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
958 
959 	memset(ds, 0, sizeof(*ds));
960 	per_cpu(cpu_hw_events, cpu).ds = ds;
961 	return 0;
962 }
963 
release_ds_buffer(int cpu)964 static void release_ds_buffer(int cpu)
965 {
966 	per_cpu(cpu_hw_events, cpu).ds = NULL;
967 }
968 
release_ds_buffers(void)969 void release_ds_buffers(void)
970 {
971 	int cpu;
972 
973 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
974 		return;
975 
976 	for_each_possible_cpu(cpu)
977 		release_ds_buffer(cpu);
978 
979 	for_each_possible_cpu(cpu) {
980 		/*
981 		 * Again, ignore errors from offline CPUs, they will no longer
982 		 * observe cpu_hw_events.ds and not program the DS_AREA when
983 		 * they come up.
984 		 */
985 		fini_debug_store_on_cpu(cpu);
986 	}
987 
988 	for_each_possible_cpu(cpu) {
989 		if (x86_pmu.ds_pebs)
990 			release_pebs_buffer(cpu);
991 		release_bts_buffer(cpu);
992 	}
993 }
994 
reserve_ds_buffers(void)995 void reserve_ds_buffers(void)
996 {
997 	int bts_err = 0, pebs_err = 0;
998 	int cpu;
999 
1000 	x86_pmu.bts_active = 0;
1001 
1002 	if (x86_pmu.ds_pebs)
1003 		x86_pmu.pebs_active = 0;
1004 
1005 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
1006 		return;
1007 
1008 	if (!x86_pmu.bts)
1009 		bts_err = 1;
1010 
1011 	if (!x86_pmu.ds_pebs)
1012 		pebs_err = 1;
1013 
1014 	for_each_possible_cpu(cpu) {
1015 		if (alloc_ds_buffer(cpu)) {
1016 			bts_err = 1;
1017 			pebs_err = 1;
1018 		}
1019 
1020 		if (!bts_err && alloc_bts_buffer(cpu))
1021 			bts_err = 1;
1022 
1023 		if (x86_pmu.ds_pebs && !pebs_err &&
1024 		    alloc_pebs_buffer(cpu))
1025 			pebs_err = 1;
1026 
1027 		if (bts_err && pebs_err)
1028 			break;
1029 	}
1030 
1031 	if (bts_err) {
1032 		for_each_possible_cpu(cpu)
1033 			release_bts_buffer(cpu);
1034 	}
1035 
1036 	if (x86_pmu.ds_pebs && pebs_err) {
1037 		for_each_possible_cpu(cpu)
1038 			release_pebs_buffer(cpu);
1039 	}
1040 
1041 	if (bts_err && pebs_err) {
1042 		for_each_possible_cpu(cpu)
1043 			release_ds_buffer(cpu);
1044 	} else {
1045 		if (x86_pmu.bts && !bts_err)
1046 			x86_pmu.bts_active = 1;
1047 
1048 		if (x86_pmu.ds_pebs && !pebs_err)
1049 			x86_pmu.pebs_active = 1;
1050 
1051 		for_each_possible_cpu(cpu) {
1052 			/*
1053 			 * Ignores wrmsr_on_cpu() errors for offline CPUs they
1054 			 * will get this call through intel_pmu_cpu_starting().
1055 			 */
1056 			init_debug_store_on_cpu(cpu);
1057 		}
1058 	}
1059 }
1060 
alloc_arch_pebs_buf_on_cpu(int cpu)1061 inline int alloc_arch_pebs_buf_on_cpu(int cpu)
1062 {
1063 	if (!x86_pmu.arch_pebs)
1064 		return 0;
1065 
1066 	return alloc_pebs_buffer(cpu);
1067 }
1068 
release_arch_pebs_buf_on_cpu(int cpu)1069 inline void release_arch_pebs_buf_on_cpu(int cpu)
1070 {
1071 	if (!x86_pmu.arch_pebs)
1072 		return;
1073 
1074 	release_pebs_buffer(cpu);
1075 }
1076 
init_arch_pebs_on_cpu(int cpu)1077 void init_arch_pebs_on_cpu(int cpu)
1078 {
1079 	struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
1080 	u64 arch_pebs_base;
1081 
1082 	if (!x86_pmu.arch_pebs)
1083 		return;
1084 
1085 	if (!cpuc->pebs_vaddr) {
1086 		WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
1087 		x86_pmu.pebs_active = 0;
1088 		return;
1089 	}
1090 
1091 	/*
1092 	 * 4KB-aligned pointer of the output buffer
1093 	 * (__alloc_pages_node() return page aligned address)
1094 	 * Buffer Size = 4KB * 2^SIZE
1095 	 * contiguous physical buffer (__alloc_pages_node() with order)
1096 	 */
1097 	arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
1098 	wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
1099 		     (u32)(arch_pebs_base >> 32));
1100 	x86_pmu.pebs_active = 1;
1101 }
1102 
fini_arch_pebs_on_cpu(int cpu)1103 inline void fini_arch_pebs_on_cpu(int cpu)
1104 {
1105 	if (!x86_pmu.arch_pebs)
1106 		return;
1107 
1108 	wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
1109 }
1110 
1111 /*
1112  * BTS
1113  */
1114 
/*
 * Constraint used for the BTS pseudo event. The second argument has only
 * bit INTEL_PMC_IDX_FIXED_BTS set.
 * NOTE(review): presumably that argument is the allowed-counter index mask,
 * pinning BTS to its dedicated slot -- confirm against EVENT_CONSTRAINT().
 */
1115 struct event_constraint bts_constraint =
1116 	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
1117 
/*
 * Turn on BTS in the debug control MSR.
 *
 * @config is interpreted with the ARCH_PERFMON_EVENTSEL_* bits:
 *   INT set    -> also set DEBUGCTLMSR_BTINT
 *   OS  clear  -> set DEBUGCTLMSR_BTS_OFF_OS
 *   USR clear  -> set DEBUGCTLMSR_BTS_OFF_USR
 */
void intel_pmu_enable_bts(u64 config)
{
	unsigned long ctl = get_debugctlmsr();

	ctl |= DEBUGCTLMSR_TR;
	ctl |= DEBUGCTLMSR_BTS;

	if (config & ARCH_PERFMON_EVENTSEL_INT)
		ctl |= DEBUGCTLMSR_BTINT;
	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		ctl |= DEBUGCTLMSR_BTS_OFF_OS;
	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		ctl |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(ctl);
}
1137 
intel_pmu_disable_bts(void)1138 void intel_pmu_disable_bts(void)
1139 {
1140 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1141 	unsigned long debugctlmsr;
1142 
1143 	if (!cpuc->ds)
1144 		return;
1145 
1146 	debugctlmsr = get_debugctlmsr();
1147 
1148 	debugctlmsr &=
1149 		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
1150 		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
1151 
1152 	update_debugctlmsr(debugctlmsr);
1153 }
1154 
intel_pmu_drain_bts_buffer(void)1155 int intel_pmu_drain_bts_buffer(void)
1156 {
1157 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1158 	struct debug_store *ds = cpuc->ds;
1159 	struct bts_record {
1160 		u64	from;
1161 		u64	to;
1162 		u64	flags;
1163 	};
1164 	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1165 	struct bts_record *at, *base, *top;
1166 	struct perf_output_handle handle;
1167 	struct perf_event_header header;
1168 	struct perf_sample_data data;
1169 	unsigned long skip = 0;
1170 	struct pt_regs regs;
1171 
1172 	if (!event)
1173 		return 0;
1174 
1175 	if (!x86_pmu.bts_active)
1176 		return 0;
1177 
1178 	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1179 	top  = (struct bts_record *)(unsigned long)ds->bts_index;
1180 
1181 	if (top <= base)
1182 		return 0;
1183 
1184 	memset(&regs, 0, sizeof(regs));
1185 
1186 	ds->bts_index = ds->bts_buffer_base;
1187 
1188 	perf_sample_data_init(&data, 0, event->hw.last_period);
1189 
1190 	/*
1191 	 * BTS leaks kernel addresses in branches across the cpl boundary,
1192 	 * such as traps or system calls, so unless the user is asking for
1193 	 * kernel tracing (and right now it's not possible), we'd need to
1194 	 * filter them out. But first we need to count how many of those we
1195 	 * have in the current batch. This is an extra O(n) pass, however,
1196 	 * it's much faster than the other one especially considering that
1197 	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
1198 	 * alloc_bts_buffer()).
1199 	 */
1200 	for (at = base; at < top; at++) {
1201 		/*
1202 		 * Note that right now *this* BTS code only works if
1203 		 * attr::exclude_kernel is set, but let's keep this extra
1204 		 * check here in case that changes.
1205 		 */
1206 		if (event->attr.exclude_kernel &&
1207 		    (kernel_ip(at->from) || kernel_ip(at->to)))
1208 			skip++;
1209 	}
1210 
1211 	/*
1212 	 * Prepare a generic sample, i.e. fill in the invariant fields.
1213 	 * We will overwrite the from and to address before we output
1214 	 * the sample.
1215 	 */
1216 	rcu_read_lock();
1217 	perf_prepare_sample(&data, event, &regs);
1218 	perf_prepare_header(&header, &data, event, &regs);
1219 
1220 	if (perf_output_begin(&handle, &data, event,
1221 			      header.size * (top - base - skip)))
1222 		goto unlock;
1223 
1224 	for (at = base; at < top; at++) {
1225 		/* Filter out any records that contain kernel addresses. */
1226 		if (event->attr.exclude_kernel &&
1227 		    (kernel_ip(at->from) || kernel_ip(at->to)))
1228 			continue;
1229 
1230 		data.ip		= at->from;
1231 		data.addr	= at->to;
1232 
1233 		perf_output_sample(&handle, &header, &data, event);
1234 	}
1235 
1236 	perf_output_end(&handle);
1237 
1238 	/* There's new data available. */
1239 	event->hw.interrupts++;
1240 	event->pending_kill = POLL_IN;
1241 unlock:
1242 	rcu_read_unlock();
1243 	return 1;
1244 }
1245 
intel_pmu_drain_pebs_buffer(void)1246 void intel_pmu_drain_pebs_buffer(void)
1247 {
1248 	struct perf_sample_data data;
1249 
1250 	static_call(x86_pmu_drain_pebs)(NULL, &data);
1251 }
1252 
1253 /*
1254  * PEBS
1255  */
/*
 * PEBS constraint table, presumably for Core2 (per the name). The list is
 * terminated by EVENT_CONSTRAINT_END and has no catch-all entry.
 */
1256 struct event_constraint intel_core2_pebs_event_constraints[] = {
1257 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1258 	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
1259 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
1260 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
1261 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
1262 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1263 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1264 	EVENT_CONSTRAINT_END
1265 };
1266 
/*
 * PEBS constraint table, presumably for Atom (per the name). Specific
 * events come first; the INTEL_ALL_EVENT_CONSTRAINT entry is the final
 * catch-all before EVENT_CONSTRAINT_END.
 */
1267 struct event_constraint intel_atom_pebs_event_constraints[] = {
1268 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1269 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
1270 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
1271 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1272 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1273 	/* Allow all events as PEBS with no flags */
1274 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1275 	EVENT_CONSTRAINT_END
1276 };
1277 
/*
 * PEBS constraint table, presumably for Silvermont (per the "slm" name):
 * one specific event followed by a catch-all entry.
 */
1278 struct event_constraint intel_slm_pebs_event_constraints[] = {
1279 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1280 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
1281 	/* Allow all events as PEBS with no flags */
1282 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1283 	EVENT_CONSTRAINT_END
1284 };
1285 
/*
 * PEBS constraint table, presumably for Goldmont (per the "glm" name):
 * it contains only the catch-all entry.
 */
1286 struct event_constraint intel_glm_pebs_event_constraints[] = {
1287 	/* Allow all events as PEBS with no flags */
1288 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1289 	EVENT_CONSTRAINT_END
1290 };
1291 
/*
 * PEBS constraint table, presumably for Gracemont (per the "grt" name).
 * It holds only the two INTEL_HYBRID_LAT_CONSTRAINT entries below.
 */
1292 struct event_constraint intel_grt_pebs_event_constraints[] = {
1293 	/*
	 * NOTE(review): this comment used to read "Allow all events as PEBS
	 * with no flags", but the table has no INTEL_ALL_EVENT_CONSTRAINT
	 * catch-all. Events 0x5d0/0x6d0 are presumably the load/store
	 * latency events -- confirm against the event list.
	 */
1294 	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
1295 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
1296 	EVENT_CONSTRAINT_END
1297 };
1298 
/*
 * PEBS constraint table for the "arw" core type (name not expanded in this
 * file section). Two INTEL_HYBRID_LAT_CONSTRAINT entries followed by four
 * 0xd4 umask variants, each given a different single-bit second argument.
 */
1299 struct event_constraint intel_arw_pebs_event_constraints[] = {
1300 	/*
	 * NOTE(review): this comment used to read "Allow all events as PEBS
	 * with no flags", but the table has no INTEL_ALL_EVENT_CONSTRAINT
	 * catch-all. Events 0x5d0/0x6d0 are presumably the load/store
	 * latency events -- confirm against the event list.
	 */
1301 	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xff),
1302 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xff),
1303 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01d4, 0x1),
1304 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02d4, 0x2),
1305 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x04d4, 0x4),
1306 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x08d4, 0x8),
1307 	EVENT_CONSTRAINT_END
1308 };
1309 
/*
 * PEBS constraint table, presumably for Nehalem (per the name). Explicit
 * event list only; there is no catch-all entry.
 */
1310 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
1311 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
1312 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
1313 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1314 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
1315 	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
1316 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
1317 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
1318 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
1319 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1320 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
1321 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
1322 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1323 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1324 	EVENT_CONSTRAINT_END
1325 };
1326 
/*
 * PEBS constraint table, presumably for Westmere (per the name). It has the
 * same layout as the Nehalem table, except that event 0xc5 is matched as a
 * whole (BR_MISP_RETIRED.*) instead of the single 0x02c5 umask.
 */
1327 struct event_constraint intel_westmere_pebs_event_constraints[] = {
1328 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
1329 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
1330 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1331 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
1332 	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
1333 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
1334 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
1335 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
1336 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1337 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
1338 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
1339 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1340 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1341 	EVENT_CONSTRAINT_END
1342 };
1343 
/*
 * PEBS constraint table, presumably for Sandy Bridge (per the "snb" name).
 * Specific events first, then a catch-all entry.
 */
1344 struct event_constraint intel_snb_pebs_event_constraints[] = {
1345 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1346 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1347 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
1348 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1349 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1350         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
1351         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1352         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1353         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1354 	/* Allow all events as PEBS with no flags */
1355 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1356 	EVENT_CONSTRAINT_END
1357 };
1358 
/*
 * PEBS constraint table, presumably for Ivy Bridge (per the "ivb" name).
 * Compared with the snb table it adds the 0x108001c0 (cycles:ppp) entry.
 */
1359 struct event_constraint intel_ivb_pebs_event_constraints[] = {
1360         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1361         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1362 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
1363 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1364 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1365 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1366 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1367 	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
1368 	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1369 	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1370 	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1371 	/* Allow all events as PEBS with no flags */
1372 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1373         EVENT_CONSTRAINT_END
1374 };
1375 
/*
 * PEBS constraint table, presumably for Haswell (per the "hsw" name).
 * Memory events use the *_DATALA_XLD / *_DATALA_XST macro variants, where
 * the bdw and skl tables use *_DATALA_LD / *_DATALA_ST.
 */
1376 struct event_constraint intel_hsw_pebs_event_constraints[] = {
1377 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1378 	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
1379 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1380 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1381 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1382 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1383 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1384 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1385 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1386 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1387 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1388 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1389 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1390 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1391 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1392 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1393 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1394 	/* Allow all events as PEBS with no flags */
1395 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1396 	EVENT_CONSTRAINT_END
1397 };
1398 
/*
 * PEBS constraint table, presumably for Broadwell (per the "bdw" name).
 * Same event list as the hsw table, but built with the *_DATALA_LD /
 * *_DATALA_ST macro variants.
 */
1399 struct event_constraint intel_bdw_pebs_event_constraints[] = {
1400 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1401 	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
1402 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1403 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1404 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1405 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1406 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1407 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1408 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1409 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1410 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1411 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1412 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1413 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1414 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1415 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1416 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1417 	/* Allow all events as PEBS with no flags */
1418 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1419 	EVENT_CONSTRAINT_END
1420 };
1421 
1422 
/*
 * PEBS constraint table, presumably for Skylake (per the "skl" name).
 * Specific events first, then a catch-all entry.
 */
1423 struct event_constraint intel_skl_pebs_event_constraints[] = {
1424 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
1425 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1426 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1427 	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
1428 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1429 	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
1430 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1431 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1432 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1433 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
1434 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1435 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1436 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1437 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1438 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
1439 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
1440 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
1441 	/* Allow all events as PEBS with no flags */
1442 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1443 	EVENT_CONSTRAINT_END
1444 };
1445 
/*
 * PEBS constraint table, presumably for Ice Lake (per the "icl" name).
 * There is no catch-all entry: events not listed fall through to the
 * PMU_FL_PEBS_ALL path (see the comment before EVENT_CONSTRAINT_END).
 */
1446 struct event_constraint intel_icl_pebs_event_constraints[] = {
1447 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),	/* old INST_RETIRED.PREC_DIST */
1448 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1449 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),	/* SLOTS */
1450 
1451 	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
1452 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1453 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1454 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1455 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1456 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1457 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1458 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1459 
1460 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
1461 
1462 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),		/* MEM_INST_RETIRED.* */
1463 
1464 	/*
1465 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1466 	 * need the full constraints from the main table.
1467 	 */
1468 
1469 	EVENT_CONSTRAINT_END
1470 };
1471 
/*
 * PEBS constraint table, presumably for Golden Cove (per the "glc" name).
 * There is no catch-all entry: events not listed fall through to the
 * PMU_FL_PEBS_ALL path (see the comment before EVENT_CONSTRAINT_END).
 */
1472 struct event_constraint intel_glc_pebs_event_constraints[] = {
1473 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1474 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1475 
1476 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
1477 	INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
1478 	INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
1479 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1480 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1481 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1482 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1483 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1484 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1485 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1486 
1487 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1488 
1489 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1490 
1491 	/*
1492 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1493 	 * need the full constraints from the main table.
1494 	 */
1495 
1496 	EVENT_CONSTRAINT_END
1497 };
1498 
/*
 * PEBS constraint table, presumably for Lion Cove (per the "lnc" name).
 * The latency events use the INTEL_HYBRID_LDLAT / INTEL_HYBRID_STLAT
 * macros. There is no catch-all entry: events not listed fall through to
 * the PMU_FL_PEBS_ALL path.
 */
1499 struct event_constraint intel_lnc_pebs_event_constraints[] = {
1500 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1501 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1502 
1503 	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
1504 	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1505 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1506 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1507 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1508 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1509 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1510 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1511 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1512 
1513 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1514 
1515 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1516 
1517 	/*
1518 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1519 	 * need the full constraints from the main table.
1520 	 */
1521 
1522 	EVENT_CONSTRAINT_END
1523 };
1524 
/*
 * PEBS constraint table for the "pnc" core type (name not expanded in this
 * file section). It differs from the lnc table in the 0x1cd second
 * argument (0xfc here) and in the extra event 0xd6 entry. There is no
 * catch-all entry: events not listed fall through to the PMU_FL_PEBS_ALL
 * path.
 */
1525 struct event_constraint intel_pnc_pebs_event_constraints[] = {
1526 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1527 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1528 
1529 	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0xfc),
1530 	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1531 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1532 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1533 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1534 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1535 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1536 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1537 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1538 
1539 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1540 
1541 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1542 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd6, 0xf),
1543 
1544 	/*
1545 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1546 	 * need the full constraints from the main table.
1547 	 */
1548 
1549 	EVENT_CONSTRAINT_END
1550 };
1551 
intel_pebs_constraints(struct perf_event * event)1552 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
1553 {
1554 	struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
1555 	struct event_constraint *c;
1556 
1557 	if (!event->attr.precise_ip)
1558 		return NULL;
1559 
1560 	if (pebs_constraints) {
1561 		for_each_event_constraint(c, pebs_constraints) {
1562 			if (constraint_match(c, event->hw.config)) {
1563 				event->hw.flags |= c->flags;
1564 				return c;
1565 			}
1566 		}
1567 	}
1568 
1569 	/*
1570 	 * Extended PEBS support
1571 	 * Makes the PEBS code search the normal constraints.
1572 	 */
1573 	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1574 		return NULL;
1575 
1576 	return &emptyconstraint;
1577 }
1578 
1579 /*
1580  * We need the sched_task callback even for per-cpu events when we use
1581  * the large interrupt threshold, such that we can provide PID and TID
1582  * to PEBS samples.
1583  */
pebs_needs_sched_cb(struct cpu_hw_events * cpuc)1584 static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1585 {
1586 	if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1587 		return false;
1588 
1589 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1590 }
1591 
/*
 * sched_task hook: on schedule-out, drain the PEBS buffer if the current
 * CPU's PEBS state requires the callback. Nothing is done on schedule-in.
 * @pmu_ctx is not used.
 */
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (sched_in)
		return;

	if (pebs_needs_sched_cb(cpuc))
		intel_pmu_drain_pebs_buffer();
}
1599 
pebs_update_threshold(struct cpu_hw_events * cpuc)1600 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1601 {
1602 	struct debug_store *ds = cpuc->ds;
1603 	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
1604 	u64 threshold;
1605 	int reserved;
1606 
1607 	if (cpuc->n_pebs_via_pt)
1608 		return;
1609 
1610 	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1611 		reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
1612 	else
1613 		reserved = max_pebs_events;
1614 
1615 	if (cpuc->n_pebs == cpuc->n_large_pebs) {
1616 		threshold = ds->pebs_absolute_maximum -
1617 			reserved * cpuc->pebs_record_size;
1618 	} else {
1619 		threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1620 	}
1621 
1622 	ds->pebs_interrupt_threshold = threshold;
1623 }
1624 
/*
 * Helpers for the counter-snapshot fields of a PEBS data configuration
 * value. Every use of the macro argument is parenthesized so that an
 * expression argument (e.g. "a | b") is shifted as a whole instead of
 * being split up by operator precedence.
 */

/* Extract the non-fixed counter bitmap from data config value @x. */
#define PEBS_DATACFG_CNTRS(x)						\
	(((x) >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)

/* Data config bit selecting non-fixed counter @x; 0 if @x is out of range. */
#define PEBS_DATACFG_CNTR_BIT(x)					\
	(((1ULL << (x)) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)

/* Extract the fixed counter bitmap from data config value @x. */
#define PEBS_DATACFG_FIX(x)						\
	(((x) >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)

/* Data config bit selecting fixed counter @x; 0 if @x is out of range. */
#define PEBS_DATACFG_FIX_BIT(x)						\
	(((1ULL << (x)) & PEBS_DATACFG_FIX_MASK)			\
	 << PEBS_DATACFG_FIX_SHIFT)
1637 
adaptive_pebs_record_size_update(void)1638 static void adaptive_pebs_record_size_update(void)
1639 {
1640 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1641 	u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1642 	int sz = sizeof(struct pebs_basic);
1643 
1644 	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1645 		sz += sizeof(struct pebs_meminfo);
1646 	if (pebs_data_cfg & PEBS_DATACFG_GP)
1647 		sz += sizeof(struct pebs_gprs);
1648 	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1649 		sz += sizeof(struct pebs_xmm);
1650 	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1651 		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1652 	if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
1653 		sz += sizeof(struct pebs_cntr_header);
1654 
1655 		/* Metrics base and Metrics Data */
1656 		if (pebs_data_cfg & PEBS_DATACFG_METRICS)
1657 			sz += 2 * sizeof(u64);
1658 
1659 		if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
1660 			sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
1661 			       hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
1662 			      sizeof(u64);
1663 		}
1664 	}
1665 
1666 	cpuc->pebs_record_size = sz;
1667 }
1668 
/*
 * OR into *@pebs_data_cfg the bits needed to snapshot @event's counter:
 * PEBS_DATACFG_METRICS alone for a metric event, otherwise
 * PEBS_DATACFG_CNTR plus the per-counter bit for counter index @idx
 * (fixed or non-fixed, split at INTEL_PMC_IDX_FIXED).
 */
static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
					int idx, u64 *pebs_data_cfg)
{
	u64 bits;

	if (is_metric_event(event))
		bits = PEBS_DATACFG_METRICS;
	else if (idx >= INTEL_PMC_IDX_FIXED)
		bits = PEBS_DATACFG_CNTR |
		       PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
	else
		bits = PEBS_DATACFG_CNTR | PEBS_DATACFG_CNTR_BIT(idx);

	*pebs_data_cfg |= bits;
}
1684 
1685 
intel_pmu_pebs_late_setup(struct cpu_hw_events * cpuc)1686 void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
1687 {
1688 	struct perf_event *event;
1689 	u64 pebs_data_cfg = 0;
1690 	int i;
1691 
1692 	for (i = 0; i < cpuc->n_events; i++) {
1693 		event = cpuc->event_list[i];
1694 		if (!is_pebs_counter_event_group(event))
1695 			continue;
1696 		__intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
1697 	}
1698 
1699 	if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1700 		cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1701 }
1702 
/*
 * Sample types for which pebs_update_adaptive_cfg() enables
 * PEBS_DATACFG_MEMINFO: any one of these bits in attr.sample_type is
 * enough.
 */
1703 #define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1704 				PERF_SAMPLE_PHYS_ADDR |			     \
1705 				PERF_SAMPLE_WEIGHT_TYPE |		     \
1706 				PERF_SAMPLE_TRANSACTION |		     \
1707 				PERF_SAMPLE_DATA_PAGE_SIZE)
1708 
pebs_update_adaptive_cfg(struct perf_event * event)1709 static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1710 {
1711 	struct perf_event_attr *attr = &event->attr;
1712 	u64 sample_type = attr->sample_type;
1713 	u64 pebs_data_cfg = 0;
1714 	bool gprs, tsx_weight;
1715 
1716 	if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1717 	    attr->precise_ip > 1)
1718 		return pebs_data_cfg;
1719 
1720 	if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1721 		pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1722 
1723 	/*
1724 	 * We need GPRs when:
1725 	 * + user requested them
1726 	 * + precise_ip < 2 for the non event IP
1727 	 * + For RTM TSX weight we need GPRs for the abort code.
1728 	 */
1729 	gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1730 		(attr->sample_regs_intr & PEBS_GP_REGS)) ||
1731 	       ((sample_type & PERF_SAMPLE_REGS_USER) &&
1732 		(attr->sample_regs_user & PEBS_GP_REGS));
1733 
1734 	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1735 		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1736 		      x86_pmu.rtm_abort_event);
1737 
1738 	if (gprs || (attr->precise_ip < 2) || tsx_weight)
1739 		pebs_data_cfg |= PEBS_DATACFG_GP;
1740 
1741 	if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1742 	    (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1743 		pebs_data_cfg |= PEBS_DATACFG_XMMS;
1744 
1745 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1746 		/*
1747 		 * For now always log all LBRs. Could configure this
1748 		 * later.
1749 		 */
1750 		pebs_data_cfg |= PEBS_DATACFG_LBRS |
1751 			((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1752 	}
1753 
1754 	return pebs_data_cfg;
1755 }
1756 
1757 static void
pebs_update_state(bool needed_cb,struct cpu_hw_events * cpuc,struct perf_event * event,bool add)1758 pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1759 		  struct perf_event *event, bool add)
1760 {
1761 	struct pmu *pmu = event->pmu;
1762 
1763 	/*
1764 	 * Make sure we get updated with the first PEBS event.
1765 	 * During removal, ->pebs_data_cfg is still valid for
1766 	 * the last PEBS event. Don't clear it.
1767 	 */
1768 	if ((cpuc->n_pebs == 1) && add)
1769 		cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
1770 
1771 	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1772 		if (!needed_cb)
1773 			perf_sched_cb_inc(pmu);
1774 		else
1775 			perf_sched_cb_dec(pmu);
1776 
1777 		cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
1778 	}
1779 
1780 	/*
1781 	 * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1782 	 * iterating all remaining PEBS events to reconstruct the config.
1783 	 */
1784 	if (x86_pmu.intel_cap.pebs_baseline && add) {
1785 		u64 pebs_data_cfg;
1786 
1787 		pebs_data_cfg = pebs_update_adaptive_cfg(event);
1788 		/*
1789 		 * Be sure to update the thresholds when we change the record.
1790 		 */
1791 		if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1792 			cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1793 	}
1794 }
1795 
intel_get_arch_pebs_data_config(struct perf_event * event)1796 u64 intel_get_arch_pebs_data_config(struct perf_event *event)
1797 {
1798 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1799 	u64 pebs_data_cfg = 0;
1800 	u64 cntr_mask;
1801 
1802 	if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
1803 		return 0;
1804 
1805 	pebs_data_cfg |= pebs_update_adaptive_cfg(event);
1806 
1807 	cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
1808 		    (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
1809 		    PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
1810 	pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
1811 
1812 	return pebs_data_cfg;
1813 }
1814 
intel_pmu_pebs_add(struct perf_event * event)1815 void intel_pmu_pebs_add(struct perf_event *event)
1816 {
1817 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1818 	struct hw_perf_event *hwc = &event->hw;
1819 	bool needed_cb = pebs_needs_sched_cb(cpuc);
1820 
1821 	cpuc->n_pebs++;
1822 	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1823 		cpuc->n_large_pebs++;
1824 	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1825 		cpuc->n_pebs_via_pt++;
1826 
1827 	pebs_update_state(needed_cb, cpuc, event, true);
1828 }
1829 
intel_pmu_pebs_via_pt_disable(struct perf_event * event)1830 static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1831 {
1832 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1833 
1834 	if (!is_pebs_pt(event))
1835 		return;
1836 
1837 	if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1838 		cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1839 }
1840 
intel_pmu_pebs_via_pt_enable(struct perf_event * event)1841 static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1842 {
1843 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1844 	struct hw_perf_event *hwc = &event->hw;
1845 	struct debug_store *ds = cpuc->ds;
1846 	u64 value = ds->pebs_event_reset[hwc->idx];
1847 	u32 base = MSR_RELOAD_PMC0;
1848 	unsigned int idx = hwc->idx;
1849 
1850 	if (!is_pebs_pt(event))
1851 		return;
1852 
1853 	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1854 		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1855 
1856 	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1857 
1858 	if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1859 		base = MSR_RELOAD_FIXED_CTR0;
1860 		idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1861 		if (x86_pmu.intel_cap.pebs_format < 5)
1862 			value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
1863 		else
1864 			value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1865 	}
1866 	wrmsrq(base + idx, value);
1867 }
1868 
intel_pmu_drain_large_pebs(struct cpu_hw_events * cpuc)1869 static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
1870 {
1871 	if (cpuc->n_pebs == cpuc->n_large_pebs &&
1872 	    cpuc->n_pebs != cpuc->n_pebs_via_pt)
1873 		intel_pmu_drain_pebs_buffer();
1874 }
1875 
__intel_pmu_pebs_enable(struct perf_event * event)1876 static void __intel_pmu_pebs_enable(struct perf_event *event)
1877 {
1878 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1879 	struct hw_perf_event *hwc = &event->hw;
1880 
1881 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1882 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
1883 }
1884 
intel_pmu_pebs_enable(struct perf_event * event)1885 void intel_pmu_pebs_enable(struct perf_event *event)
1886 {
1887 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1888 	u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
1889 	struct hw_perf_event *hwc = &event->hw;
1890 	struct debug_store *ds = cpuc->ds;
1891 	unsigned int idx = hwc->idx;
1892 
1893 	__intel_pmu_pebs_enable(event);
1894 
1895 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1896 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1897 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1898 		cpuc->pebs_enabled |= 1ULL << 63;
1899 
1900 	if (x86_pmu.intel_cap.pebs_baseline) {
1901 		hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1902 		if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1903 			/*
1904 			 * drain_pebs() assumes uniform record size;
1905 			 * hence we need to drain when changing said
1906 			 * size.
1907 			 */
1908 			intel_pmu_drain_pebs_buffer();
1909 			adaptive_pebs_record_size_update();
1910 			wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
1911 			cpuc->active_pebs_data_cfg = pebs_data_cfg;
1912 		}
1913 	}
1914 	if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
1915 		cpuc->pebs_data_cfg = pebs_data_cfg;
1916 		pebs_update_threshold(cpuc);
1917 	}
1918 
1919 	if (idx >= INTEL_PMC_IDX_FIXED) {
1920 		if (x86_pmu.intel_cap.pebs_format < 5)
1921 			idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
1922 		else
1923 			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1924 	}
1925 
1926 	/*
1927 	 * Use auto-reload if possible to save a MSR write in the PMI.
1928 	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
1929 	 */
1930 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1931 		ds->pebs_event_reset[idx] =
1932 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1933 	} else {
1934 		ds->pebs_event_reset[idx] = 0;
1935 	}
1936 
1937 	intel_pmu_pebs_via_pt_enable(event);
1938 }
1939 
intel_pmu_pebs_del(struct perf_event * event)1940 void intel_pmu_pebs_del(struct perf_event *event)
1941 {
1942 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1943 	struct hw_perf_event *hwc = &event->hw;
1944 	bool needed_cb = pebs_needs_sched_cb(cpuc);
1945 
1946 	cpuc->n_pebs--;
1947 	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1948 		cpuc->n_large_pebs--;
1949 	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1950 		cpuc->n_pebs_via_pt--;
1951 
1952 	pebs_update_state(needed_cb, cpuc, event, false);
1953 }
1954 
__intel_pmu_pebs_disable(struct perf_event * event)1955 static void __intel_pmu_pebs_disable(struct perf_event *event)
1956 {
1957 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1958 	struct hw_perf_event *hwc = &event->hw;
1959 
1960 	intel_pmu_drain_large_pebs(cpuc);
1961 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1962 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1963 }
1964 
intel_pmu_pebs_disable(struct perf_event * event)1965 void intel_pmu_pebs_disable(struct perf_event *event)
1966 {
1967 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1968 	struct hw_perf_event *hwc = &event->hw;
1969 
1970 	__intel_pmu_pebs_disable(event);
1971 
1972 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1973 	    (x86_pmu.version < 5))
1974 		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1975 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1976 		cpuc->pebs_enabled &= ~(1ULL << 63);
1977 
1978 	intel_pmu_pebs_via_pt_disable(event);
1979 
1980 	if (cpuc->enabled)
1981 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1982 }
1983 
intel_pmu_pebs_enable_all(void)1984 void intel_pmu_pebs_enable_all(void)
1985 {
1986 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1987 
1988 	if (cpuc->pebs_enabled)
1989 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1990 }
1991 
intel_pmu_pebs_disable_all(void)1992 void intel_pmu_pebs_disable_all(void)
1993 {
1994 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1995 
1996 	if (cpuc->pebs_enabled)
1997 		__intel_pmu_pebs_disable_all();
1998 }
1999 
/*
 * Rewind regs->ip from the IP reported by PEBS to the start of the
 * instruction in front of it, using the first LBR entry
 * (cpuc->lbr_entries[0]) as the start of the current basic block.
 *
 * Returns 1 when regs->ip may be treated as exact (no fixup needed, or the
 * fixup succeeded), 0 when the IP could not be fixed up.
 */
intel_pmu_pebs_fixup_ip(struct pt_regs * regs)2000 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
2001 {
2002 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2003 	unsigned long from = cpuc->lbr_entries[0].from;
2004 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
2005 	unsigned long ip = regs->ip;
2006 	int is_64bit = 0;
2007 	void *kaddr;
2008 	int size;
2009 
2010 	/*
2011 	 * We don't need to fixup if the PEBS assist is fault like
2012 	 */
2013 	if (!x86_pmu.intel_cap.pebs_trap)
2014 		return 1;
2015 
2016 	/*
2017 	 * No LBR entry, no basic block, no rewinding
2018 	 */
2019 	if (!cpuc->lbr_stack.nr || !from || !to)
2020 		return 0;
2021 
2022 	/*
2023 	 * Basic blocks should never cross user/kernel boundaries
2024 	 */
2025 	if (kernel_ip(ip) != kernel_ip(to))
2026 		return 0;
2027 
2028 	/*
2029 	 * unsigned math, either ip is before the start (impossible) or
2030 	 * the basic block is larger than 1 page (sanity)
2031 	 */
2032 	if ((ip - to) > PEBS_FIXUP_SIZE)
2033 		return 0;
2034 
2035 	/*
2036 	 * We sampled a branch insn, rewind using the LBR stack
2037 	 */
2038 	if (ip == to) {
2039 		set_linear_ip(regs, from);
2040 		return 1;
2041 	}
2042 
	/* Bytes from the block start up to the reported IP (<= PEBS_FIXUP_SIZE). */
2043 	size = ip - to;
2044 	if (!kernel_ip(ip)) {
2045 		int bytes;
2046 		u8 *buf = this_cpu_read(insn_buffer);
2047 
2048 		/* 'size' must fit our buffer, see above */
2049 		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
2050 		if (bytes != 0)
2051 			return 0;
2052 
2053 		kaddr = buf;
2054 	} else {
2055 		kaddr = (void *)to;
2056 	}
2057 
	/*
	 * Decode forward one instruction at a time; 'old_to' always holds the
	 * start of the instruction decoded last.
	 */
2058 	do {
2059 		struct insn insn;
2060 
2061 		old_to = to;
2062 
2063 #ifdef CONFIG_X86_64
2064 		is_64bit = kernel_ip(to) || any_64bit_mode(regs);
2065 #endif
2066 		insn_init(&insn, kaddr, size, is_64bit);
2067 
2068 		/*
2069 		 * Make sure there was not a problem decoding the instruction.
2070 		 * This is doubly important because we have an infinite loop if
2071 		 * insn.length=0.
2072 		 */
2073 		if (insn_get_length(&insn))
2074 			break;
2075 
2076 		to += insn.length;
2077 		kaddr += insn.length;
2078 		size -= insn.length;
2079 	} while (to < ip);
2080 
2081 	if (to == ip) {
2082 		set_linear_ip(regs, old_to);
2083 		return 1;
2084 	}
2085 
2086 	/*
2087 	 * Even though we decoded the basic block, the instruction stream
2088 	 * never matched the given IP, either the TO or the IP got corrupted.
2089 	 */
2090 	return 0;
2091 }
2092 
/*
 * Extract the cycles_last_block field from a raw TSX tuning word.
 * Returns 0 when @tsx_tuning is 0.
 */
static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
{
	union hsw_tsx_tuning tsx = { .value = tsx_tuning };

	if (!tsx_tuning)
		return 0;

	return tsx.cycles_last_block;
}
2101 
/*
 * Build the perf transaction word from the TSX flags held in the upper
 * half of @tsx_tuning. @ax supplies the abort code when bit 0 of @ax is
 * set and the flags include PERF_TXN_TRANSACTION.
 */
static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
{
	u64 flags = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
	u64 abort_code = (ax >> 24) & 0xff;

	if (!(flags & PERF_TXN_TRANSACTION) || !(ax & 1))
		return flags;

	/* For RTM XABORTs also log the abort code from AX */
	return flags | (abort_code << PERF_TXN_ABORT_SHIFT);
}
2111 
get_pebs_status(void * n)2112 static inline u64 get_pebs_status(void *n)
2113 {
2114 	if (x86_pmu.intel_cap.pebs_format < 4)
2115 		return ((struct pebs_record_nhm *)n)->status;
2116 	return ((struct pebs_basic *)n)->applicable_counters;
2117 }
2118 
/*
 * Union of the three *_HSW PEBS event flags (ST, LD and NA); tested as a
 * group in get_data_src().
 */
2119 #define PERF_X86_EVENT_PEBS_HSW_PREC \
2120 		(PERF_X86_EVENT_PEBS_ST_HSW | \
2121 		 PERF_X86_EVENT_PEBS_LD_HSW | \
2122 		 PERF_X86_EVENT_PEBS_NA_HSW)
2123 
/*
 * Translate the raw PEBS auxiliary word @aux into a perf data-source value,
 * picking the decoder from the event's PEBS flags. Returns PERF_MEM_NA when
 * none of the known flags is set.
 */
static u64 get_data_src(struct perf_event *event, u64 aux)
{
	int fl = event->hw.flags;

	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
		return load_latency_data(event, aux);

	if (fl & PERF_X86_EVENT_PEBS_STLAT)
		return store_latency_data(event, aux);

	if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
		return x86_pmu.pebs_latency_data(event, aux);

	/* Any of the Haswell-style precise flags. */
	if (fl & PERF_X86_EVENT_PEBS_HSW_PREC)
		return precise_datala_hsw(event, aux);

	/* Plain precise store. */
	if (fl & PERF_X86_EVENT_PEBS_ST)
		return precise_store_data(aux);

	return PERF_MEM_NA;
}
2142 
/*
 * Convert the TSC value of a PEBS record into a sample time stamp and mark
 * PERF_SAMPLE_TIME as filled in @data. Nothing is written when the
 * conversion is not supported.
 */
static void setup_pebs_time(struct perf_event *event,
			    struct perf_sample_data *data,
			    u64 tsc)
{
	/*
	 * The record's TSC is not used when:
	 * - the event asked for a user-defined clock; converting to one is
	 *   not supported yet;
	 * - the TSC is unstable. That is a corner case and very unlikely to
	 *   happen; if it does, the TSC in the PEBS record is dropped and
	 *   the time falls back to perf_event_clock().
	 */
	if (event->attr.use_clockid != 0 ||
	    !using_native_sched_clock() || !sched_clock_stable())
		return;

	data->sample_flags |= PERF_SAMPLE_TIME;
	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
}
2163 
/*
 * Sample types for which the PEBS setup code in this file fills in
 * data->addr from the record.
 */
2164 #define PERF_SAMPLE_ADDR_TYPE	(PERF_SAMPLE_ADDR |		\
2165 				 PERF_SAMPLE_PHYS_ADDR |	\
2166 				 PERF_SAMPLE_DATA_PAGE_SIZE)
2167 
/*
 * Fill @data and @regs for @event from one fixed-layout PEBS record.
 *
 * @iregs are the interrupt registers: they seed @regs and are used for the
 * callchain. @__pebs points to the raw record; a NULL record is ignored.
 * Which record fields are read depends on x86_pmu.intel_cap.pebs_format.
 */
setup_pebs_fixed_sample_data(struct perf_event * event,struct pt_regs * iregs,void * __pebs,struct perf_sample_data * data,struct pt_regs * regs)2168 static void setup_pebs_fixed_sample_data(struct perf_event *event,
2169 				   struct pt_regs *iregs, void *__pebs,
2170 				   struct perf_sample_data *data,
2171 				   struct pt_regs *regs)
2172 {
2173 	/*
2174 	 * We cast to the biggest pebs_record but are careful not to
2175 	 * unconditionally access the 'extra' entries.
2176 	 */
2177 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2178 	struct pebs_record_skl *pebs = __pebs;
2179 	u64 sample_type;
2180 	int fll;
2181 
2182 	if (pebs == NULL)
2183 		return;
2184 
2185 	sample_type = event->attr.sample_type;
2186 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
2187 
2188 	perf_sample_data_init(data, 0, event->hw.last_period);
2189 
2190 	/*
2191 	 * Use latency for weight (only avail with PEBS-LL)
2192 	 */
2193 	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
2194 		data->weight.full = pebs->lat;
2195 		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2196 	}
2197 
2198 	/*
2199 	 * data.data_src encodes the data source
2200 	 */
2201 	if (sample_type & PERF_SAMPLE_DATA_SRC) {
2202 		data->data_src.val = get_data_src(event, pebs->dse);
2203 		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
2204 	}
2205 
2206 	/*
2207 	 * We must however always use iregs for the unwinder to stay sane; the
2208 	 * record BP,SP,IP can point into thin air when the record is from a
2209 	 * previous PMI context or an (I)RET happened between the record and
2210 	 * PMI.
2211 	 */
2212 	perf_sample_save_callchain(data, event, iregs);
2213 
2214 	/*
2215 	 * We use the interrupt regs as a base because the PEBS record does not
2216 	 * contain a full regs set, specifically it seems to lack segment
2217 	 * descriptors, which get used by things like user_mode().
2218 	 *
2219 	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
2220 	 */
2221 	*regs = *iregs;
2222 
2223 	/*
2224 	 * Initialize regs->flags from PEBS,
2225 	 * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
2226 	 * i.e., do not rely on it being zero:
2227 	 */
2228 	regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
2229 
2230 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
2231 		regs->ax = pebs->ax;
2232 		regs->bx = pebs->bx;
2233 		regs->cx = pebs->cx;
2234 		regs->dx = pebs->dx;
2235 		regs->si = pebs->si;
2236 		regs->di = pebs->di;
2237 
2238 		regs->bp = pebs->bp;
2239 		regs->sp = pebs->sp;
2240 
2241 #ifndef CONFIG_X86_32
2242 		regs->r8 = pebs->r8;
2243 		regs->r9 = pebs->r9;
2244 		regs->r10 = pebs->r10;
2245 		regs->r11 = pebs->r11;
2246 		regs->r12 = pebs->r12;
2247 		regs->r13 = pebs->r13;
2248 		regs->r14 = pebs->r14;
2249 		regs->r15 = pebs->r15;
2250 #endif
2251 	}
2252 
2253 	if (event->attr.precise_ip > 1) {
2254 		/*
2255 		 * Haswell and later processors have an 'eventing IP'
2256 		 * (real IP) which fixes the off-by-1 skid in hardware.
2257 		 * Use it when precise_ip >= 2 :
2258 		 */
2259 		if (x86_pmu.intel_cap.pebs_format >= 2) {
2260 			set_linear_ip(regs, pebs->real_ip);
2261 			regs->flags |= PERF_EFLAGS_EXACT;
2262 		} else {
2263 			/* Otherwise, use PEBS off-by-1 IP: */
2264 			set_linear_ip(regs, pebs->ip);
2265 
2266 			/*
2267 			 * With precise_ip >= 2, try to fix up the off-by-1 IP
2268 			 * using the LBR. If successful, the fixup function
2269 			 * corrects regs->ip and calls set_linear_ip() on regs:
2270 			 */
2271 			if (intel_pmu_pebs_fixup_ip(regs))
2272 				regs->flags |= PERF_EFLAGS_EXACT;
2273 		}
2274 	} else {
2275 		/*
2276 		 * When precise_ip == 1, return the PEBS off-by-1 IP,
2277 		 * no fixup attempted:
2278 		 */
2279 		set_linear_ip(regs, pebs->ip);
2280 	}
2281 
2282 
	/* The data linear address is only read for pebs_format >= 1. */
2283 	if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
2284 	    x86_pmu.intel_cap.pebs_format >= 1) {
2285 		data->addr = pebs->dla;
2286 		data->sample_flags |= PERF_SAMPLE_ADDR;
2287 	}
2288 
2289 	if (x86_pmu.intel_cap.pebs_format >= 2) {
2290 		/* Only set the TSX weight when no memory weight. */
2291 		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
2292 			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
2293 			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2294 		}
2295 		if (sample_type & PERF_SAMPLE_TRANSACTION) {
2296 			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
2297 							      pebs->ax);
2298 			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
2299 		}
2300 	}
2301 
2302 	/*
2303 	 * v3 supplies an accurate time stamp, so we use that
2304 	 * for the time stamp.
2305 	 *
2306 	 * We can only do this for the default trace clock.
2307 	 */
2308 	if (x86_pmu.intel_cap.pebs_format >= 3)
2309 		setup_pebs_time(event, data, pebs->tsc);
2310 
2311 	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
2312 }
2313 
adaptive_pebs_save_regs(struct pt_regs * regs,struct pebs_gprs * gprs)2314 static void adaptive_pebs_save_regs(struct pt_regs *regs,
2315 				    struct pebs_gprs *gprs)
2316 {
2317 	regs->ax = gprs->ax;
2318 	regs->bx = gprs->bx;
2319 	regs->cx = gprs->cx;
2320 	regs->dx = gprs->dx;
2321 	regs->si = gprs->si;
2322 	regs->di = gprs->di;
2323 	regs->bp = gprs->bp;
2324 	regs->sp = gprs->sp;
2325 #ifndef CONFIG_X86_32
2326 	regs->r8 = gprs->r8;
2327 	regs->r9 = gprs->r9;
2328 	regs->r10 = gprs->r10;
2329 	regs->r11 = gprs->r11;
2330 	regs->r12 = gprs->r12;
2331 	regs->r13 = gprs->r13;
2332 	regs->r14 = gprs->r14;
2333 	regs->r15 = gprs->r15;
2334 #endif
2335 }
2336 
/*
 * Fold a counter value @pmc snapshotted in a PEBS record into @event:
 * remember it as the new prev_count, add the delta since the previous
 * value to event->count and subtract it from period_left.
 *
 * @event may be NULL, in which case the value is dropped.
 */
static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
{
	struct hw_perf_event *hwc;
	u64 old_pmc, delta;
	int shift;

	/*
	 * A recorded counter may have no event assigned to it; the value is
	 * dropped in that case. This happens when:
	 * - An event was deleted while another PEBS event is still active.
	 *   The PEBS record doesn't shrink on pmu::del(), so a counter that
	 *   once showed up in a PEBS record keeps being recorded until the
	 *   counter is reassigned.
	 * - An event is stopped for some reason, e.g., throttled. Meanwhile
	 *   another event is added and takes over its counter, and the
	 *   stopped event is moved to a new, uninitialized counter, since
	 *   x86_pmu_start(RELOAD) is not invoked for a stopped event.
	 *   PEBS_DATA_CFG is updated regardless of the event state, so the
	 *   uninitialized counter can appear in a PEBS record. However,
	 *   cpuc->events[uninitialized_counter] is always NULL because the
	 *   event is stopped, so the uninitialized value is safely dropped.
	 */
	if (!event)
		return;

	hwc = &event->hw;
	shift = 64 - x86_pmu.cntval_bits;
	old_pmc = local64_read(&hwc->prev_count);

	/* Only update the count when the PMU is disabled */
	WARN_ON(this_cpu_read(cpu_hw_events.enabled));
	local64_set(&hwc->prev_count, pmc);

	/* Shift up and back down so only the counter's valid bits count. */
	delta = ((pmc << shift) - (old_pmc << shift)) >> shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}
2378 
__setup_pebs_counter_group(struct cpu_hw_events * cpuc,struct perf_event * event,struct pebs_cntr_header * cntr,void * next_record)2379 static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
2380 					      struct perf_event *event,
2381 					      struct pebs_cntr_header *cntr,
2382 					      void *next_record)
2383 {
2384 	int bit;
2385 
2386 	for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
2387 		intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
2388 		next_record += sizeof(u64);
2389 	}
2390 
2391 	for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
2392 		/* The slots event will be handled with perf_metric later */
2393 		if ((cntr->metrics == INTEL_CNTR_METRICS) &&
2394 		    (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
2395 			next_record += sizeof(u64);
2396 			continue;
2397 		}
2398 		intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
2399 					    *(u64 *)next_record);
2400 		next_record += sizeof(u64);
2401 	}
2402 
2403 	/* HW will reload the value right after the overflow. */
2404 	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2405 		local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
2406 
2407 	if (cntr->metrics == INTEL_CNTR_METRICS) {
2408 		static_call(intel_pmu_update_topdown_event)
2409 			   (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
2410 			    (u64 *)next_record);
2411 		next_record += 2 * sizeof(u64);
2412 	}
2413 }
2414 
/*
 * Low 16 bits set.
 * NOTE(review): no user is visible in this part of the file; presumably
 * masks a PEBS latency field - confirm against the callers.
 */
2415 #define PEBS_LATENCY_MASK			0xffff
2416 
__setup_perf_sample_data(struct perf_event * event,struct pt_regs * iregs,struct perf_sample_data * data)2417 static inline void __setup_perf_sample_data(struct perf_event *event,
2418 					    struct pt_regs *iregs,
2419 					    struct perf_sample_data *data)
2420 {
2421 	perf_sample_data_init(data, 0, event->hw.last_period);
2422 
2423 	/*
2424 	 * We must however always use iregs for the unwinder to stay sane; the
2425 	 * record BP,SP,IP can point into thin air when the record is from a
2426 	 * previous PMI context or an (I)RET happened between the record and
2427 	 * PMI.
2428 	 */
2429 	perf_sample_save_callchain(data, event, iregs);
2430 }
2431 
/*
 * Apply the basic group of a PEBS record: set the sample IP from @ip, mark
 * it exact, set the time stamp from @tsc and, for
 * PERF_SAMPLE_WEIGHT_STRUCT, store @retire in weight.var3_w.
 */
static inline void __setup_pebs_basic_group(struct perf_event *event,
					    struct pt_regs *regs,
					    struct perf_sample_data *data,
					    u64 sample_type, u64 ip,
					    u64 tsc, u16 retire)
{
	/* The ip in basic is EventingIP */
	set_linear_ip(regs, ip);
	regs->flags = PERF_EFLAGS_EXACT;

	setup_pebs_time(event, data, tsc);

	if (!(sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
		return;

	data->weight.var3_w = retire;
}
2446 
/*
 * Apply the GPR group of a PEBS record: for precise_ip < 2 take the IP from
 * the group and clear the exact flag; copy the registers when the sample
 * asks for interrupt or user registers.
 */
static inline void __setup_pebs_gpr_group(struct perf_event *event,
					  struct pt_regs *regs,
					  struct pebs_gprs *gprs,
					  u64 sample_type)
{
	bool want_regs = sample_type &
			 (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);

	if (event->attr.precise_ip < 2) {
		set_linear_ip(regs, gprs->ip);
		regs->flags &= ~PERF_EFLAGS_EXACT;
	}

	if (want_regs)
		adaptive_pebs_save_regs(regs, gprs);
}
2460 
/*
 * Apply the memory-info group of a PEBS record to @data: weight, data
 * source, address and transaction, each only if requested in @sample_type.
 *
 * @ax is the AX value used for the transaction abort code.
 */
static inline void __setup_pebs_meminfo_group(struct perf_event *event,
					      struct perf_sample_data *data,
					      u64 sample_type, u64 latency,
					      u16 instr_latency, u64 address,
					      u64 aux, u64 tsx_tuning, u64 ax)
{
	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
		u32 latency32 = (u32)latency;

		data->weight.var2_w = instr_latency;

		/*
		 * meminfo::latency is defined as a u64, but in practice only
		 * the lower 32 bits carry valid data on Ice Lake and earlier
		 * platforms. The TSX latency is the fallback when the memory
		 * latency is zero.
		 */
		if (sample_type & PERF_SAMPLE_WEIGHT)
			data->weight.full = latency ? latency : tsx_latency;
		else
			data->weight.var1_dw = latency32 ? latency32 : tsx_latency;

		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		data->data_src.val = get_data_src(event, aux);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
		data->addr = address;
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if (sample_type & PERF_SAMPLE_TRANSACTION) {
		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
	}
}
2500 
2501 /*
2502  * With adaptive PEBS the layout depends on what fields are configured.
2503  */
/*
 * Decode one adaptive PEBS record at @__pebs into @data and @regs.
 *
 * The record starts with a struct pebs_basic; the optional groups are
 * parsed in the order MEMINFO, GP, XMMS, LBRS, counters/metrics, each only
 * if its bit is set in basic->format_group. @iregs seed @regs and provide
 * the callchain. A NULL record is ignored.
 */
setup_pebs_adaptive_sample_data(struct perf_event * event,struct pt_regs * iregs,void * __pebs,struct perf_sample_data * data,struct pt_regs * regs)2504 static void setup_pebs_adaptive_sample_data(struct perf_event *event,
2505 					    struct pt_regs *iregs, void *__pebs,
2506 					    struct perf_sample_data *data,
2507 					    struct pt_regs *regs)
2508 {
2509 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2510 	u64 sample_type = event->attr.sample_type;
2511 	struct pebs_basic *basic = __pebs;
2512 	void *next_record = basic + 1;
2513 	struct pebs_meminfo *meminfo = NULL;
2514 	struct pebs_gprs *gprs = NULL;
2515 	struct x86_perf_regs *perf_regs;
2516 	u64 format_group;
2517 	u16 retire;
2518 
2519 	if (basic == NULL)
2520 		return;
2521 
2522 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
2523 	perf_regs->xmm_regs = NULL;
2524 
2525 	format_group = basic->format_group;
2526 
2527 	__setup_perf_sample_data(event, iregs, data);
2528 
2529 	*regs = *iregs;
2530 
2531 	/* basic group */
2532 	retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
2533 			basic->retire_latency : 0;
2534 	__setup_pebs_basic_group(event, regs, data, sample_type,
2535 				 basic->ip, basic->tsc, retire);
2536 
2537 	/*
2538 	 * The record for MEMINFO is in front of GP
2539 	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
2540 	 * Save the pointer here but process later.
2541 	 */
2542 	if (format_group & PEBS_DATACFG_MEMINFO) {
2543 		meminfo = next_record;
2544 		next_record = meminfo + 1;
2545 	}
2546 
2547 	if (format_group & PEBS_DATACFG_GP) {
2548 		gprs = next_record;
2549 		next_record = gprs + 1;
2550 
2551 		__setup_pebs_gpr_group(event, regs, gprs, sample_type);
2552 	}
2553 
	/* Now that gprs (if any) is known, process the saved MEMINFO group. */
2554 	if (format_group & PEBS_DATACFG_MEMINFO) {
2555 		u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
2556 				meminfo->cache_latency : meminfo->mem_latency;
2557 		u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
2558 				meminfo->instr_latency : 0;
2559 		u64 ax = gprs ? gprs->ax : 0;
2560 
2561 		__setup_pebs_meminfo_group(event, data, sample_type, latency,
2562 					   instr_latency, meminfo->address,
2563 					   meminfo->aux, meminfo->tsx_tuning,
2564 					   ax);
2565 	}
2566 
2567 	if (format_group & PEBS_DATACFG_XMMS) {
2568 		struct pebs_xmm *xmm = next_record;
2569 
2570 		next_record = xmm + 1;
2571 		perf_regs->xmm_regs = xmm->xmm;
2572 	}
2573 
2574 	if (format_group & PEBS_DATACFG_LBRS) {
2575 		struct lbr_entry *lbr = next_record;
2576 		int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
2577 					& 0xff) + 1;
2578 		next_record = next_record + num_lbr * sizeof(struct lbr_entry);
2579 
2580 		if (has_branch_stack(event)) {
2581 			intel_pmu_store_pebs_lbrs(lbr);
2582 			intel_pmu_lbr_save_brstack(data, cpuc, event);
2583 		}
2584 	}
2585 
2586 	if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
2587 		struct pebs_cntr_header *cntr = next_record;
2588 		unsigned int nr;
2589 
2590 		next_record += sizeof(struct pebs_cntr_header);
2591 		/*
2592 		 * The PEBS_DATA_CFG is a global register, which is the
2593 		 * superset configuration for all PEBS events.
2594 		 * For the PEBS record of non-sample-read group, ignore
2595 		 * the counter snapshot fields.
2596 		 */
2597 		if (is_pebs_counter_event_group(event)) {
2598 			__setup_pebs_counter_group(cpuc, event, cntr, next_record);
2599 			data->sample_flags |= PERF_SAMPLE_READ;
2600 		}
2601 
		/* Step over the snapshot values whether or not they were used. */
2602 		nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
2603 		if (cntr->metrics == INTEL_CNTR_METRICS)
2604 			nr += 2;
2605 		next_record += nr * sizeof(u64);
2606 	}
2607 
	/* The parsed length must match the size the record itself reports. */
2608 	WARN_ONCE(next_record != __pebs + basic->format_size,
2609 			"PEBS record size %u, expected %llu, config %llx\n",
2610 			basic->format_size,
2611 			(u64)(next_record - __pebs),
2612 			format_group);
2613 }
2614 
arch_pebs_record_continued(struct arch_pebs_header * header)2615 static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
2616 {
2617 	/* Continue bit or null PEBS record indicates fragment follows. */
2618 	return header->cont || !(header->format & GENMASK_ULL(63, 16));
2619 }
2620 
setup_arch_pebs_sample_data(struct perf_event * event,struct pt_regs * iregs,void * __pebs,struct perf_sample_data * data,struct pt_regs * regs)2621 static void setup_arch_pebs_sample_data(struct perf_event *event,
2622 					struct pt_regs *iregs,
2623 					void *__pebs,
2624 					struct perf_sample_data *data,
2625 					struct pt_regs *regs)
2626 {
2627 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2628 	u64 sample_type = event->attr.sample_type;
2629 	struct arch_pebs_header *header = NULL;
2630 	struct arch_pebs_aux *meminfo = NULL;
2631 	struct arch_pebs_gprs *gprs = NULL;
2632 	struct x86_perf_regs *perf_regs;
2633 	void *next_record;
2634 	void *at = __pebs;
2635 
2636 	if (at == NULL)
2637 		return;
2638 
2639 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
2640 	perf_regs->xmm_regs = NULL;
2641 
2642 	__setup_perf_sample_data(event, iregs, data);
2643 
2644 	*regs = *iregs;
2645 
2646 again:
2647 	header = at;
2648 	next_record = at + sizeof(struct arch_pebs_header);
2649 	if (header->basic) {
2650 		struct arch_pebs_basic *basic = next_record;
2651 		u16 retire = 0;
2652 
2653 		next_record = basic + 1;
2654 
2655 		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
2656 			retire = basic->valid ? basic->retire : 0;
2657 		__setup_pebs_basic_group(event, regs, data, sample_type,
2658 				 basic->ip, basic->tsc, retire);
2659 	}
2660 
2661 	/*
2662 	 * The record for MEMINFO is in front of GP
2663 	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
2664 	 * Save the pointer here but process later.
2665 	 */
2666 	if (header->aux) {
2667 		meminfo = next_record;
2668 		next_record = meminfo + 1;
2669 	}
2670 
2671 	if (header->gpr) {
2672 		gprs = next_record;
2673 		next_record = gprs + 1;
2674 
2675 		__setup_pebs_gpr_group(event, regs,
2676 				       (struct pebs_gprs *)gprs,
2677 				       sample_type);
2678 	}
2679 
2680 	if (header->aux) {
2681 		u64 ax = gprs ? gprs->ax : 0;
2682 
2683 		__setup_pebs_meminfo_group(event, data, sample_type,
2684 					   meminfo->cache_latency,
2685 					   meminfo->instr_latency,
2686 					   meminfo->address, meminfo->aux,
2687 					   meminfo->tsx_tuning, ax);
2688 	}
2689 
2690 	if (header->xmm) {
2691 		struct pebs_xmm *xmm;
2692 
2693 		next_record += sizeof(struct arch_pebs_xer_header);
2694 
2695 		xmm = next_record;
2696 		perf_regs->xmm_regs = xmm->xmm;
2697 		next_record = xmm + 1;
2698 	}
2699 
2700 	if (header->lbr) {
2701 		struct arch_pebs_lbr_header *lbr_header = next_record;
2702 		struct lbr_entry *lbr;
2703 		int num_lbr;
2704 
2705 		next_record = lbr_header + 1;
2706 		lbr = next_record;
2707 
2708 		num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
2709 				lbr_header->depth :
2710 				header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
2711 		next_record += num_lbr * sizeof(struct lbr_entry);
2712 
2713 		if (has_branch_stack(event)) {
2714 			intel_pmu_store_pebs_lbrs(lbr);
2715 			intel_pmu_lbr_save_brstack(data, cpuc, event);
2716 		}
2717 	}
2718 
2719 	if (header->cntr) {
2720 		struct arch_pebs_cntr_header *cntr = next_record;
2721 		unsigned int nr;
2722 
2723 		next_record += sizeof(struct arch_pebs_cntr_header);
2724 
2725 		if (is_pebs_counter_event_group(event)) {
2726 			__setup_pebs_counter_group(cpuc, event,
2727 				(struct pebs_cntr_header *)cntr, next_record);
2728 			data->sample_flags |= PERF_SAMPLE_READ;
2729 		}
2730 
2731 		nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
2732 		if (cntr->metrics == INTEL_CNTR_METRICS)
2733 			nr += 2;
2734 		next_record += nr * sizeof(u64);
2735 	}
2736 
2737 	/* Parse followed fragments if there are. */
2738 	if (arch_pebs_record_continued(header)) {
2739 		at = at + header->size;
2740 		goto again;
2741 	}
2742 }
2743 
2744 static inline void *
get_next_pebs_record_by_bit(void * base,void * top,int bit)2745 get_next_pebs_record_by_bit(void *base, void *top, int bit)
2746 {
2747 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2748 	void *at;
2749 	u64 pebs_status;
2750 
2751 	/*
2752 	 * fmt0 does not have a status bitfield (does not use
2753 	 * perf_record_nhm format)
2754 	 */
2755 	if (x86_pmu.intel_cap.pebs_format < 1)
2756 		return base;
2757 
2758 	if (base == NULL)
2759 		return NULL;
2760 
2761 	for (at = base; at < top; at += cpuc->pebs_record_size) {
2762 		unsigned long status = get_pebs_status(at);
2763 
2764 		if (test_bit(bit, (unsigned long *)&status)) {
2765 			/* PEBS v3 has accurate status bits */
2766 			if (x86_pmu.intel_cap.pebs_format >= 3)
2767 				return at;
2768 
2769 			if (status == (1 << bit))
2770 				return at;
2771 
2772 			/* clear non-PEBS bit and re-check */
2773 			pebs_status = status & cpuc->pebs_enabled;
2774 			pebs_status &= PEBS_COUNTER_MASK;
2775 			if (pebs_status == (1 << bit))
2776 				return at;
2777 		}
2778 	}
2779 	return NULL;
2780 }
2781 
2782 /*
2783  * Special variant of intel_pmu_save_and_restart() for auto-reload.
2784  */
2785 static int
intel_pmu_save_and_restart_reload(struct perf_event * event,int count)2786 intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
2787 {
2788 	struct hw_perf_event *hwc = &event->hw;
2789 	int shift = 64 - x86_pmu.cntval_bits;
2790 	u64 period = hwc->sample_period;
2791 	u64 prev_raw_count, new_raw_count;
2792 	s64 new, old;
2793 
2794 	WARN_ON(!period);
2795 
2796 	/*
2797 	 * drain_pebs() only happens when the PMU is disabled.
2798 	 */
2799 	WARN_ON(this_cpu_read(cpu_hw_events.enabled));
2800 
2801 	prev_raw_count = local64_read(&hwc->prev_count);
2802 	new_raw_count = rdpmc(hwc->event_base_rdpmc);
2803 	local64_set(&hwc->prev_count, new_raw_count);
2804 
2805 	/*
2806 	 * Since the counter increments a negative counter value and
2807 	 * overflows on the sign switch, giving the interval:
2808 	 *
2809 	 *   [-period, 0]
2810 	 *
2811 	 * the difference between two consecutive reads is:
2812 	 *
2813 	 *   A) value2 - value1;
2814 	 *      when no overflows have happened in between,
2815 	 *
2816 	 *   B) (0 - value1) + (value2 - (-period));
2817 	 *      when one overflow happened in between,
2818 	 *
2819 	 *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
2820 	 *      when @n overflows happened in between.
2821 	 *
2822 	 * Here A) is the obvious difference, B) is the extension to the
2823 	 * discrete interval, where the first term is to the top of the
2824 	 * interval and the second term is from the bottom of the next
2825 	 * interval and C) the extension to multiple intervals, where the
2826 	 * middle term is the whole intervals covered.
2827 	 *
2828 	 * An equivalent of C, by reduction, is:
2829 	 *
2830 	 *   value2 - value1 + n * period
2831 	 */
2832 	new = ((s64)(new_raw_count << shift) >> shift);
2833 	old = ((s64)(prev_raw_count << shift) >> shift);
2834 	local64_add(new - old + count * period, &event->count);
2835 
2836 	local64_set(&hwc->period_left, -new);
2837 
2838 	perf_event_update_userpage(event);
2839 
2840 	return 0;
2841 }
2842 
2843 typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
2844 			 struct perf_sample_data *, struct pt_regs *);
2845 
2846 static struct pt_regs dummy_iregs;
2847 
2848 static __always_inline void
__intel_pmu_pebs_event(struct perf_event * event,struct pt_regs * iregs,struct pt_regs * regs,struct perf_sample_data * data,void * at,setup_fn setup_sample)2849 __intel_pmu_pebs_event(struct perf_event *event,
2850 		       struct pt_regs *iregs,
2851 		       struct pt_regs *regs,
2852 		       struct perf_sample_data *data,
2853 		       void *at,
2854 		       setup_fn setup_sample)
2855 {
2856 	setup_sample(event, iregs, at, data, regs);
2857 	perf_event_output(event, data, regs);
2858 }
2859 
2860 static __always_inline void
__intel_pmu_pebs_last_event(struct perf_event * event,struct pt_regs * iregs,struct pt_regs * regs,struct perf_sample_data * data,void * at,int count,setup_fn setup_sample)2861 __intel_pmu_pebs_last_event(struct perf_event *event,
2862 			    struct pt_regs *iregs,
2863 			    struct pt_regs *regs,
2864 			    struct perf_sample_data *data,
2865 			    void *at,
2866 			    int count,
2867 			    setup_fn setup_sample)
2868 {
2869 	struct hw_perf_event *hwc = &event->hw;
2870 
2871 	setup_sample(event, iregs, at, data, regs);
2872 	if (iregs == &dummy_iregs) {
2873 		/*
2874 		 * The PEBS records may be drained in the non-overflow context,
2875 		 * e.g., large PEBS + context switch. Perf should treat the
2876 		 * last record the same as other PEBS records, and doesn't
2877 		 * invoke the generic overflow handler.
2878 		 */
2879 		perf_event_output(event, data, regs);
2880 	} else {
2881 		/*
2882 		 * All but the last records are processed.
2883 		 * The last one is left to be able to call the overflow handler.
2884 		 */
2885 		perf_event_overflow(event, data, regs);
2886 	}
2887 
2888 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
2889 		if ((is_pebs_counter_event_group(event))) {
2890 			/*
2891 			 * The value of each sample has been updated when setup
2892 			 * the corresponding sample data.
2893 			 */
2894 			perf_event_update_userpage(event);
2895 		} else {
2896 			/*
2897 			 * Now, auto-reload is only enabled in fixed period mode.
2898 			 * The reload value is always hwc->sample_period.
2899 			 * May need to change it, if auto-reload is enabled in
2900 			 * freq mode later.
2901 			 */
2902 			intel_pmu_save_and_restart_reload(event, count);
2903 		}
2904 	} else {
2905 		/*
2906 		 * For a non-precise event, it's possible the
2907 		 * counters-snapshotting records a positive value for the
2908 		 * overflowed event. Then the HW auto-reload mechanism
2909 		 * reset the counter to 0 immediately, because the
2910 		 * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD
2911 		 * is not set. The counter backwards may be observed in a
2912 		 * PMI handler.
2913 		 *
2914 		 * Since the event value has been updated when processing the
2915 		 * counters-snapshotting record, only needs to set the new
2916 		 * period for the counter.
2917 		 */
2918 		if (is_pebs_counter_event_group(event))
2919 			static_call(x86_pmu_set_period)(event);
2920 		else
2921 			intel_pmu_save_and_restart(event);
2922 	}
2923 }
2924 
2925 static __always_inline void
__intel_pmu_pebs_events(struct perf_event * event,struct pt_regs * iregs,struct perf_sample_data * data,void * base,void * top,int bit,int count,setup_fn setup_sample)2926 __intel_pmu_pebs_events(struct perf_event *event,
2927 			struct pt_regs *iregs,
2928 			struct perf_sample_data *data,
2929 			void *base, void *top,
2930 			int bit, int count,
2931 			setup_fn setup_sample)
2932 {
2933 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2934 	struct x86_perf_regs perf_regs;
2935 	struct pt_regs *regs = &perf_regs.regs;
2936 	void *at = get_next_pebs_record_by_bit(base, top, bit);
2937 	int cnt = count;
2938 
2939 	if (!iregs)
2940 		iregs = &dummy_iregs;
2941 
2942 	while (cnt > 1) {
2943 		__intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
2944 		at += cpuc->pebs_record_size;
2945 		at = get_next_pebs_record_by_bit(at, top, bit);
2946 		cnt--;
2947 	}
2948 
2949 	__intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
2950 }
2951 
intel_pmu_drain_pebs_core(struct pt_regs * iregs,struct perf_sample_data * data)2952 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
2953 {
2954 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2955 	struct debug_store *ds = cpuc->ds;
2956 	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
2957 	struct pebs_record_core *at, *top;
2958 	int n;
2959 
2960 	if (!x86_pmu.pebs_active)
2961 		return;
2962 
2963 	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
2964 	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
2965 
2966 	/*
2967 	 * Whatever else happens, drain the thing
2968 	 */
2969 	ds->pebs_index = ds->pebs_buffer_base;
2970 
2971 	if (!test_bit(0, cpuc->active_mask))
2972 		return;
2973 
2974 	WARN_ON_ONCE(!event);
2975 
2976 	if (!event->attr.precise_ip)
2977 		return;
2978 
2979 	n = top - at;
2980 	if (n <= 0) {
2981 		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2982 			intel_pmu_save_and_restart_reload(event, 0);
2983 		return;
2984 	}
2985 
2986 	__intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
2987 				setup_pebs_fixed_sample_data);
2988 }
2989 
/*
 * Refresh the count of auto-reload events when there is nothing to drain.
 *
 * @cpuc: per-CPU event state
 * @mask: bitmask of counters to consider; it is ANDed with
 *        cpuc->pebs_enabled
 */
static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
{
	u64 pebs_enabled = cpuc->pebs_enabled & mask;
	struct perf_event *event;
	int bit;

	/*
	 * The drain_pebs() could be called twice in a short period
	 * for auto-reload event in pmu::read(). No overflows have
	 * happened in between.
	 * It needs to call intel_pmu_save_and_restart_reload() to
	 * update the event->count for this case.
	 */
	for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
		event = cpuc->events[bit];
		if (!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
			continue;

		intel_pmu_save_and_restart_reload(event, 0);
	}
}
3009 
intel_pmu_drain_pebs_nhm(struct pt_regs * iregs,struct perf_sample_data * data)3010 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
3011 {
3012 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3013 	struct debug_store *ds = cpuc->ds;
3014 	struct perf_event *event;
3015 	void *base, *at, *top;
3016 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3017 	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3018 	int max_pebs_events = intel_pmu_max_num_pebs(NULL);
3019 	int bit, i, size;
3020 	u64 mask;
3021 
3022 	if (!x86_pmu.pebs_active)
3023 		return;
3024 
3025 	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
3026 	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
3027 
3028 	ds->pebs_index = ds->pebs_buffer_base;
3029 
3030 	mask = x86_pmu.pebs_events_mask;
3031 	size = max_pebs_events;
3032 	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
3033 		mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
3034 		size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
3035 	}
3036 
3037 	if (unlikely(base >= top)) {
3038 		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
3039 		return;
3040 	}
3041 
3042 	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
3043 		struct pebs_record_nhm *p = at;
3044 		u64 pebs_status;
3045 
3046 		pebs_status = p->status & cpuc->pebs_enabled;
3047 		pebs_status &= mask;
3048 
3049 		/* PEBS v3 has more accurate status bits */
3050 		if (x86_pmu.intel_cap.pebs_format >= 3) {
3051 			for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
3052 				counts[bit]++;
3053 
3054 			continue;
3055 		}
3056 
3057 		/*
3058 		 * On some CPUs the PEBS status can be zero when PEBS is
3059 		 * racing with clearing of GLOBAL_STATUS.
3060 		 *
3061 		 * Normally we would drop that record, but in the
3062 		 * case when there is only a single active PEBS event
3063 		 * we can assume it's for that event.
3064 		 */
3065 		if (!pebs_status && cpuc->pebs_enabled &&
3066 			!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
3067 			pebs_status = p->status = cpuc->pebs_enabled;
3068 
3069 		bit = find_first_bit((unsigned long *)&pebs_status,
3070 				     max_pebs_events);
3071 
3072 		if (!(x86_pmu.pebs_events_mask & (1 << bit)))
3073 			continue;
3074 
3075 		/*
3076 		 * The PEBS hardware does not deal well with the situation
3077 		 * when events happen near to each other and multiple bits
3078 		 * are set. But it should happen rarely.
3079 		 *
3080 		 * If these events include one PEBS and multiple non-PEBS
3081 		 * events, it doesn't impact PEBS record. The record will
3082 		 * be handled normally. (slow path)
3083 		 *
3084 		 * If these events include two or more PEBS events, the
3085 		 * records for the events can be collapsed into a single
3086 		 * one, and it's not possible to reconstruct all events
3087 		 * that caused the PEBS record. It's called collision.
3088 		 * If collision happened, the record will be dropped.
3089 		 */
3090 		if (pebs_status != (1ULL << bit)) {
3091 			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
3092 				error[i]++;
3093 			continue;
3094 		}
3095 
3096 		counts[bit]++;
3097 	}
3098 
3099 	for_each_set_bit(bit, (unsigned long *)&mask, size) {
3100 		if ((counts[bit] == 0) && (error[bit] == 0))
3101 			continue;
3102 
3103 		event = cpuc->events[bit];
3104 		if (WARN_ON_ONCE(!event))
3105 			continue;
3106 
3107 		if (WARN_ON_ONCE(!event->attr.precise_ip))
3108 			continue;
3109 
3110 		/* log dropped samples number */
3111 		if (error[bit]) {
3112 			perf_log_lost_samples(event, error[bit]);
3113 
3114 			if (iregs)
3115 				perf_event_account_interrupt(event);
3116 		}
3117 
3118 		if (counts[bit]) {
3119 			__intel_pmu_pebs_events(event, iregs, data, base,
3120 						top, bit, counts[bit],
3121 						setup_pebs_fixed_sample_data);
3122 		}
3123 	}
3124 }
3125 
3126 static __always_inline void
__intel_pmu_handle_pebs_record(struct pt_regs * iregs,struct pt_regs * regs,struct perf_sample_data * data,void * at,u64 pebs_status,short * counts,void ** last,setup_fn setup_sample)3127 __intel_pmu_handle_pebs_record(struct pt_regs *iregs,
3128 			       struct pt_regs *regs,
3129 			       struct perf_sample_data *data,
3130 			       void *at, u64 pebs_status,
3131 			       short *counts, void **last,
3132 			       setup_fn setup_sample)
3133 {
3134 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3135 	struct perf_event *event;
3136 	int bit;
3137 
3138 	for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
3139 		event = cpuc->events[bit];
3140 
3141 		if (WARN_ON_ONCE(!event) ||
3142 		    WARN_ON_ONCE(!event->attr.precise_ip))
3143 			continue;
3144 
3145 		if (counts[bit]++) {
3146 			__intel_pmu_pebs_event(event, iregs, regs, data,
3147 					       last[bit], setup_sample);
3148 		}
3149 
3150 		last[bit] = at;
3151 	}
3152 }
3153 
3154 static __always_inline void
__intel_pmu_handle_last_pebs_record(struct pt_regs * iregs,struct pt_regs * regs,struct perf_sample_data * data,u64 mask,short * counts,void ** last,setup_fn setup_sample)3155 __intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
3156 				    struct pt_regs *regs,
3157 				    struct perf_sample_data *data,
3158 				    u64 mask, short *counts, void **last,
3159 				    setup_fn setup_sample)
3160 {
3161 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3162 	struct perf_event *event;
3163 	int bit;
3164 
3165 	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
3166 		if (!counts[bit])
3167 			continue;
3168 
3169 		event = cpuc->events[bit];
3170 
3171 		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
3172 					    counts[bit], setup_sample);
3173 	}
3174 
3175 }
3176 
intel_pmu_drain_pebs_icl(struct pt_regs * iregs,struct perf_sample_data * data)3177 static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
3178 {
3179 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3180 	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3181 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3182 	struct debug_store *ds = cpuc->ds;
3183 	struct x86_perf_regs perf_regs;
3184 	struct pt_regs *regs = &perf_regs.regs;
3185 	struct pebs_basic *basic;
3186 	void *base, *at, *top;
3187 	u64 mask;
3188 
3189 	if (!x86_pmu.pebs_active)
3190 		return;
3191 
3192 	base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
3193 	top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
3194 
3195 	ds->pebs_index = ds->pebs_buffer_base;
3196 
3197 	mask = hybrid(cpuc->pmu, pebs_events_mask) |
3198 	       (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
3199 	mask &= cpuc->pebs_enabled;
3200 
3201 	if (unlikely(base >= top)) {
3202 		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
3203 		return;
3204 	}
3205 
3206 	if (!iregs)
3207 		iregs = &dummy_iregs;
3208 
3209 	/* Process all but the last event for each counter. */
3210 	for (at = base; at < top; at += basic->format_size) {
3211 		u64 pebs_status;
3212 
3213 		basic = at;
3214 		if (basic->format_size != cpuc->pebs_record_size)
3215 			continue;
3216 
3217 		pebs_status = mask & basic->applicable_counters;
3218 		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3219 					       pebs_status, counts, last,
3220 					       setup_pebs_adaptive_sample_data);
3221 	}
3222 
3223 	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
3224 					    setup_pebs_adaptive_sample_data);
3225 }
3226 
intel_pmu_drain_arch_pebs(struct pt_regs * iregs,struct perf_sample_data * data)3227 static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
3228 				      struct perf_sample_data *data)
3229 {
3230 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3231 	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3232 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3233 	union arch_pebs_index index;
3234 	struct x86_perf_regs perf_regs;
3235 	struct pt_regs *regs = &perf_regs.regs;
3236 	void *base, *at, *top;
3237 	u64 mask;
3238 
3239 	rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3240 
3241 	if (unlikely(!index.wr)) {
3242 		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
3243 		return;
3244 	}
3245 
3246 	base = cpuc->pebs_vaddr;
3247 	top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);
3248 
3249 	index.wr = 0;
3250 	index.full = 0;
3251 	index.en = 1;
3252 	if (cpuc->n_pebs == cpuc->n_large_pebs)
3253 		index.thresh = ARCH_PEBS_THRESH_MULTI;
3254 	else
3255 		index.thresh = ARCH_PEBS_THRESH_SINGLE;
3256 	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3257 
3258 	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
3259 
3260 	if (!iregs)
3261 		iregs = &dummy_iregs;
3262 
3263 	/* Process all but the last event for each counter. */
3264 	for (at = base; at < top;) {
3265 		struct arch_pebs_header *header;
3266 		struct arch_pebs_basic *basic;
3267 		u64 pebs_status;
3268 
3269 		header = at;
3270 
3271 		if (WARN_ON_ONCE(!header->size))
3272 			break;
3273 
3274 		/* 1st fragment or single record must have basic group */
3275 		if (!header->basic) {
3276 			at += header->size;
3277 			continue;
3278 		}
3279 
3280 		basic = at + sizeof(struct arch_pebs_header);
3281 		pebs_status = mask & basic->applicable_counters;
3282 		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3283 					       pebs_status, counts, last,
3284 					       setup_arch_pebs_sample_data);
3285 
3286 		/* Skip non-last fragments */
3287 		while (arch_pebs_record_continued(header)) {
3288 			if (!header->size)
3289 				break;
3290 			at += header->size;
3291 			header = at;
3292 		}
3293 
3294 		/* Skip last fragment or the single record */
3295 		at += header->size;
3296 	}
3297 
3298 	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
3299 					    counts, last,
3300 					    setup_arch_pebs_sample_data);
3301 }
3302 
intel_arch_pebs_init(void)3303 static void __init intel_arch_pebs_init(void)
3304 {
3305 	/*
3306 	 * Current hybrid platforms always both support arch-PEBS or not
3307 	 * on all kinds of cores. So directly set x86_pmu.arch_pebs flag
3308 	 * if boot cpu supports arch-PEBS.
3309 	 */
3310 	x86_pmu.arch_pebs = 1;
3311 	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
3312 	x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
3313 	x86_pmu.pebs_capable = ~0ULL;
3314 	x86_pmu.flags |= PMU_FL_PEBS_ALL;
3315 
3316 	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
3317 	x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
3318 }
3319 
3320 /*
3321  * PEBS probe and setup
3322  */
3323 
intel_ds_pebs_init(void)3324 static void __init intel_ds_pebs_init(void)
3325 {
3326 	/*
3327 	 * No support for 32bit formats
3328 	 */
3329 	if (!boot_cpu_has(X86_FEATURE_DTES64))
3330 		return;
3331 
3332 	x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
3333 	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
3334 	if (x86_pmu.version <= 4)
3335 		x86_pmu.pebs_no_isolation = 1;
3336 
3337 	if (x86_pmu.ds_pebs) {
3338 		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
3339 		char *pebs_qual = "";
3340 		int format = x86_pmu.intel_cap.pebs_format;
3341 
3342 		if (format < 4)
3343 			x86_pmu.intel_cap.pebs_baseline = 0;
3344 
3345 		x86_pmu.pebs_enable = intel_pmu_pebs_enable;
3346 		x86_pmu.pebs_disable = intel_pmu_pebs_disable;
3347 		x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
3348 		x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
3349 
3350 		switch (format) {
3351 		case 0:
3352 			pr_cont("PEBS fmt0%c, ", pebs_type);
3353 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
3354 			/*
3355 			 * Using >PAGE_SIZE buffers makes the WRMSR to
3356 			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
3357 			 * mysteriously hang on Core2.
3358 			 *
3359 			 * As a workaround, we don't do this.
3360 			 */
3361 			x86_pmu.pebs_buffer_size = PAGE_SIZE;
3362 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
3363 			break;
3364 
3365 		case 1:
3366 			pr_cont("PEBS fmt1%c, ", pebs_type);
3367 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
3368 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3369 			break;
3370 
3371 		case 2:
3372 			pr_cont("PEBS fmt2%c, ", pebs_type);
3373 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
3374 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3375 			break;
3376 
3377 		case 3:
3378 			pr_cont("PEBS fmt3%c, ", pebs_type);
3379 			x86_pmu.pebs_record_size =
3380 						sizeof(struct pebs_record_skl);
3381 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
3382 			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
3383 			break;
3384 
3385 		case 6:
3386 			if (x86_pmu.intel_cap.pebs_baseline)
3387 				x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
3388 			fallthrough;
3389 		case 5:
3390 			x86_pmu.pebs_ept = 1;
3391 			fallthrough;
3392 		case 4:
3393 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
3394 			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
3395 			if (x86_pmu.intel_cap.pebs_baseline) {
3396 				x86_pmu.large_pebs_flags |=
3397 					PERF_SAMPLE_BRANCH_STACK |
3398 					PERF_SAMPLE_TIME;
3399 				x86_pmu.flags |= PMU_FL_PEBS_ALL;
3400 				x86_pmu.pebs_capable = ~0ULL;
3401 				pebs_qual = "-baseline";
3402 				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
3403 			} else {
3404 				/* Only basic record supported */
3405 				x86_pmu.large_pebs_flags &=
3406 					~(PERF_SAMPLE_ADDR |
3407 					  PERF_SAMPLE_TIME |
3408 					  PERF_SAMPLE_DATA_SRC |
3409 					  PERF_SAMPLE_TRANSACTION |
3410 					  PERF_SAMPLE_REGS_USER |
3411 					  PERF_SAMPLE_REGS_INTR);
3412 			}
3413 			pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);
3414 
3415 			/*
3416 			 * The PEBS-via-PT is not supported on hybrid platforms,
3417 			 * because not all CPUs of a hybrid machine support it.
3418 			 * The global x86_pmu.intel_cap, which only contains the
3419 			 * common capabilities, is used to check the availability
3420 			 * of the feature. The per-PMU pebs_output_pt_available
3421 			 * in a hybrid machine should be ignored.
3422 			 */
3423 			if (x86_pmu.intel_cap.pebs_output_pt_available) {
3424 				pr_cont("PEBS-via-PT, ");
3425 				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
3426 			}
3427 
3428 			break;
3429 
3430 		default:
3431 			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
3432 			x86_pmu.ds_pebs = 0;
3433 		}
3434 	}
3435 }
3436 
intel_pebs_init(void)3437 void __init intel_pebs_init(void)
3438 {
3439 	if (x86_pmu.intel_cap.pebs_format == 0xf)
3440 		intel_arch_pebs_init();
3441 	else
3442 		intel_ds_pebs_init();
3443 }
3444 
perf_restore_debug_store(void)3445 void perf_restore_debug_store(void)
3446 {
3447 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
3448 
3449 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
3450 		return;
3451 
3452 	wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
3453 }
3454