xref: /linux/arch/x86/events/intel/ds.c (revision 7561361d7655828d50482cd9e80fa3bf73d9c92e)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/bitops.h>
3 #include <linux/types.h>
4 #include <linux/slab.h>
5 #include <linux/sched/clock.h>
6 
7 #include <asm/cpu_entry_area.h>
8 #include <asm/debugreg.h>
9 #include <asm/perf_event.h>
10 #include <asm/tlbflush.h>
11 #include <asm/insn.h>
12 #include <asm/io.h>
13 #include <asm/msr.h>
14 #include <asm/timer.h>
15 
16 #include "../perf_event.h"
17 
/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/*
 * Size in bytes of the per-CPU insn_buffer that alloc_pebs_buffer()
 * allocates when the PEBS record format is older than 2.
 */
#define PEBS_FIXUP_SIZE		PAGE_SIZE
25 
/*
 * pebs_record_32 for p4 and core is not supported:
 *
 * struct pebs_record_32 {
 *	u32 flags, ip;
 *	u32 ax, bx, cx, dx;
 *	u32 si, di, bp, sp;
 * };
 */
36 
/*
 * Field view of the 8-bit OMR data source value decoded by
 * parse_omr_data_source(). omr_full gives access to the raw byte.
 */
union omr_encoding {
	struct {
		u8 omr_source : 4;	/* index into omr_data_source[] */
		u8 omr_remote : 1;	/* access is reported as remote */
		u8 omr_hitm : 1;	/* reported as snoop HITM */
		u8 omr_snoop : 1;	/* low bit of the snoop code (source 0x2), FWD for 0x3-0x6 */
		u8 omr_promoted : 1;	/* high bit of the snoop code (source 0x2) */
	};
	u8 omr_full;
};
47 
/*
 * Overlapping views of the PEBS data source status word. Each anonymous
 * struct describes how one family of decoders in this file slices the
 * same low 32 bits; val holds the raw 64-bit status.
 */
union intel_x86_pebs_dse {
	u64 val;
	/* Load latency view, used by load_latency_data() and grt_latency_data() */
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_data_blk:1;
		unsigned int ld_addr_blk:1;
		unsigned int ld_reserved:24;
	};
	/* Precise store view, used by precise_store_data() */
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
	/* Store latency view, used by store_latency_data() */
	struct {
		unsigned int st_lat_dse:4;
		unsigned int st_lat_stlb_miss:1;
		unsigned int st_lat_locked:1;
		unsigned int ld_reserved3:26;
	};
	/* View used by cmt_latency_data() */
	struct {
		unsigned int mtl_dse:5;
		unsigned int mtl_locked:1;
		unsigned int mtl_stlb_miss:1;
		unsigned int mtl_fwd_blk:1;
		unsigned int ld_reserved4:24;
	};
	/* View used by lnc_latency_data() */
	struct {
		unsigned int lnc_dse:8;
		unsigned int ld_reserved5:2;
		unsigned int lnc_stlb_miss:1;
		unsigned int lnc_locked:1;
		unsigned int lnc_data_blk:1;
		unsigned int lnc_addr_blk:1;
		unsigned int ld_reserved6:18;
	};
	/* View used by pnc_latency_data(); pnc_l2_miss selects the OMR decode */
	struct {
		unsigned int pnc_dse: 8;
		unsigned int pnc_l2_miss:1;
		unsigned int pnc_stlb_clean_hit:1;
		unsigned int pnc_stlb_any_hit:1;
		unsigned int pnc_stlb_miss:1;
		unsigned int pnc_locked:1;
		unsigned int pnc_data_blk:1;
		unsigned int pnc_addr_blk:1;
		unsigned int pnc_fb_full:1;
		unsigned int ld_reserved8:16;
	};
	/* View used by arw_latency_data(); arw_l2_miss selects the OMR decode */
	struct {
		unsigned int arw_dse:8;
		unsigned int arw_l2_miss:1;
		unsigned int arw_xq_promotion:1;
		unsigned int arw_reissue:1;
		unsigned int arw_stlb_miss:1;
		unsigned int arw_locked:1;
		unsigned int arw_data_blk:1;
		unsigned int arw_addr_blk:1;
		unsigned int arw_fb_full:1;
		unsigned int ld_reserved9:16;
	};
};
112 
113 
/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
/* Shorthand for PERF_MEM_S(), used to compose the table entries below */
#define P(a, b) PERF_MEM_S(a, b)
/* Load operation combined with the "hit" level flag */
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
/* Level-number (LVLNUM) field for level x */
#define LEVEL(x) P(LVLNUM, x)
/* Marks the access as remote */
#define REM P(REMOTE, REMOTE)
/* Both the snoop NONE and snoop MISS flags */
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
123 
124 /* Version for Sandy Bridge and later */
125 static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
126 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
127 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
128 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
129 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
130 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
131 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
132 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
133 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
134 	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
135 	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
136 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
137 	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
138 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
139 	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
140 	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
141 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
142 };
143 
144 /* Patch up minor differences in the bits */
145 void __init intel_pmu_pebs_data_source_nhm(void)
146 {
147 	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
148 	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
149 	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
150 }
151 
152 static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
153 {
154 	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
155 
156 	data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
157 	data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
158 	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
159 	data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
160 	data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
161 }
162 
163 void __init intel_pmu_pebs_data_source_skl(bool pmem)
164 {
165 	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
166 }
167 
168 static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
169 {
170 	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
171 	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
172 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
173 }
174 
175 void __init intel_pmu_pebs_data_source_grt(void)
176 {
177 	__intel_pmu_pebs_data_source_grt(pebs_data_source);
178 }
179 
180 void __init intel_pmu_pebs_data_source_adl(void)
181 {
182 	u64 *data_source;
183 
184 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
185 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
186 	__intel_pmu_pebs_data_source_skl(false, data_source);
187 
188 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
189 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
190 	__intel_pmu_pebs_data_source_grt(data_source);
191 }
192 
193 static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
194 {
195 	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
196 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
197 	data_source[0x0a] = OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE);
198 	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
199 	data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
200 	data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
201 }
202 
203 void __init intel_pmu_pebs_data_source_mtl(void)
204 {
205 	u64 *data_source;
206 
207 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
208 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
209 	__intel_pmu_pebs_data_source_skl(false, data_source);
210 
211 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
212 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
213 	__intel_pmu_pebs_data_source_cmt(data_source);
214 }
215 
216 void __init intel_pmu_pebs_data_source_arl_h(void)
217 {
218 	u64 *data_source;
219 
220 	intel_pmu_pebs_data_source_lnl();
221 
222 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
223 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
224 	__intel_pmu_pebs_data_source_cmt(data_source);
225 }
226 
227 void __init intel_pmu_pebs_data_source_cmt(void)
228 {
229 	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
230 }
231 
232 /* Version for Lion Cove and later */
233 static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
234 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),	/* 0x00: ukn L3 */
235 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
236 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
237 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: LFB/L1 Miss Handling Buffer hit */
238 	0,							/* 0x04: Reserved */
239 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x05: L2 Hit */
240 	OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE),			/* 0x06: L2 Miss Handling Buffer Hit */
241 	0,							/* 0x07: Reserved */
242 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),	/* 0x08: L3 Hit */
243 	0,							/* 0x09: Reserved */
244 	0,							/* 0x0a: Reserved */
245 	0,							/* 0x0b: Reserved */
246 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOPX, FWD),	/* 0x0c: L3 Hit Snoop Fwd */
247 	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0d: L3 Hit Snoop HitM */
248 	0,							/* 0x0e: Reserved */
249 	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0f: L3 Miss Snoop HitM */
250 	OP_LH | LEVEL(MSC) | P(SNOOP, NONE),			/* 0x10: Memory-side Cache Hit */
251 	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
252 };
253 
254 void __init intel_pmu_pebs_data_source_lnl(void)
255 {
256 	u64 *data_source;
257 
258 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
259 	memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
260 
261 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
262 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
263 	__intel_pmu_pebs_data_source_cmt(data_source);
264 }
265 
266 /* Version for Panthercove and later */
267 
268 /* L2 hit */
269 #define PNC_PEBS_DATA_SOURCE_MAX	16
270 static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
271 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: non-cache access */
272 	OP_LH               | LEVEL(L0) | P(SNOOP, NONE),	/* 0x01: L0 hit */
273 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
274 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: L1 Miss Handling Buffer hit */
275 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x04: L2 Hit Clean */
276 	0,							/* 0x05: Reserved */
277 	0,							/* 0x06: Reserved */
278 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),	/* 0x07: L2 Hit Snoop HIT */
279 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),	/* 0x08: L2 Hit Snoop Hit Modified */
280 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),	/* 0x09: Prefetch Promotion */
281 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),	/* 0x0a: Cross Core Prefetch Promotion */
282 	0,							/* 0x0b: Reserved */
283 	0,							/* 0x0c: Reserved */
284 	0,							/* 0x0d: Reserved */
285 	0,							/* 0x0e: Reserved */
286 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),	/* 0x0f: uncached */
287 };
288 
289 /* Version for Arctic Wolf and later */
290 
291 /* L2 hit */
292 #define ARW_PEBS_DATA_SOURCE_MAX	16
293 static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = {
294 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: non-cache access */
295 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
296 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x02: WCB Hit */
297 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),	/* 0x03: L2 Hit Clean */
298 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),	/* 0x04: L2 Hit Snoop HIT */
299 	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),	/* 0x05: L2 Hit Snoop Hit Modified */
300 	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),	/* 0x06: uncached */
301 	0,							/* 0x07: Reserved */
302 	0,							/* 0x08: Reserved */
303 	0,							/* 0x09: Reserved */
304 	0,							/* 0x0a: Reserved */
305 	0,							/* 0x0b: Reserved */
306 	0,							/* 0x0c: Reserved */
307 	0,							/* 0x0d: Reserved */
308 	0,							/* 0x0e: Reserved */
309 	0,							/* 0x0f: Reserved */
310 };
311 
312 /* L2 miss */
313 #define OMR_DATA_SOURCE_MAX		16
314 static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
315 	P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),	/* 0x00: invalid */
316 	0,							/* 0x01: Reserved */
317 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE),	/* 0x02: local CA shared cache */
318 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */
319 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO),	/* 0x04: other CA IO agent */
320 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE),	/* 0x05: other CA shared cache */
321 	OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */
322 	OP_LH | LEVEL(RAM) | P(REGION, MMIO),			/* 0x07: MMIO */
323 	OP_LH | LEVEL(RAM) | P(REGION, MEM0),			/* 0x08: Memory region 0 */
324 	OP_LH | LEVEL(RAM) | P(REGION, MEM1),			/* 0x09: Memory region 1 */
325 	OP_LH | LEVEL(RAM) | P(REGION, MEM2),			/* 0x0a: Memory region 2 */
326 	OP_LH | LEVEL(RAM) | P(REGION, MEM3),			/* 0x0b: Memory region 3 */
327 	OP_LH | LEVEL(RAM) | P(REGION, MEM4),			/* 0x0c: Memory region 4 */
328 	OP_LH | LEVEL(RAM) | P(REGION, MEM5),			/* 0x0d: Memory region 5 */
329 	OP_LH | LEVEL(RAM) | P(REGION, MEM6),			/* 0x0e: Memory region 6 */
330 	OP_LH | LEVEL(RAM) | P(REGION, MEM7),			/* 0x0f: Memory region 7 */
331 };
332 
333 static u64 parse_omr_data_source(u8 dse)
334 {
335 	union omr_encoding omr;
336 	u64 val = 0;
337 
338 	omr.omr_full = dse;
339 	val = omr_data_source[omr.omr_source];
340 	if (omr.omr_source > 0x1 && omr.omr_source < 0x7)
341 		val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0;
342 	else if (omr.omr_source > 0x7)
343 		val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM);
344 
345 	if (omr.omr_remote)
346 		val |= REM;
347 
348 	if (omr.omr_source == 0x2) {
349 		u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1);
350 
351 		if (omr.omr_hitm)
352 			val |= P(SNOOP, HITM);
353 		else if (snoop == 0x0)
354 			val |= P(SNOOP, NA);
355 		else if (snoop == 0x1)
356 			val |= P(SNOOP, MISS);
357 		else if (snoop == 0x2)
358 			val |= P(SNOOP, HIT);
359 		else if (snoop == 0x3)
360 			val |= P(SNOOP, NONE);
361 	} else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
362 		val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
363 		val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
364 	} else {
365 		val |= P(SNOOP, NONE);
366 	}
367 
368 	return val;
369 }
370 
371 static u64 precise_store_data(u64 status)
372 {
373 	union intel_x86_pebs_dse dse;
374 	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
375 
376 	dse.val = status;
377 
378 	/*
379 	 * bit 4: TLB access
380 	 * 1 = stored missed 2nd level TLB
381 	 *
382 	 * so it either hit the walker or the OS
383 	 * otherwise hit 2nd level TLB
384 	 */
385 	if (dse.st_stlb_miss)
386 		val |= P(TLB, MISS);
387 	else
388 		val |= P(TLB, HIT);
389 
390 	/*
391 	 * bit 0: hit L1 data cache
392 	 * if not set, then all we know is that
393 	 * it missed L1D
394 	 */
395 	if (dse.st_l1d_hit)
396 		val |= P(LVL, HIT);
397 	else
398 		val |= P(LVL, MISS);
399 
400 	/*
401 	 * bit 5: Locked prefix
402 	 */
403 	if (dse.st_locked)
404 		val |= P(LOCK, LOCKED);
405 
406 	return val;
407 }
408 
409 static u64 precise_datala_hsw(struct perf_event *event, u64 status)
410 {
411 	union perf_mem_data_src dse;
412 
413 	dse.val = PERF_MEM_NA;
414 
415 	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
416 		dse.mem_op = PERF_MEM_OP_STORE;
417 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
418 		dse.mem_op = PERF_MEM_OP_LOAD;
419 
420 	/*
421 	 * L1 info only valid for following events:
422 	 *
423 	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
424 	 * MEM_UOPS_RETIRED.LOCK_STORES
425 	 * MEM_UOPS_RETIRED.SPLIT_STORES
426 	 * MEM_UOPS_RETIRED.ALL_STORES
427 	 */
428 	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
429 		if (status & 1)
430 			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
431 		else
432 			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
433 	}
434 	return dse.val;
435 }
436 
437 static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
438 {
439 	/*
440 	 * TLB access
441 	 * 0 = did not miss 2nd level TLB
442 	 * 1 = missed 2nd level TLB
443 	 */
444 	if (tlb)
445 		*val |= P(TLB, MISS) | P(TLB, L2);
446 	else
447 		*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
448 
449 	/* locked prefix */
450 	if (lock)
451 		*val |= P(LOCK, LOCKED);
452 }
453 
454 /* Retrieve the latency data for e-core of ADL */
455 static u64 __grt_latency_data(struct perf_event *event, u64 status,
456 			       u8 dse, bool tlb, bool lock, bool blk)
457 {
458 	u64 val;
459 
460 	WARN_ON_ONCE(is_hybrid() &&
461 		     hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
462 
463 	dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
464 	val = hybrid_var(event->pmu, pebs_data_source)[dse];
465 
466 	pebs_set_tlb_lock(&val, tlb, lock);
467 
468 	if (blk)
469 		val |= P(BLK, DATA);
470 	else
471 		val |= P(BLK, NA);
472 
473 	return val;
474 }
475 
476 u64 grt_latency_data(struct perf_event *event, u64 status)
477 {
478 	union intel_x86_pebs_dse dse;
479 
480 	dse.val = status;
481 
482 	return __grt_latency_data(event, status, dse.ld_dse,
483 				  dse.ld_locked, dse.ld_stlb_miss,
484 				  dse.ld_data_blk);
485 }
486 
487 /* Retrieve the latency data for e-core of MTL */
488 u64 cmt_latency_data(struct perf_event *event, u64 status)
489 {
490 	union intel_x86_pebs_dse dse;
491 
492 	dse.val = status;
493 
494 	return __grt_latency_data(event, status, dse.mtl_dse,
495 				  dse.mtl_stlb_miss, dse.mtl_locked,
496 				  dse.mtl_fwd_blk);
497 }
498 
499 static u64 arw_latency_data(struct perf_event *event, u64 status)
500 {
501 	union intel_x86_pebs_dse dse;
502 	union perf_mem_data_src src;
503 	u64 val;
504 
505 	dse.val = status;
506 
507 	if (!dse.arw_l2_miss)
508 		val = arw_pebs_l2_hit_data_source[dse.arw_dse & 0xf];
509 	else
510 		val = parse_omr_data_source(dse.arw_dse);
511 
512 	if (!val)
513 		val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
514 
515 	if (dse.arw_stlb_miss)
516 		val |= P(TLB, MISS) | P(TLB, L2);
517 	else
518 		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
519 
520 	if (dse.arw_locked)
521 		val |= P(LOCK, LOCKED);
522 
523 	if (dse.arw_data_blk)
524 		val |= P(BLK, DATA);
525 	if (dse.arw_addr_blk)
526 		val |= P(BLK, ADDR);
527 	if (!dse.arw_data_blk && !dse.arw_addr_blk)
528 		val |= P(BLK, NA);
529 
530 	src.val = val;
531 	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
532 		src.mem_op = P(OP, STORE);
533 
534 	return src.val;
535 }
536 
537 static u64 lnc_latency_data(struct perf_event *event, u64 status)
538 {
539 	union intel_x86_pebs_dse dse;
540 	union perf_mem_data_src src;
541 	u64 val;
542 
543 	dse.val = status;
544 
545 	/* LNC core latency data */
546 	val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
547 	if (!val)
548 		val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
549 
550 	if (dse.lnc_stlb_miss)
551 		val |= P(TLB, MISS) | P(TLB, L2);
552 	else
553 		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
554 
555 	if (dse.lnc_locked)
556 		val |= P(LOCK, LOCKED);
557 
558 	if (dse.lnc_data_blk)
559 		val |= P(BLK, DATA);
560 	if (dse.lnc_addr_blk)
561 		val |= P(BLK, ADDR);
562 	if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
563 		val |= P(BLK, NA);
564 
565 	src.val = val;
566 	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
567 		src.mem_op = P(OP, STORE);
568 
569 	return src.val;
570 }
571 
572 u64 lnl_latency_data(struct perf_event *event, u64 status)
573 {
574 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
575 
576 	if (pmu->pmu_type == hybrid_small)
577 		return cmt_latency_data(event, status);
578 
579 	return lnc_latency_data(event, status);
580 }
581 
582 u64 arl_h_latency_data(struct perf_event *event, u64 status)
583 {
584 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
585 
586 	if (pmu->pmu_type == hybrid_tiny)
587 		return cmt_latency_data(event, status);
588 
589 	return lnl_latency_data(event, status);
590 }
591 
592 u64 pnc_latency_data(struct perf_event *event, u64 status)
593 {
594 	union intel_x86_pebs_dse dse;
595 	union perf_mem_data_src src;
596 	u64 val;
597 
598 	dse.val = status;
599 
600 	if (!dse.pnc_l2_miss)
601 		val = pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf];
602 	else
603 		val = parse_omr_data_source(dse.pnc_dse);
604 
605 	if (!val)
606 		val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
607 
608 	if (dse.pnc_stlb_miss)
609 		val |= P(TLB, MISS) | P(TLB, L2);
610 	else
611 		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
612 
613 	if (dse.pnc_locked)
614 		val |= P(LOCK, LOCKED);
615 
616 	if (dse.pnc_data_blk)
617 		val |= P(BLK, DATA);
618 	if (dse.pnc_addr_blk)
619 		val |= P(BLK, ADDR);
620 	if (!dse.pnc_data_blk && !dse.pnc_addr_blk)
621 		val |= P(BLK, NA);
622 
623 	src.val = val;
624 	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
625 		src.mem_op = P(OP, STORE);
626 
627 	return src.val;
628 }
629 
630 u64 nvl_latency_data(struct perf_event *event, u64 status)
631 {
632 	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
633 
634 	if (pmu->pmu_type == hybrid_small)
635 		return arw_latency_data(event, status);
636 
637 	return pnc_latency_data(event, status);
638 }
639 
640 static u64 load_latency_data(struct perf_event *event, u64 status)
641 {
642 	union intel_x86_pebs_dse dse;
643 	u64 val;
644 
645 	dse.val = status;
646 
647 	/*
648 	 * use the mapping table for bit 0-3
649 	 */
650 	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
651 
652 	/*
653 	 * Nehalem models do not support TLB, Lock infos
654 	 */
655 	if (x86_pmu.pebs_no_tlb) {
656 		val |= P(TLB, NA) | P(LOCK, NA);
657 		return val;
658 	}
659 
660 	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
661 
662 	/*
663 	 * Ice Lake and earlier models do not support block infos.
664 	 */
665 	if (!x86_pmu.pebs_block) {
666 		val |= P(BLK, NA);
667 		return val;
668 	}
669 	/*
670 	 * bit 6: load was blocked since its data could not be forwarded
671 	 *        from a preceding store
672 	 */
673 	if (dse.ld_data_blk)
674 		val |= P(BLK, DATA);
675 
676 	/*
677 	 * bit 7: load was blocked due to potential address conflict with
678 	 *        a preceding store
679 	 */
680 	if (dse.ld_addr_blk)
681 		val |= P(BLK, ADDR);
682 
683 	if (!dse.ld_data_blk && !dse.ld_addr_blk)
684 		val |= P(BLK, NA);
685 
686 	return val;
687 }
688 
689 static u64 store_latency_data(struct perf_event *event, u64 status)
690 {
691 	union intel_x86_pebs_dse dse;
692 	union perf_mem_data_src src;
693 	u64 val;
694 
695 	dse.val = status;
696 
697 	/*
698 	 * use the mapping table for bit 0-3
699 	 */
700 	val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
701 
702 	pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
703 
704 	val |= P(BLK, NA);
705 
706 	/*
707 	 * the pebs_data_source table is only for loads
708 	 * so override the mem_op to say STORE instead
709 	 */
710 	src.val = val;
711 	src.mem_op = P(OP,STORE);
712 
713 	return src.val;
714 }
715 
716 struct pebs_record_core {
717 	u64 flags, ip;
718 	u64 ax, bx, cx, dx;
719 	u64 si, di, bp, sp;
720 	u64 r8,  r9,  r10, r11;
721 	u64 r12, r13, r14, r15;
722 };
723 
724 struct pebs_record_nhm {
725 	u64 flags, ip;
726 	u64 ax, bx, cx, dx;
727 	u64 si, di, bp, sp;
728 	u64 r8,  r9,  r10, r11;
729 	u64 r12, r13, r14, r15;
730 	u64 status, dla, dse, lat;
731 };
732 
733 /*
734  * Same as pebs_record_nhm, with two additional fields.
735  */
736 struct pebs_record_hsw {
737 	u64 flags, ip;
738 	u64 ax, bx, cx, dx;
739 	u64 si, di, bp, sp;
740 	u64 r8,  r9,  r10, r11;
741 	u64 r12, r13, r14, r15;
742 	u64 status, dla, dse, lat;
743 	u64 real_ip, tsx_tuning;
744 };
745 
746 union hsw_tsx_tuning {
747 	struct {
748 		u32 cycles_last_block     : 32,
749 		    hle_abort		  : 1,
750 		    rtm_abort		  : 1,
751 		    instruction_abort     : 1,
752 		    non_instruction_abort : 1,
753 		    retry		  : 1,
754 		    data_conflict	  : 1,
755 		    capacity_writes	  : 1,
756 		    capacity_reads	  : 1;
757 	};
758 	u64	    value;
759 };
760 
/*
 * Mask covering bits 32-39 of a 64-bit value.
 * NOTE(review): presumably selects the eight flag bits of
 * union hsw_tsx_tuning that follow cycles_last_block - confirm at the use
 * site.
 */
#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
762 
763 /* Same as HSW, plus TSC */
764 
765 struct pebs_record_skl {
766 	u64 flags, ip;
767 	u64 ax, bx, cx, dx;
768 	u64 si, di, bp, sp;
769 	u64 r8,  r9,  r10, r11;
770 	u64 r12, r13, r14, r15;
771 	u64 status, dla, dse, lat;
772 	u64 real_ip, tsx_tuning;
773 	u64 tsc;
774 };
775 
776 void init_debug_store_on_cpu(int cpu)
777 {
778 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
779 
780 	if (!ds)
781 		return;
782 
783 	wrmsrq_on_cpu(cpu, MSR_IA32_DS_AREA, (u64)(unsigned long)ds);
784 }
785 
786 void fini_debug_store_on_cpu(int cpu)
787 {
788 	if (!per_cpu(cpu_hw_events, cpu).ds)
789 		return;
790 
791 	wrmsrq_on_cpu(cpu, MSR_IA32_DS_AREA, 0);
792 }
793 
/*
 * Per-CPU scratch buffer of PEBS_FIXUP_SIZE bytes; allocated by
 * alloc_pebs_buffer() for PEBS formats < 2 and freed by
 * release_pebs_buffer().
 */
static DEFINE_PER_CPU(void *, insn_buffer);
795 
796 static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
797 {
798 	unsigned long start = (unsigned long)cea;
799 	phys_addr_t pa;
800 	size_t msz = 0;
801 
802 	pa = virt_to_phys(addr);
803 
804 	preempt_disable();
805 	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
806 		cea_set_pte(cea, pa, prot);
807 
808 	/*
809 	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
810 	 * all TLB entries for it.
811 	 */
812 	flush_tlb_kernel_range(start, start + size);
813 	preempt_enable();
814 }
815 
816 static void ds_clear_cea(void *cea, size_t size)
817 {
818 	unsigned long start = (unsigned long)cea;
819 	size_t msz = 0;
820 
821 	preempt_disable();
822 	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
823 		cea_set_pte(cea, 0, PAGE_NONE);
824 
825 	flush_tlb_kernel_range(start, start + size);
826 	preempt_enable();
827 }
828 
829 static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
830 {
831 	unsigned int order = get_order(size);
832 	int node = cpu_to_node(cpu);
833 	struct page *page;
834 
835 	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
836 	return page ? page_address(page) : NULL;
837 }
838 
839 static void dsfree_pages(const void *buffer, size_t size)
840 {
841 	if (buffer)
842 		free_pages((unsigned long)buffer, get_order(size));
843 }
844 
845 static int alloc_pebs_buffer(int cpu)
846 {
847 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
848 	struct debug_store *ds = hwev->ds;
849 	size_t bsiz = x86_pmu.pebs_buffer_size;
850 	int max, node = cpu_to_node(cpu);
851 	void *buffer, *insn_buff, *cea;
852 
853 	if (!intel_pmu_has_pebs())
854 		return 0;
855 
856 	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
857 	if (unlikely(!buffer))
858 		return -ENOMEM;
859 
860 	if (x86_pmu.arch_pebs) {
861 		hwev->pebs_vaddr = buffer;
862 		return 0;
863 	}
864 
865 	/*
866 	 * HSW+ already provides us the eventing ip; no need to allocate this
867 	 * buffer then.
868 	 */
869 	if (x86_pmu.intel_cap.pebs_format < 2) {
870 		insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
871 		if (!insn_buff) {
872 			dsfree_pages(buffer, bsiz);
873 			return -ENOMEM;
874 		}
875 		per_cpu(insn_buffer, cpu) = insn_buff;
876 	}
877 	hwev->pebs_vaddr = buffer;
878 	/* Update the cpu entry area mapping */
879 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
880 	ds->pebs_buffer_base = (unsigned long) cea;
881 	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
882 	ds->pebs_index = ds->pebs_buffer_base;
883 	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
884 	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
885 	return 0;
886 }
887 
888 static void release_pebs_buffer(int cpu)
889 {
890 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
891 	void *cea;
892 
893 	if (!intel_pmu_has_pebs())
894 		return;
895 
896 	if (x86_pmu.ds_pebs) {
897 		kfree(per_cpu(insn_buffer, cpu));
898 		per_cpu(insn_buffer, cpu) = NULL;
899 
900 		/* Clear the fixmap */
901 		cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
902 		ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
903 	}
904 
905 	dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
906 	hwev->pebs_vaddr = NULL;
907 }
908 
909 static int alloc_bts_buffer(int cpu)
910 {
911 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
912 	struct debug_store *ds = hwev->ds;
913 	void *buffer, *cea;
914 	int max;
915 
916 	if (!x86_pmu.bts)
917 		return 0;
918 
919 	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
920 	if (unlikely(!buffer)) {
921 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
922 		return -ENOMEM;
923 	}
924 	hwev->ds_bts_vaddr = buffer;
925 	/* Update the fixmap */
926 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
927 	ds->bts_buffer_base = (unsigned long) cea;
928 	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
929 	ds->bts_index = ds->bts_buffer_base;
930 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
931 	ds->bts_absolute_maximum = ds->bts_buffer_base +
932 					max * BTS_RECORD_SIZE;
933 	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
934 					(max / 16) * BTS_RECORD_SIZE;
935 	return 0;
936 }
937 
938 static void release_bts_buffer(int cpu)
939 {
940 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
941 	void *cea;
942 
943 	if (!x86_pmu.bts)
944 		return;
945 
946 	/* Clear the fixmap */
947 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
948 	ds_clear_cea(cea, BTS_BUFFER_SIZE);
949 	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
950 	hwev->ds_bts_vaddr = NULL;
951 }
952 
953 static int alloc_ds_buffer(int cpu)
954 {
955 	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
956 
957 	memset(ds, 0, sizeof(*ds));
958 	per_cpu(cpu_hw_events, cpu).ds = ds;
959 	return 0;
960 }
961 
962 static void release_ds_buffer(int cpu)
963 {
964 	per_cpu(cpu_hw_events, cpu).ds = NULL;
965 }
966 
967 void release_ds_buffers(void)
968 {
969 	int cpu;
970 
971 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
972 		return;
973 
974 	for_each_possible_cpu(cpu)
975 		release_ds_buffer(cpu);
976 
977 	for_each_possible_cpu(cpu) {
978 		/*
979 		 * Again, ignore errors from offline CPUs, they will no longer
980 		 * observe cpu_hw_events.ds and not program the DS_AREA when
981 		 * they come up.
982 		 */
983 		fini_debug_store_on_cpu(cpu);
984 	}
985 
986 	for_each_possible_cpu(cpu) {
987 		if (x86_pmu.ds_pebs)
988 			release_pebs_buffer(cpu);
989 		release_bts_buffer(cpu);
990 	}
991 }
992 
993 void reserve_ds_buffers(void)
994 {
995 	int bts_err = 0, pebs_err = 0;
996 	int cpu;
997 
998 	x86_pmu.bts_active = 0;
999 
1000 	if (x86_pmu.ds_pebs)
1001 		x86_pmu.pebs_active = 0;
1002 
1003 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
1004 		return;
1005 
1006 	if (!x86_pmu.bts)
1007 		bts_err = 1;
1008 
1009 	if (!x86_pmu.ds_pebs)
1010 		pebs_err = 1;
1011 
1012 	for_each_possible_cpu(cpu) {
1013 		if (alloc_ds_buffer(cpu)) {
1014 			bts_err = 1;
1015 			pebs_err = 1;
1016 		}
1017 
1018 		if (!bts_err && alloc_bts_buffer(cpu))
1019 			bts_err = 1;
1020 
1021 		if (x86_pmu.ds_pebs && !pebs_err &&
1022 		    alloc_pebs_buffer(cpu))
1023 			pebs_err = 1;
1024 
1025 		if (bts_err && pebs_err)
1026 			break;
1027 	}
1028 
1029 	if (bts_err) {
1030 		for_each_possible_cpu(cpu)
1031 			release_bts_buffer(cpu);
1032 	}
1033 
1034 	if (x86_pmu.ds_pebs && pebs_err) {
1035 		for_each_possible_cpu(cpu)
1036 			release_pebs_buffer(cpu);
1037 	}
1038 
1039 	if (bts_err && pebs_err) {
1040 		for_each_possible_cpu(cpu)
1041 			release_ds_buffer(cpu);
1042 	} else {
1043 		if (x86_pmu.bts && !bts_err)
1044 			x86_pmu.bts_active = 1;
1045 
1046 		if (x86_pmu.ds_pebs && !pebs_err)
1047 			x86_pmu.pebs_active = 1;
1048 
1049 		for_each_possible_cpu(cpu) {
1050 			/*
1051 			 * Ignores wrmsr_on_cpu() errors for offline CPUs they
1052 			 * will get this call through intel_pmu_cpu_starting().
1053 			 */
1054 			init_debug_store_on_cpu(cpu);
1055 		}
1056 	}
1057 }
1058 
1059 inline int alloc_arch_pebs_buf_on_cpu(int cpu)
1060 {
1061 	if (!x86_pmu.arch_pebs)
1062 		return 0;
1063 
1064 	return alloc_pebs_buffer(cpu);
1065 }
1066 
1067 inline void release_arch_pebs_buf_on_cpu(int cpu)
1068 {
1069 	if (!x86_pmu.arch_pebs)
1070 		return;
1071 
1072 	release_pebs_buffer(cpu);
1073 }
1074 
1075 void init_arch_pebs_on_cpu(int cpu)
1076 {
1077 	struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
1078 	u64 arch_pebs_base;
1079 
1080 	if (!x86_pmu.arch_pebs)
1081 		return;
1082 
1083 	if (!cpuc->pebs_vaddr) {
1084 		WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
1085 		x86_pmu.pebs_active = 0;
1086 		return;
1087 	}
1088 
1089 	/*
1090 	 * 4KB-aligned pointer of the output buffer
1091 	 * (__alloc_pages_node() return page aligned address)
1092 	 * Buffer Size = 4KB * 2^SIZE
1093 	 * contiguous physical buffer (__alloc_pages_node() with order)
1094 	 */
1095 	arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
1096 	wrmsrq_on_cpu(cpu, MSR_IA32_PEBS_BASE, arch_pebs_base);
1097 	x86_pmu.pebs_active = 1;
1098 }
1099 
1100 inline void fini_arch_pebs_on_cpu(int cpu)
1101 {
1102 	if (!x86_pmu.arch_pebs)
1103 		return;
1104 
1105 	wrmsrq_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0);
1106 }
1107 
1108 /*
1109  * BTS
1110  */
1111 
/*
 * Constraint object for the BTS event.
 * NOTE(review): the middle argument has only bit INTEL_PMC_IDX_FIXED_BTS
 * set; presumably it is the counter-index mask that pins BTS to its own
 * slot (intel_pmu_drain_bts_buffer() looks the event up at
 * cpuc->events[INTEL_PMC_IDX_FIXED_BTS]) - confirm against the
 * EVENT_CONSTRAINT() definition.
 */
1112 struct event_constraint bts_constraint =
1113 	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
1114 
1115 void intel_pmu_enable_bts(u64 config)
1116 {
1117 	unsigned long debugctlmsr;
1118 
1119 	debugctlmsr = get_debugctlmsr();
1120 
1121 	debugctlmsr |= DEBUGCTLMSR_TR;
1122 	debugctlmsr |= DEBUGCTLMSR_BTS;
1123 	if (config & ARCH_PERFMON_EVENTSEL_INT)
1124 		debugctlmsr |= DEBUGCTLMSR_BTINT;
1125 
1126 	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
1127 		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
1128 
1129 	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
1130 		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
1131 
1132 	update_debugctlmsr(debugctlmsr);
1133 }
1134 
1135 void intel_pmu_disable_bts(void)
1136 {
1137 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1138 	unsigned long debugctlmsr;
1139 
1140 	if (!cpuc->ds)
1141 		return;
1142 
1143 	debugctlmsr = get_debugctlmsr();
1144 
1145 	debugctlmsr &=
1146 		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
1147 		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
1148 
1149 	update_debugctlmsr(debugctlmsr);
1150 }
1151 
1152 int intel_pmu_drain_bts_buffer(void)
1153 {
1154 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1155 	struct debug_store *ds = cpuc->ds;
1156 	struct bts_record {
1157 		u64	from;
1158 		u64	to;
1159 		u64	flags;
1160 	};
1161 	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1162 	struct bts_record *at, *base, *top;
1163 	struct perf_output_handle handle;
1164 	struct perf_event_header header;
1165 	struct perf_sample_data data;
1166 	unsigned long skip = 0;
1167 	struct pt_regs regs;
1168 
1169 	if (!event)
1170 		return 0;
1171 
1172 	if (!x86_pmu.bts_active)
1173 		return 0;
1174 
1175 	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1176 	top  = (struct bts_record *)(unsigned long)ds->bts_index;
1177 
1178 	if (top <= base)
1179 		return 0;
1180 
1181 	memset(&regs, 0, sizeof(regs));
1182 
1183 	ds->bts_index = ds->bts_buffer_base;
1184 
1185 	perf_sample_data_init(&data, 0, event->hw.last_period);
1186 
1187 	/*
1188 	 * BTS leaks kernel addresses in branches across the cpl boundary,
1189 	 * such as traps or system calls, so unless the user is asking for
1190 	 * kernel tracing (and right now it's not possible), we'd need to
1191 	 * filter them out. But first we need to count how many of those we
1192 	 * have in the current batch. This is an extra O(n) pass, however,
1193 	 * it's much faster than the other one especially considering that
1194 	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
1195 	 * alloc_bts_buffer()).
1196 	 */
1197 	for (at = base; at < top; at++) {
1198 		/*
1199 		 * Note that right now *this* BTS code only works if
1200 		 * attr::exclude_kernel is set, but let's keep this extra
1201 		 * check here in case that changes.
1202 		 */
1203 		if (event->attr.exclude_kernel &&
1204 		    (kernel_ip(at->from) || kernel_ip(at->to)))
1205 			skip++;
1206 	}
1207 
1208 	/*
1209 	 * Prepare a generic sample, i.e. fill in the invariant fields.
1210 	 * We will overwrite the from and to address before we output
1211 	 * the sample.
1212 	 */
1213 	rcu_read_lock();
1214 	perf_prepare_sample(&data, event, &regs);
1215 	perf_prepare_header(&header, &data, event, &regs);
1216 
1217 	if (perf_output_begin(&handle, &data, event,
1218 			      header.size * (top - base - skip)))
1219 		goto unlock;
1220 
1221 	for (at = base; at < top; at++) {
1222 		/* Filter out any records that contain kernel addresses. */
1223 		if (event->attr.exclude_kernel &&
1224 		    (kernel_ip(at->from) || kernel_ip(at->to)))
1225 			continue;
1226 
1227 		data.ip		= at->from;
1228 		data.addr	= at->to;
1229 
1230 		perf_output_sample(&handle, &header, &data, event);
1231 	}
1232 
1233 	perf_output_end(&handle);
1234 
1235 	/* There's new data available. */
1236 	event->hw.interrupts++;
1237 	event->pending_kill = POLL_IN;
1238 unlock:
1239 	rcu_read_unlock();
1240 	return 1;
1241 }
1242 
1243 void intel_pmu_drain_pebs_buffer(void)
1244 {
1245 	struct perf_sample_data data;
1246 
1247 	static_call(x86_pmu_drain_pebs)(NULL, &data);
1248 }
1249 
1250 /*
1251  * PEBS
1252  */
/*
 * PEBS constraint rows for the "core2" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so row order is significant if this is the table it
 * walks - confirm where the table is installed.
 */
1253 struct event_constraint intel_core2_pebs_event_constraints[] = {
1254 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1255 	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
1256 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
1257 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
1258 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
1259 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1260 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1261 	EVENT_CONSTRAINT_END
1262 };
1263 
/*
 * PEBS constraint rows for the "atom" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1264 struct event_constraint intel_atom_pebs_event_constraints[] = {
1265 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
1266 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
1267 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
1268 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1269 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
1270 	/* Allow all events as PEBS with no flags */
1271 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1272 	EVENT_CONSTRAINT_END
1273 };
1274 
/*
 * PEBS constraint rows for the "slm" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1275 struct event_constraint intel_slm_pebs_event_constraints[] = {
1276 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1277 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
1278 	/* Allow all events as PEBS with no flags */
1279 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1280 	EVENT_CONSTRAINT_END
1281 };
1282 
/*
 * PEBS constraint rows for the "glm" PMU model: a single
 * INTEL_ALL_EVENT_CONSTRAINT row followed by EVENT_CONSTRAINT_END.
 */
1283 struct event_constraint intel_glm_pebs_event_constraints[] = {
1284 	/* Allow all events as PEBS with no flags */
1285 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
1286 	EVENT_CONSTRAINT_END
1287 };
1288 
/*
 * PEBS constraint rows for the "grt" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 */
1289 struct event_constraint intel_grt_pebs_event_constraints[] = {
1290 	/*
	 * NOTE(review): this comment used to read "Allow all events as PEBS
	 * with no flags", but the rows below are two
	 * INTEL_HYBRID_LAT_CONSTRAINT entries, not the
	 * INTEL_ALL_EVENT_CONSTRAINT catch-all that wording accompanies in
	 * the other tables - the old text looks copied; confirm intent.
	 */
1291 	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
1292 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0x3f),
1293 	EVENT_CONSTRAINT_END
1294 };
1295 
/*
 * PEBS constraint rows for the "cmt" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 */
1296 struct event_constraint intel_cmt_pebs_event_constraints[] = {
1297 	/*
	 * NOTE(review): this comment used to read "Allow all events as PEBS
	 * with no flags", but the rows below are two
	 * INTEL_HYBRID_LAT_CONSTRAINT entries, not the
	 * INTEL_ALL_EVENT_CONSTRAINT catch-all that wording accompanies in
	 * the other tables - the old text looks copied; confirm intent.
	 */
1298 	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
1299 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xff),
1300 	EVENT_CONSTRAINT_END
1301 };
1302 
/*
 * PEBS constraint rows for the "dkt" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 */
1303 struct event_constraint intel_dkt_pebs_event_constraints[] = {
1304 	/*
	 * NOTE(review): this comment used to read "Allow all events as PEBS
	 * with no flags", but the rows below are two
	 * INTEL_HYBRID_LAT_CONSTRAINT entries, not the
	 * INTEL_ALL_EVENT_CONSTRAINT catch-all that wording accompanies in
	 * the other tables - the old text looks copied; confirm intent.
	 */
1305 	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xff),
1306 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xff),
1307 	EVENT_CONSTRAINT_END
1308 };
1309 
/*
 * PEBS constraint rows for the "nehalem" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row in this table.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so row order is significant if this is the table it
 * walks - confirm where the table is installed.
 */
1310 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
1311 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
1312 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
1313 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1314 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
1315 	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
1316 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
1317 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
1318 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
1319 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1320 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
1321 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
1322 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1323 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1324 	EVENT_CONSTRAINT_END
1325 };
1326 
/*
 * PEBS constraint rows for the "westmere" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row in this table.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so row order is significant if this is the table it
 * walks - confirm where the table is installed.
 */
1327 struct event_constraint intel_westmere_pebs_event_constraints[] = {
1328 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
1329 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
1330 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
1331 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
1332 	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
1333 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
1334 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
1335 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
1336 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
1337 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
1338 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
1339 	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
1340 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1341 	EVENT_CONSTRAINT_END
1342 };
1343 
/*
 * PEBS constraint rows for the "snb" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1344 struct event_constraint intel_snb_pebs_event_constraints[] = {
1345 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1346 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1347 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
1348 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1349 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1350         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
1351         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1352         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1353         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1354 	/* Allow all events as PEBS with no flags */
1355 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1356 	EVENT_CONSTRAINT_END
1357 };
1358 
/*
 * PEBS constraint rows for the "ivb" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1359 struct event_constraint intel_ivb_pebs_event_constraints[] = {
1360         INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1361         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
1362 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
1363 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1364 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1365 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1366 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1367 	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
1368 	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1369 	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
1370 	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
1371 	/* Allow all events as PEBS with no flags */
1372 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1373         EVENT_CONSTRAINT_END
1374 };
1375 
/*
 * PEBS constraint rows for the "hsw" PMU model, terminated by
 * EVENT_CONSTRAINT_END. The memory-uop rows use the *_DATALA_XLD / _XST /
 * _NA macro variants here.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1376 struct event_constraint intel_hsw_pebs_event_constraints[] = {
1377 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1378 	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
1379 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1380 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1381 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1382 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1383 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1384 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1385 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1386 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1387 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1388 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1389 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1390 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1391 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1392 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1393 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1394 	/* Allow all events as PEBS with no flags */
1395 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1396 	EVENT_CONSTRAINT_END
1397 };
1398 
/*
 * PEBS constraint rows for the "bdw" PMU model, terminated by
 * EVENT_CONSTRAINT_END. Same event codes as the hsw table, but the
 * memory-uop rows use the *_DATALA_LD / _ST macro variants.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1399 struct event_constraint intel_bdw_pebs_event_constraints[] = {
1400 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
1401 	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
1402 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
1403 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
1404 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1405 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1406 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
1407 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
1408 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
1409 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
1410 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1411 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1412 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1413 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1414 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1415 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1416 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1417 	/* Allow all events as PEBS with no flags */
1418 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1419 	EVENT_CONSTRAINT_END
1420 };
1421 
1422 
/*
 * PEBS constraint rows for the "skl" PMU model, terminated by
 * EVENT_CONSTRAINT_END.
 * NOTE(review): intel_pebs_constraints() returns the first row matching
 * event->hw.config, so the INTEL_ALL_EVENT_CONSTRAINT row presumably has to
 * stay last - confirm where the table is installed.
 */
1423 struct event_constraint intel_skl_pebs_event_constraints[] = {
1424 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
1425 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1426 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1427 	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
1428 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1429 	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
1430 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1431 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1432 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1433 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
1434 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1435 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1436 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1437 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1438 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
1439 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
1440 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
1441 	/* Allow all events as PEBS with no flags */
1442 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1443 	EVENT_CONSTRAINT_END
1444 };
1445 
/*
 * PEBS constraint rows for the "icl" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row: when nothing here
 * matches, intel_pebs_constraints() returns NULL if PMU_FL_PEBS_ALL is set.
 * NOTE(review): intel_pebs_constraints() returns the first matching row, so
 * the specific 0x..d0 umask rows presumably must precede the generic 0xd0
 * row - confirm where the table is installed.
 */
1446 struct event_constraint intel_icl_pebs_event_constraints[] = {
1447 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),	/* old INST_RETIRED.PREC_DIST */
1448 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1449 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),	/* SLOTS */
1450 
1451 	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
1452 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1453 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1454 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1455 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1456 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1457 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1458 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1459 
1460 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
1461 
1462 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),		/* MEM_INST_RETIRED.* */
1463 
1464 	/*
1465 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1466 	 * need the full constraints from the main table.
1467 	 */
1468 
1469 	EVENT_CONSTRAINT_END
1470 };
1471 
/*
 * PEBS constraint rows for the "glc" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row: when nothing here
 * matches, intel_pebs_constraints() returns NULL if PMU_FL_PEBS_ALL is set.
 * NOTE(review): intel_pebs_constraints() returns the first matching row, so
 * row order is significant if this is the table it walks - confirm where
 * the table is installed.
 */
1472 struct event_constraint intel_glc_pebs_event_constraints[] = {
1473 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1474 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1475 
1476 	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
1477 	INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
1478 	INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
1479 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1480 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1481 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1482 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1483 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1484 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1485 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1486 
1487 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1488 
1489 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1490 
1491 	/*
1492 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1493 	 * need the full constraints from the main table.
1494 	 */
1495 
1496 	EVENT_CONSTRAINT_END
1497 };
1498 
/*
 * PEBS constraint rows for the "lnc" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row: when nothing here
 * matches, intel_pebs_constraints() returns NULL if PMU_FL_PEBS_ALL is set.
 * NOTE(review): intel_pebs_constraints() returns the first matching row, so
 * row order is significant if this is the table it walks - confirm where
 * the table is installed.
 */
1499 struct event_constraint intel_lnc_pebs_event_constraints[] = {
1500 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1501 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1502 
1503 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x012a, 0x1),		/* OCR.* events */
1504 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x012b, 0x1),		/* OCR.* events */
1505 
1506 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x04a4, 0x1),		/* TOPDOWN.BAD_SPEC_SLOTS */
1507 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x08a4, 0x1),		/* TOPDOWN.BR_MISPREDICT_SLOTS */
1508 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x10a4, 0x8),		/* TOPDOWN.MEMORY_BOUND_SLOTS */
1509 
1510 	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
1511 	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1512 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1513 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1514 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1515 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1516 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1517 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1518 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1519 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x87d0, 0x3ff),		/* MEM_INST_RETIRED.ANY */
1520 
1521 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1522 
1523 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1524 
1525 	/*
1526 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1527 	 * need the full constraints from the main table.
1528 	 */
1529 
1530 	EVENT_CONSTRAINT_END
1531 };
1532 
/*
 * PEBS constraint rows for the "pnc" PMU model, terminated by
 * EVENT_CONSTRAINT_END. There is no catch-all row: when nothing here
 * matches, intel_pebs_constraints() returns NULL if PMU_FL_PEBS_ALL is set.
 * NOTE(review): intel_pebs_constraints() returns the first matching row, so
 * row order is significant if this is the table it walks - confirm where
 * the table is installed.
 */
1533 struct event_constraint intel_pnc_pebs_event_constraints[] = {
1534 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1535 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1536 
1537 	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0xfc),
1538 	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
1539 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1540 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1541 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1542 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1543 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1544 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1545 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1546 
1547 	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1548 
1549 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1550 	INTEL_FLAGS_EVENT_CONSTRAINT(0xd6, 0xf),
1551 
1552 	/*
1553 	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1554 	 * need the full constraints from the main table.
1555 	 */
1556 
1557 	EVENT_CONSTRAINT_END
1558 };
1559 
1560 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
1561 {
1562 	struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
1563 	struct event_constraint *c;
1564 
1565 	if (!event->attr.precise_ip)
1566 		return NULL;
1567 
1568 	if (pebs_constraints) {
1569 		for_each_event_constraint(c, pebs_constraints) {
1570 			if (constraint_match(c, event->hw.config)) {
1571 				event->hw.flags |= c->flags;
1572 				return c;
1573 			}
1574 		}
1575 	}
1576 
1577 	/*
1578 	 * Extended PEBS support
1579 	 * Makes the PEBS code search the normal constraints.
1580 	 */
1581 	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1582 		return NULL;
1583 
1584 	return &emptyconstraint;
1585 }
1586 
1587 /*
1588  * We need the sched_task callback even for per-cpu events when we use
1589  * the large interrupt threshold, such that we can provide PID and TID
1590  * to PEBS samples.
1591  */
1592 static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1593 {
1594 	if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1595 		return false;
1596 
1597 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1598 }
1599 
1600 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
1601 {
1602 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1603 
1604 	if (!sched_in && pebs_needs_sched_cb(cpuc))
1605 		intel_pmu_drain_pebs_buffer();
1606 }
1607 
1608 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1609 {
1610 	struct debug_store *ds = cpuc->ds;
1611 	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
1612 	u64 threshold;
1613 	int reserved;
1614 
1615 	if (cpuc->n_pebs_via_pt)
1616 		return;
1617 
1618 	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1619 		reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
1620 	else
1621 		reserved = max_pebs_events;
1622 
1623 	if (cpuc->n_pebs == cpuc->n_large_pebs) {
1624 		threshold = ds->pebs_absolute_maximum -
1625 			reserved * cpuc->pebs_record_size;
1626 	} else {
1627 		threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1628 	}
1629 
1630 	ds->pebs_interrupt_threshold = threshold;
1631 }
1632 
/*
 * Helpers for the counter fields of a PEBS data configuration word.
 *
 * PEBS_DATACFG_CNTRS(x) / PEBS_DATACFG_FIX(x) extract the two counter
 * bitmaps from config value x; PEBS_DATACFG_CNTR_BIT(x) /
 * PEBS_DATACFG_FIX_BIT(x) build the config bit for counter index x (the
 * FIX variant is used for indices at or above INTEL_PMC_IDX_FIXED, after
 * rebasing).
 *
 * Every use of the argument is parenthesized so that compound arguments
 * such as "a | b" or "idx - base" expand with the intended precedence.
 */
#define PEBS_DATACFG_CNTRS(x)						\
	(((x) >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)

#define PEBS_DATACFG_CNTR_BIT(x)					\
	(((1ULL << (x)) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)

#define PEBS_DATACFG_FIX(x)						\
	(((x) >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)

#define PEBS_DATACFG_FIX_BIT(x)						\
	(((1ULL << (x)) & PEBS_DATACFG_FIX_MASK)			\
	 << PEBS_DATACFG_FIX_SHIFT)
1645 
1646 static void adaptive_pebs_record_size_update(void)
1647 {
1648 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1649 	u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1650 	int sz = sizeof(struct pebs_basic);
1651 
1652 	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1653 		sz += sizeof(struct pebs_meminfo);
1654 	if (pebs_data_cfg & PEBS_DATACFG_GP)
1655 		sz += sizeof(struct pebs_gprs);
1656 	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1657 		sz += sizeof(struct pebs_xmm);
1658 	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1659 		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1660 	if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
1661 		sz += sizeof(struct pebs_cntr_header);
1662 
1663 		/* Metrics base and Metrics Data */
1664 		if (pebs_data_cfg & PEBS_DATACFG_METRICS)
1665 			sz += 2 * sizeof(u64);
1666 
1667 		if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
1668 			sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
1669 			       hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
1670 			      sizeof(u64);
1671 		}
1672 	}
1673 
1674 	cpuc->pebs_record_size = sz;
1675 }
1676 
1677 static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
1678 					int idx, u64 *pebs_data_cfg)
1679 {
1680 	if (is_metric_event(event)) {
1681 		*pebs_data_cfg |= PEBS_DATACFG_METRICS;
1682 		return;
1683 	}
1684 
1685 	*pebs_data_cfg |= PEBS_DATACFG_CNTR;
1686 
1687 	if (idx >= INTEL_PMC_IDX_FIXED)
1688 		*pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
1689 	else
1690 		*pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx);
1691 }
1692 
1693 
1694 void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
1695 {
1696 	struct perf_event *event;
1697 	u64 pebs_data_cfg = 0;
1698 	int i;
1699 
1700 	for (i = 0; i < cpuc->n_events; i++) {
1701 		event = cpuc->event_list[i];
1702 		if (!is_pebs_counter_event_group(event))
1703 			continue;
1704 		__intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
1705 	}
1706 
1707 	if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1708 		cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1709 }
1710 
/*
 * Sample-type bits for which pebs_update_adaptive_cfg() requests
 * PEBS_DATACFG_MEMINFO.
 */
#define PERF_PEBS_MEMINFO_TYPE						\
	(PERF_SAMPLE_ADDR |						\
	 PERF_SAMPLE_DATA_SRC |						\
	 PERF_SAMPLE_PHYS_ADDR |					\
	 PERF_SAMPLE_WEIGHT_TYPE |					\
	 PERF_SAMPLE_TRANSACTION |					\
	 PERF_SAMPLE_DATA_PAGE_SIZE)
1716 
1717 static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1718 {
1719 	struct perf_event_attr *attr = &event->attr;
1720 	u64 sample_type = attr->sample_type;
1721 	u64 pebs_data_cfg = 0;
1722 	bool gprs, tsx_weight;
1723 
1724 	if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1725 	    attr->precise_ip > 1)
1726 		return pebs_data_cfg;
1727 
1728 	if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1729 		pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1730 
1731 	/*
1732 	 * We need GPRs when:
1733 	 * + user requested them
1734 	 * + precise_ip < 2 for the non event IP
1735 	 * + For RTM TSX weight we need GPRs for the abort code.
1736 	 */
1737 	gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1738 		(attr->sample_regs_intr & PEBS_GP_REGS)) ||
1739 	       ((sample_type & PERF_SAMPLE_REGS_USER) &&
1740 		(attr->sample_regs_user & PEBS_GP_REGS));
1741 
1742 	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1743 		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1744 		      x86_pmu.rtm_abort_event);
1745 
1746 	if (gprs || (attr->precise_ip < 2) || tsx_weight)
1747 		pebs_data_cfg |= PEBS_DATACFG_GP;
1748 
1749 	if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1750 	    (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1751 		pebs_data_cfg |= PEBS_DATACFG_XMMS;
1752 
1753 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1754 		/*
1755 		 * For now always log all LBRs. Could configure this
1756 		 * later.
1757 		 */
1758 		pebs_data_cfg |= PEBS_DATACFG_LBRS |
1759 			((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1760 	}
1761 
1762 	return pebs_data_cfg;
1763 }
1764 
1765 static void
1766 pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1767 		  struct perf_event *event, bool add)
1768 {
1769 	struct pmu *pmu = event->pmu;
1770 
1771 	/*
1772 	 * Make sure we get updated with the first PEBS event.
1773 	 * During removal, ->pebs_data_cfg is still valid for
1774 	 * the last PEBS event. Don't clear it.
1775 	 */
1776 	if ((cpuc->n_pebs == 1) && add)
1777 		cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
1778 
1779 	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1780 		if (!needed_cb)
1781 			perf_sched_cb_inc(pmu);
1782 		else
1783 			perf_sched_cb_dec(pmu);
1784 
1785 		cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
1786 	}
1787 
1788 	/*
1789 	 * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1790 	 * iterating all remaining PEBS events to reconstruct the config.
1791 	 */
1792 	if (x86_pmu.intel_cap.pebs_baseline && add) {
1793 		u64 pebs_data_cfg;
1794 
1795 		pebs_data_cfg = pebs_update_adaptive_cfg(event);
1796 		/*
1797 		 * Be sure to update the thresholds when we change the record.
1798 		 */
1799 		if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1800 			cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1801 	}
1802 }
1803 
1804 u64 intel_get_arch_pebs_data_config(struct perf_event *event)
1805 {
1806 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1807 	u64 pebs_data_cfg = 0;
1808 	u64 cntr_mask;
1809 
1810 	if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
1811 		return 0;
1812 
1813 	pebs_data_cfg |= pebs_update_adaptive_cfg(event);
1814 
1815 	cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
1816 		    (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
1817 		    PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
1818 	pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
1819 
1820 	return pebs_data_cfg;
1821 }
1822 
1823 void intel_pmu_pebs_add(struct perf_event *event)
1824 {
1825 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1826 	struct hw_perf_event *hwc = &event->hw;
1827 	bool needed_cb = pebs_needs_sched_cb(cpuc);
1828 
1829 	cpuc->n_pebs++;
1830 	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1831 		cpuc->n_large_pebs++;
1832 	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1833 		cpuc->n_pebs_via_pt++;
1834 
1835 	pebs_update_state(needed_cb, cpuc, event, true);
1836 }
1837 
1838 static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1839 {
1840 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1841 
1842 	if (!is_pebs_pt(event))
1843 		return;
1844 
1845 	if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1846 		cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1847 }
1848 
1849 static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1850 {
1851 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1852 	struct hw_perf_event *hwc = &event->hw;
1853 	struct debug_store *ds = cpuc->ds;
1854 	u64 value = ds->pebs_event_reset[hwc->idx];
1855 	u32 base = MSR_RELOAD_PMC0;
1856 	unsigned int idx = hwc->idx;
1857 
1858 	if (!is_pebs_pt(event))
1859 		return;
1860 
1861 	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1862 		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1863 
1864 	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1865 
1866 	if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1867 		base = MSR_RELOAD_FIXED_CTR0;
1868 		idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1869 		if (x86_pmu.intel_cap.pebs_format < 5)
1870 			value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
1871 		else
1872 			value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1873 	}
1874 	wrmsrq(base + idx, value);
1875 }
1876 
1877 static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
1878 {
1879 	if (cpuc->n_pebs == cpuc->n_large_pebs &&
1880 	    cpuc->n_pebs != cpuc->n_pebs_via_pt)
1881 		intel_pmu_drain_pebs_buffer();
1882 }
1883 
1884 static void __intel_pmu_pebs_enable(struct perf_event *event)
1885 {
1886 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1887 	struct hw_perf_event *hwc = &event->hw;
1888 
1889 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1890 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
1891 }
1892 
/*
 * Enable PEBS for @event on this CPU.
 *
 * Besides marking the counter in cpuc->pebs_enabled this:
 *  - sets the extra enable bits used by load-latency / precise-store events,
 *  - with adaptive PEBS (pebs_baseline), reprograms MSR_PEBS_DATA_CFG when
 *    the wanted record layout differs from the active one,
 *  - refreshes the thresholds when PEBS_UPDATE_DS_SW was requested,
 *  - stores the auto-reload value for the counter in the DS area,
 *  - performs the PEBS-via-PT setup.
 */
void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	/* Wanted hardware config: the software-only update flag is masked out. */
	u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;
	unsigned int idx = hwc->idx;

	__intel_pmu_pebs_enable(event);

	/* Load latency uses bit (idx + 32) before PMU version 5; precise store uses bit 63. */
	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;

	if (x86_pmu.intel_cap.pebs_baseline) {
		hwc->config |= ICL_EVENTSEL_ADAPTIVE;
		if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
			/*
			 * drain_pebs() assumes uniform record size;
			 * hence we need to drain when changing said
			 * size.
			 */
			intel_pmu_drain_pebs_buffer();
			adaptive_pebs_record_size_update();
			wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
			cpuc->active_pebs_data_cfg = pebs_data_cfg;
		}
	}
	/* A pending software update: clear the flag and recompute the threshold. */
	if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
		cpuc->pebs_data_cfg = pebs_data_cfg;
		pebs_update_threshold(cpuc);
	}

	/*
	 * Translate a fixed counter index into its pebs_event_reset[] slot,
	 * which follows the general purpose slots (their number depends on
	 * the PEBS format).
	 */
	if (idx >= INTEL_PMC_IDX_FIXED) {
		if (x86_pmu.intel_cap.pebs_format < 5)
			idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
		else
			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
	}

	/*
	 * Use auto-reload if possible to save a MSR write in the PMI.
	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
	 */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		ds->pebs_event_reset[idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	} else {
		ds->pebs_event_reset[idx] = 0;
	}

	intel_pmu_pebs_via_pt_enable(event);
}
1947 
1948 void intel_pmu_pebs_del(struct perf_event *event)
1949 {
1950 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1951 	struct hw_perf_event *hwc = &event->hw;
1952 	bool needed_cb = pebs_needs_sched_cb(cpuc);
1953 
1954 	cpuc->n_pebs--;
1955 	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1956 		cpuc->n_large_pebs--;
1957 	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1958 		cpuc->n_pebs_via_pt--;
1959 
1960 	pebs_update_state(needed_cb, cpuc, event, false);
1961 }
1962 
1963 static void __intel_pmu_pebs_disable(struct perf_event *event)
1964 {
1965 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1966 	struct hw_perf_event *hwc = &event->hw;
1967 
1968 	intel_pmu_drain_large_pebs(cpuc);
1969 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1970 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1971 }
1972 
1973 void intel_pmu_pebs_disable(struct perf_event *event)
1974 {
1975 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1976 	struct hw_perf_event *hwc = &event->hw;
1977 
1978 	__intel_pmu_pebs_disable(event);
1979 
1980 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1981 	    (x86_pmu.version < 5))
1982 		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1983 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1984 		cpuc->pebs_enabled &= ~(1ULL << 63);
1985 
1986 	intel_pmu_pebs_via_pt_disable(event);
1987 
1988 	if (cpuc->enabled)
1989 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1990 }
1991 
1992 void intel_pmu_pebs_enable_all(void)
1993 {
1994 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1995 
1996 	if (cpuc->pebs_enabled)
1997 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1998 }
1999 
2000 void intel_pmu_pebs_disable_all(void)
2001 {
2002 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2003 
2004 	if (cpuc->pebs_enabled)
2005 		__intel_pmu_pebs_disable_all();
2006 }
2007 
/*
 * Try to rewind the off-by-one PEBS IP in @regs to the instruction that
 * actually caused the event, using the most recent LBR entry.
 *
 * The instructions between the last branch target (LBR 'to') and the
 * reported IP are decoded one by one; the start of the last instruction
 * decoded before reaching the reported IP becomes the new IP.
 *
 * Returns 1 when regs->ip is known to be exact (either no fixup is needed
 * or the fixup succeeded) and 0 when the IP could not be fixed up.
 */
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	/* Number of bytes between the branch target and the reported IP. */
	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		/* Any non-zero result is treated as a failed copy. */
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		/* Kernel text is decoded in place. */
		kaddr = (void *)to;
	}

	/* Walk the basic block, remembering where each instruction starts. */
	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || any_64bit_mode(regs);
#endif
		insn_init(&insn, kaddr, size, is_64bit);

		/*
		 * Make sure there was not a problem decoding the instruction.
		 * This is doubly important because we have an infinite loop if
		 * insn.length=0.
		 */
		if (insn_get_length(&insn))
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	/* The decode landed exactly on the reported IP: old_to is the culprit. */
	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}
2100 
2101 static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
2102 {
2103 	if (tsx_tuning) {
2104 		union hsw_tsx_tuning tsx = { .value = tsx_tuning };
2105 		return tsx.cycles_last_block;
2106 	}
2107 	return 0;
2108 }
2109 
2110 static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
2111 {
2112 	u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
2113 
2114 	/* For RTM XABORTs also log the abort code from AX */
2115 	if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
2116 		txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
2117 	return txn;
2118 }
2119 
2120 static inline u64 get_pebs_status(void *n)
2121 {
2122 	if (x86_pmu.intel_cap.pebs_format < 4)
2123 		return ((struct pebs_record_nhm *)n)->status;
2124 	return ((struct pebs_basic *)n)->applicable_counters;
2125 }
2126 
/*
 * All of the *_HSW precise event flags (store, load and NA variants)
 * combined, so that they can be tested as a group.
 */
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
2131 
2132 static u64 get_data_src(struct perf_event *event, u64 aux)
2133 {
2134 	u64 val = PERF_MEM_NA;
2135 	int fl = event->hw.flags;
2136 	bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
2137 
2138 	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
2139 		val = load_latency_data(event, aux);
2140 	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
2141 		val = store_latency_data(event, aux);
2142 	else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
2143 		val = x86_pmu.pebs_latency_data(event, aux);
2144 	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
2145 		val = precise_datala_hsw(event, aux);
2146 	else if (fst)
2147 		val = precise_store_data(aux);
2148 	return val;
2149 }
2150 
2151 static void setup_pebs_time(struct perf_event *event,
2152 			    struct perf_sample_data *data,
2153 			    u64 tsc)
2154 {
2155 	/* Converting to a user-defined clock is not supported yet. */
2156 	if (event->attr.use_clockid != 0)
2157 		return;
2158 
2159 	/*
2160 	 * Doesn't support the conversion when the TSC is unstable.
2161 	 * The TSC unstable case is a corner case and very unlikely to
2162 	 * happen. If it happens, the TSC in a PEBS record will be
2163 	 * dropped and fall back to perf_event_clock().
2164 	 */
2165 	if (!using_native_sched_clock() || !sched_clock_stable())
2166 		return;
2167 
2168 	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
2169 	data->sample_flags |= PERF_SAMPLE_TIME;
2170 }
2171 
/* Sample types for which the PEBS setup code fills in data->addr. */
#define PERF_SAMPLE_ADDR_TYPE	(PERF_SAMPLE_ADDR |		\
				 PERF_SAMPLE_PHYS_ADDR |	\
				 PERF_SAMPLE_DATA_PAGE_SIZE)
2175 
/*
 * Fill @data and @regs from a fixed-layout PEBS record.
 *
 * @event:  the event the record belongs to
 * @iregs:  interrupt registers; used as the base for @regs and for the
 *          callchain
 * @__pebs: the PEBS record, accessed through the largest fixed layout
 *          (pebs_record_skl); fields beyond the basic ones are only read
 *          when x86_pmu.intel_cap.pebs_format says they exist
 * @data:   sample data to initialize and fill
 * @regs:   output register set describing the sampled instruction
 *
 * Does nothing when @__pebs is NULL.
 */
static void setup_pebs_fixed_sample_data(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_skl *pebs = __pebs;
	u64 sample_type;
	int fll;

	if (pebs == NULL)
		return;

	sample_type = event->attr.sample_type;
	/* Non-zero when this is a load latency event. */
	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;

	perf_sample_data_init(data, 0, event->hw.last_period);

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
		data->weight.full = pebs->lat;
		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	/*
	 * data.data_src encodes the data source
	 */
	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		data->data_src.val = get_data_src(event, pebs->dse);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	/*
	 * We must however always use iregs for the unwinder to stay sane; the
	 * record BP,SP,IP can point into thin air when the record is from a
	 * previous PMI context or an (I)RET happened between the record and
	 * PMI.
	 */
	perf_sample_save_callchain(data, event, iregs);

	/*
	 * We use the interrupt regs as a base because the PEBS record does not
	 * contain a full regs set, specifically it seems to lack segment
	 * descriptors, which get used by things like user_mode().
	 *
	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
	 */
	*regs = *iregs;

	/*
	 * Initialize regs->flags from PEBS,
	 * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
	 * i.e., do not rely on it being zero:
	 */
	regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;

	/* Only copy the recorded registers when the user asked for them. */
	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;

		regs->bp = pebs->bp;
		regs->sp = pebs->sp;

#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1) {
		/*
		 * Haswell and later processors have an 'eventing IP'
		 * (real IP) which fixes the off-by-1 skid in hardware.
		 * Use it when precise_ip >= 2 :
		 */
		if (x86_pmu.intel_cap.pebs_format >= 2) {
			set_linear_ip(regs, pebs->real_ip);
			regs->flags |= PERF_EFLAGS_EXACT;
		} else {
			/* Otherwise, use PEBS off-by-1 IP: */
			set_linear_ip(regs, pebs->ip);

			/*
			 * With precise_ip >= 2, try to fix up the off-by-1 IP
			 * using the LBR. If successful, the fixup function
			 * corrects regs->ip and calls set_linear_ip() on regs:
			 */
			if (intel_pmu_pebs_fixup_ip(regs))
				regs->flags |= PERF_EFLAGS_EXACT;
		}
	} else {
		/*
		 * When precise_ip == 1, return the PEBS off-by-1 IP,
		 * no fixup attempted:
		 */
		set_linear_ip(regs, pebs->ip);
	}


	/* The data linear address is only read for PEBS format 1 and later. */
	if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
	    x86_pmu.intel_cap.pebs_format >= 1) {
		data->addr = pebs->dla;
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	/* The TSX tuning word is only read for PEBS format 2 and later. */
	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
		}
		if (sample_type & PERF_SAMPLE_TRANSACTION) {
			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
							      pebs->ax);
			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
		}
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3)
		setup_pebs_time(event, data, pebs->tsc);

	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
2321 
2322 static void adaptive_pebs_save_regs(struct pt_regs *regs,
2323 				    struct pebs_gprs *gprs)
2324 {
2325 	regs->ax = gprs->ax;
2326 	regs->bx = gprs->bx;
2327 	regs->cx = gprs->cx;
2328 	regs->dx = gprs->dx;
2329 	regs->si = gprs->si;
2330 	regs->di = gprs->di;
2331 	regs->bp = gprs->bp;
2332 	regs->sp = gprs->sp;
2333 #ifndef CONFIG_X86_32
2334 	regs->r8 = gprs->r8;
2335 	regs->r9 = gprs->r9;
2336 	regs->r10 = gprs->r10;
2337 	regs->r11 = gprs->r11;
2338 	regs->r12 = gprs->r12;
2339 	regs->r13 = gprs->r13;
2340 	regs->r14 = gprs->r14;
2341 	regs->r15 = gprs->r15;
2342 #endif
2343 }
2344 
2345 static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
2346 {
2347 	int shift = 64 - x86_pmu.cntval_bits;
2348 	struct hw_perf_event *hwc;
2349 	u64 delta, prev_pmc;
2350 
2351 	/*
2352 	 * A recorded counter may not have an assigned event in the
2353 	 * following cases. The value should be dropped.
2354 	 * - An event is deleted. There is still an active PEBS event.
2355 	 *   The PEBS record doesn't shrink on pmu::del().
2356 	 *   If the counter of the deleted event once occurred in a PEBS
2357 	 *   record, PEBS still records the counter until the counter is
2358 	 *   reassigned.
2359 	 * - An event is stopped for some reason, e.g., throttled.
2360 	 *   During this period, another event is added and takes the
2361 	 *   counter of the stopped event. The stopped event is assigned
2362 	 *   to another new and uninitialized counter, since the
2363 	 *   x86_pmu_start(RELOAD) is not invoked for a stopped event.
2364 	 *   The PEBS__DATA_CFG is updated regardless of the event state.
2365 	 *   The uninitialized counter can be recorded in a PEBS record.
2366 	 *   But the cpuc->events[uninitialized_counter] is always NULL,
2367 	 *   because the event is stopped. The uninitialized value is
2368 	 *   safely dropped.
2369 	 */
2370 	if (!event)
2371 		return;
2372 
2373 	hwc = &event->hw;
2374 	prev_pmc = local64_read(&hwc->prev_count);
2375 
2376 	/* Only update the count when the PMU is disabled */
2377 	WARN_ON(this_cpu_read(cpu_hw_events.enabled));
2378 	local64_set(&hwc->prev_count, pmc);
2379 
2380 	delta = (pmc << shift) - (prev_pmc << shift);
2381 	delta >>= shift;
2382 
2383 	local64_add(delta, &event->count);
2384 	local64_sub(delta, &hwc->period_left);
2385 }
2386 
/*
 * Consume the counter snapshot group of a PEBS record.
 *
 * @cpuc:        per-CPU PMU state, used to look up the event of each counter
 * @event:       the event the PEBS record belongs to
 * @cntr:        counter group header with the bitmaps of recorded counters
 * @next_record: first u64 value following the header
 *
 * The values are laid out as: one u64 per set bit in cntr->cntr, one u64
 * per set bit in cntr->fixed and, when metrics are recorded, two more u64s
 * that are handed to intel_pmu_update_topdown_event().
 *
 * @next_record is advanced locally only; the caller has to skip the group
 * on its own.
 */
static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
					      struct perf_event *event,
					      struct pebs_cntr_header *cntr,
					      void *next_record)
{
	int bit;

	/* General purpose counters. */
	for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
		intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
		next_record += sizeof(u64);
	}

	/* Fixed counters. */
	for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
		/* The slots event will be handled with perf_metric later */
		if ((cntr->metrics == INTEL_CNTR_METRICS) &&
		    (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
			next_record += sizeof(u64);
			continue;
		}
		intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
					    *(u64 *)next_record);
		next_record += sizeof(u64);
	}

	/* HW will reload the value right after the overflow. */
	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
		local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);

	/* The metrics values are passed on together with the slots event. */
	if (cntr->metrics == INTEL_CNTR_METRICS) {
		static_call(intel_pmu_update_topdown_event)
			   (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
			    (u64 *)next_record);
		next_record += 2 * sizeof(u64);
	}
}
2422 
/* 16-bit mask; presumably for a PEBS latency field - TODO confirm at the users. */
#define PEBS_LATENCY_MASK			0xffff
2424 
/*
 * Common start of the adaptive and arch PEBS sample setup: initialize @data
 * with the event's last period and save the callchain based on @iregs.
 */
static inline void __setup_perf_sample_data(struct perf_event *event,
					    struct pt_regs *iregs,
					    struct perf_sample_data *data)
{
	perf_sample_data_init(data, 0, event->hw.last_period);

	/*
	 * We must however always use iregs for the unwinder to stay sane; the
	 * record BP,SP,IP can point into thin air when the record is from a
	 * previous PMI context or an (I)RET happened between the record and
	 * PMI.
	 */
	perf_sample_save_callchain(data, event, iregs);
}
2439 
/*
 * Apply the basic group of a PEBS record.
 *
 * @ip:     eventing IP from the record; becomes the IP of @regs
 * @tsc:    TSC from the record; used for the sample time when possible
 * @retire: retire latency; stored in data->weight.var3_w when
 *          PERF_SAMPLE_WEIGHT_STRUCT is requested
 *
 * regs->flags is replaced by PERF_EFLAGS_EXACT.
 */
static inline void __setup_pebs_basic_group(struct perf_event *event,
					    struct pt_regs *regs,
					    struct perf_sample_data *data,
					    u64 sample_type, u64 ip,
					    u64 tsc, u16 retire)
{
	/* The ip in basic is EventingIP */
	set_linear_ip(regs, ip);
	/* Overwrites, rather than ORs into, the flags copied from iregs. */
	regs->flags = PERF_EFLAGS_EXACT;
	setup_pebs_time(event, data, tsc);

	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
		data->weight.var3_w = retire;
}
2454 
/*
 * Apply the GPR group of a PEBS record.
 *
 * For precise_ip < 2 the IP recorded in the GPR group replaces the IP of
 * @regs and the exact flag is cleared. The general purpose registers are
 * copied into @regs only when interrupt or user registers are sampled.
 */
static inline void __setup_pebs_gpr_group(struct perf_event *event,
					  struct pt_regs *regs,
					  struct pebs_gprs *gprs,
					  u64 sample_type)
{
	if (event->attr.precise_ip < 2) {
		set_linear_ip(regs, gprs->ip);
		regs->flags &= ~PERF_EFLAGS_EXACT;
	}

	if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
		adaptive_pebs_save_regs(regs, gprs);
}
2468 
2469 static inline void __setup_pebs_meminfo_group(struct perf_event *event,
2470 					      struct perf_sample_data *data,
2471 					      u64 sample_type, u64 latency,
2472 					      u16 instr_latency, u64 address,
2473 					      u64 aux, u64 tsx_tuning, u64 ax)
2474 {
2475 	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
2476 		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
2477 
2478 		data->weight.var2_w = instr_latency;
2479 
2480 		/*
2481 		 * Although meminfo::latency is defined as a u64,
2482 		 * only the lower 32 bits include the valid data
2483 		 * in practice on Ice Lake and earlier platforms.
2484 		 */
2485 		if (sample_type & PERF_SAMPLE_WEIGHT)
2486 			data->weight.full = latency ?: tsx_latency;
2487 		else
2488 			data->weight.var1_dw = (u32)latency ?: tsx_latency;
2489 
2490 		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
2491 	}
2492 
2493 	if (sample_type & PERF_SAMPLE_DATA_SRC) {
2494 		data->data_src.val = get_data_src(event, aux);
2495 		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
2496 	}
2497 
2498 	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
2499 		data->addr = address;
2500 		data->sample_flags |= PERF_SAMPLE_ADDR;
2501 	}
2502 
2503 	if (sample_type & PERF_SAMPLE_TRANSACTION) {
2504 		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
2505 		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
2506 	}
2507 }
2508 
/*
 * With adaptive PEBS the layout depends on what fields are configured.
 *
 * Fill @data and @regs from an adaptive PEBS record at @__pebs. The record
 * starts with the basic group; basic->format_group tells which optional
 * groups follow. They are walked in this order: memory info, GPRs, XMM
 * registers, LBR entries, counter snapshot. At the end the walked size is
 * checked against basic->format_size.
 *
 * @iregs is the base for @regs and is used for the callchain. @regs must be
 * embedded in a struct x86_perf_regs, whose xmm_regs pointer is set here.
 *
 * Does nothing when @__pebs is NULL.
 */
static void setup_pebs_adaptive_sample_data(struct perf_event *event,
					    struct pt_regs *iregs, void *__pebs,
					    struct perf_sample_data *data,
					    struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 sample_type = event->attr.sample_type;
	struct pebs_basic *basic = __pebs;
	/* Cursor over the record; starts right behind the basic group. */
	void *next_record = basic + 1;
	struct pebs_meminfo *meminfo = NULL;
	struct pebs_gprs *gprs = NULL;
	struct x86_perf_regs *perf_regs;
	u64 format_group;
	u16 retire;

	if (basic == NULL)
		return;

	perf_regs = container_of(regs, struct x86_perf_regs, regs);
	perf_regs->xmm_regs = NULL;

	format_group = basic->format_group;

	__setup_perf_sample_data(event, iregs, data);

	*regs = *iregs;

	/* basic group */
	retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
			basic->retire_latency : 0;
	__setup_pebs_basic_group(event, regs, data, sample_type,
				 basic->ip, basic->tsc, retire);

	/*
	 * The record for MEMINFO is in front of GP
	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
	 * Save the pointer here but process later.
	 */
	if (format_group & PEBS_DATACFG_MEMINFO) {
		meminfo = next_record;
		next_record = meminfo + 1;
	}

	if (format_group & PEBS_DATACFG_GP) {
		gprs = next_record;
		next_record = gprs + 1;

		__setup_pebs_gpr_group(event, regs, gprs, sample_type);
	}

	/* Deferred MEMINFO processing, now that gprs (if any) is known. */
	if (format_group & PEBS_DATACFG_MEMINFO) {
		u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
				meminfo->cache_latency : meminfo->mem_latency;
		u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
				meminfo->instr_latency : 0;
		u64 ax = gprs ? gprs->ax : 0;

		__setup_pebs_meminfo_group(event, data, sample_type, latency,
					   instr_latency, meminfo->address,
					   meminfo->aux, meminfo->tsx_tuning,
					   ax);
	}

	if (format_group & PEBS_DATACFG_XMMS) {
		struct pebs_xmm *xmm = next_record;

		next_record = xmm + 1;
		perf_regs->xmm_regs = xmm->xmm;
	}

	if (format_group & PEBS_DATACFG_LBRS) {
		struct lbr_entry *lbr = next_record;
		/* The 8-bit field at PEBS_DATACFG_LBR_SHIFT holds the entry count minus one. */
		int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
					& 0xff) + 1;
		next_record = next_record + num_lbr * sizeof(struct lbr_entry);

		/* The entries are skipped either way, but only used for branch stack events. */
		if (has_branch_stack(event)) {
			intel_pmu_store_pebs_lbrs(lbr);
			intel_pmu_lbr_save_brstack(data, cpuc, event);
		}
	}

	if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
		struct pebs_cntr_header *cntr = next_record;
		unsigned int nr;

		next_record += sizeof(struct pebs_cntr_header);
		/*
		 * The PEBS_DATA_CFG is a global register, which is the
		 * superset configuration for all PEBS events.
		 * For the PEBS record of non-sample-read group, ignore
		 * the counter snapshot fields.
		 */
		if (is_pebs_counter_event_group(event)) {
			__setup_pebs_counter_group(cpuc, event, cntr, next_record);
			data->sample_flags |= PERF_SAMPLE_READ;
		}

		/* Skip the snapshot: one u64 per recorded counter, two for metrics. */
		nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
		if (cntr->metrics == INTEL_CNTR_METRICS)
			nr += 2;
		next_record += nr * sizeof(u64);
	}

	/* The walk must end exactly at the size the record claims to have. */
	WARN_ONCE(next_record != __pebs + basic->format_size,
			"PEBS record size %u, expected %llu, config %llx\n",
			basic->format_size,
			(u64)(next_record - __pebs),
			format_group);
}
2622 
2623 static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
2624 {
2625 	/* Continue bit or null PEBS record indicates fragment follows. */
2626 	return header->cont || !(header->format & GENMASK_ULL(63, 16));
2627 }
2628 
/*
 * Fill @data and @regs from an arch-PEBS record at @__pebs.
 *
 * A record consists of one or more fragments. Each fragment starts with an
 * arch_pebs_header whose bits tell which groups are present; they are
 * walked in this order: basic, aux (memory info), GPRs, XMM registers, LBR
 * entries, counter snapshot. Fragments are processed one after the other
 * for as long as arch_pebs_record_continued() reports a follow-up fragment.
 *
 * @iregs is the base for @regs and is used for the callchain. @regs must be
 * embedded in a struct x86_perf_regs, whose xmm_regs pointer is set here.
 *
 * Does nothing when @__pebs is NULL.
 */
static void setup_arch_pebs_sample_data(struct perf_event *event,
					struct pt_regs *iregs,
					void *__pebs,
					struct perf_sample_data *data,
					struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 sample_type = event->attr.sample_type;
	struct arch_pebs_header *header = NULL;
	struct arch_pebs_aux *meminfo = NULL;
	struct arch_pebs_gprs *gprs = NULL;
	struct x86_perf_regs *perf_regs;
	void *next_record;
	/* Start of the fragment currently being parsed. */
	void *at = __pebs;

	if (at == NULL)
		return;

	perf_regs = container_of(regs, struct x86_perf_regs, regs);
	perf_regs->xmm_regs = NULL;

	__setup_perf_sample_data(event, iregs, data);

	*regs = *iregs;

again:
	header = at;
	next_record = at + sizeof(struct arch_pebs_header);
	if (header->basic) {
		struct arch_pebs_basic *basic = next_record;
		u16 retire = 0;

		next_record = basic + 1;

		/* The retire latency is only taken when the record marks it valid. */
		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
			retire = basic->valid ? basic->retire : 0;
		__setup_pebs_basic_group(event, regs, data, sample_type,
				 basic->ip, basic->tsc, retire);
	}

	/*
	 * The record for MEMINFO is in front of GP
	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
	 * Save the pointer here but process later.
	 */
	if (header->aux) {
		meminfo = next_record;
		next_record = meminfo + 1;
	}

	if (header->gpr) {
		gprs = next_record;
		next_record = gprs + 1;

		__setup_pebs_gpr_group(event, regs,
				       (struct pebs_gprs *)gprs,
				       sample_type);
	}

	/* Deferred aux processing, now that gprs (if any) is known. */
	if (header->aux) {
		u64 ax = gprs ? gprs->ax : 0;

		__setup_pebs_meminfo_group(event, data, sample_type,
					   meminfo->cache_latency,
					   meminfo->instr_latency,
					   meminfo->address, meminfo->aux,
					   meminfo->tsx_tuning, ax);
	}

	if (header->xmm) {
		struct pebs_xmm *xmm;

		/* The XMM values are preceded by a header that is skipped. */
		next_record += sizeof(struct arch_pebs_xer_header);

		xmm = next_record;
		perf_regs->xmm_regs = xmm->xmm;
		next_record = xmm + 1;
	}

	if (header->lbr) {
		struct arch_pebs_lbr_header *lbr_header = next_record;
		struct lbr_entry *lbr;
		int num_lbr;

		next_record = lbr_header + 1;
		lbr = next_record;

		/*
		 * The entry count is either given by the LBR header
		 * (variable) or is a multiple of the base entry count.
		 */
		num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
				lbr_header->depth :
				header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
		next_record += num_lbr * sizeof(struct lbr_entry);

		/* The entries are skipped either way, but only used for branch stack events. */
		if (has_branch_stack(event)) {
			intel_pmu_store_pebs_lbrs(lbr);
			intel_pmu_lbr_save_brstack(data, cpuc, event);
		}
	}

	if (header->cntr) {
		struct arch_pebs_cntr_header *cntr = next_record;
		unsigned int nr;

		next_record += sizeof(struct arch_pebs_cntr_header);

		/* Counter snapshots are only consumed for counter event groups. */
		if (is_pebs_counter_event_group(event)) {
			__setup_pebs_counter_group(cpuc, event,
				(struct pebs_cntr_header *)cntr, next_record);
			data->sample_flags |= PERF_SAMPLE_READ;
		}

		/* Skip the snapshot: one u64 per recorded counter, two for metrics. */
		nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
		if (cntr->metrics == INTEL_CNTR_METRICS)
			nr += 2;
		next_record += nr * sizeof(u64);
	}

	/*
	 * Parse the following fragments, if any.
	 * NOTE(review): this relies on header->size being non-zero for a
	 * continued fragment, otherwise the loop would not advance - confirm.
	 */
	if (arch_pebs_record_continued(header)) {
		at = at + header->size;
		goto again;
	}
}
2751 
2752 static inline void *
2753 get_next_pebs_record_by_bit(void *base, void *top, int bit)
2754 {
2755 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2756 	void *at;
2757 	u64 pebs_status;
2758 
2759 	/*
2760 	 * fmt0 does not have a status bitfield (does not use
2761 	 * perf_record_nhm format)
2762 	 */
2763 	if (x86_pmu.intel_cap.pebs_format < 1)
2764 		return base;
2765 
2766 	if (base == NULL)
2767 		return NULL;
2768 
2769 	for (at = base; at < top; at += cpuc->pebs_record_size) {
2770 		unsigned long status = get_pebs_status(at);
2771 
2772 		if (test_bit(bit, (unsigned long *)&status)) {
2773 			/* PEBS v3 has accurate status bits */
2774 			if (x86_pmu.intel_cap.pebs_format >= 3)
2775 				return at;
2776 
2777 			if (status == (1 << bit))
2778 				return at;
2779 
2780 			/* clear non-PEBS bit and re-check */
2781 			pebs_status = status & cpuc->pebs_enabled;
2782 			pebs_status &= PEBS_COUNTER_MASK;
2783 			if (pebs_status == (1 << bit))
2784 				return at;
2785 		}
2786 	}
2787 	return NULL;
2788 }
2789 
/*
 * Special variant of intel_pmu_save_and_restart() for auto-reload.
 *
 * @event: the auto-reload PEBS event
 * @count: number of overflows (counter reloads) since the previous update
 *
 * Reads the counter, updates prev_count, adds the elapsed events to
 * event->count (see the derivation below), sets period_left from the new
 * counter value and refreshes the user page. Always returns 0.
 */
static int
intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 period = hwc->sample_period;
	u64 prev_raw_count, new_raw_count;
	s64 new, old;

	WARN_ON(!period);

	/*
	 * drain_pebs() only happens when the PMU is disabled.
	 */
	WARN_ON(this_cpu_read(cpu_hw_events.enabled));

	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = rdpmc(hwc->event_base_rdpmc);
	local64_set(&hwc->prev_count, new_raw_count);

	/*
	 * Since the counter increments a negative counter value and
	 * overflows on the sign switch, giving the interval:
	 *
	 *   [-period, 0]
	 *
	 * the difference between two consecutive reads is:
	 *
	 *   A) value2 - value1;
	 *      when no overflows have happened in between,
	 *
	 *   B) (0 - value1) + (value2 - (-period));
	 *      when one overflow happened in between,
	 *
	 *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
	 *      when @n overflows happened in between.
	 *
	 * Here A) is the obvious difference, B) is the extension to the
	 * discrete interval, where the first term is to the top of the
	 * interval and the second term is from the bottom of the next
	 * interval and C) the extension to multiple intervals, where the
	 * middle term is the whole intervals covered.
	 *
	 * An equivalent of C, by reduction, is:
	 *
	 *   value2 - value1 + n * period
	 */
	/* Sign-extend the raw counter values from the counter width to 64 bits. */
	new = ((s64)(new_raw_count << shift) >> shift);
	old = ((s64)(prev_raw_count << shift) >> shift);
	local64_add(new - old + count * period, &event->count);

	/* The counter holds a negative value; what is left is its magnitude. */
	local64_set(&hwc->period_left, -new);

	perf_event_update_userpage(event);

	return 0;
}
2850 
/*
 * Per-format hook that is run on a PEBS record before the sample is emitted.
 * As called in this file the arguments are, in order: the event, the
 * interrupt regs (iregs), the record pointer, the sample data and the regs
 * that are subsequently handed to perf_event_output()/perf_event_overflow().
 */
typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
			 struct perf_sample_data *, struct pt_regs *);

/*
 * Stand-in used when a drain is requested with a NULL iregs pointer.
 * __intel_pmu_pebs_last_event() compares iregs against this object's address
 * to tell that case apart from a drain that carries real interrupt regs.
 */
static struct pt_regs dummy_iregs;
2855 
/*
 * Emit one PEBS record as a perf sample.
 *
 * @setup_sample is run on the record at @at with @event, @iregs, @data and
 * @regs, and the result is then passed to perf_event_output(). Unlike
 * __intel_pmu_pebs_last_event(), this never calls perf_event_overflow() and
 * does not touch the counter period.
 */
static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
		       struct pt_regs *iregs,
		       struct pt_regs *regs,
		       struct perf_sample_data *data,
		       void *at,
		       setup_fn setup_sample)
{
	setup_sample(event, iregs, at, data, regs);
	perf_event_output(event, data, regs);
}
2867 
2868 static __always_inline void
2869 __intel_pmu_pebs_last_event(struct perf_event *event,
2870 			    struct pt_regs *iregs,
2871 			    struct pt_regs *regs,
2872 			    struct perf_sample_data *data,
2873 			    void *at,
2874 			    int count,
2875 			    setup_fn setup_sample)
2876 {
2877 	struct hw_perf_event *hwc = &event->hw;
2878 
2879 	setup_sample(event, iregs, at, data, regs);
2880 	if (iregs == &dummy_iregs) {
2881 		/*
2882 		 * The PEBS records may be drained in the non-overflow context,
2883 		 * e.g., large PEBS + context switch. Perf should treat the
2884 		 * last record the same as other PEBS records, and doesn't
2885 		 * invoke the generic overflow handler.
2886 		 */
2887 		perf_event_output(event, data, regs);
2888 	} else {
2889 		/*
2890 		 * All but the last records are processed.
2891 		 * The last one is left to be able to call the overflow handler.
2892 		 */
2893 		perf_event_overflow(event, data, regs);
2894 	}
2895 
2896 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
2897 		if ((is_pebs_counter_event_group(event))) {
2898 			/*
2899 			 * The value of each sample has been updated when setup
2900 			 * the corresponding sample data.
2901 			 */
2902 			perf_event_update_userpage(event);
2903 		} else {
2904 			/*
2905 			 * Now, auto-reload is only enabled in fixed period mode.
2906 			 * The reload value is always hwc->sample_period.
2907 			 * May need to change it, if auto-reload is enabled in
2908 			 * freq mode later.
2909 			 */
2910 			intel_pmu_save_and_restart_reload(event, count);
2911 		}
2912 	} else {
2913 		/*
2914 		 * For a non-precise event, it's possible the
2915 		 * counters-snapshotting records a positive value for the
2916 		 * overflowed event. Then the HW auto-reload mechanism
2917 		 * reset the counter to 0 immediately, because the
2918 		 * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD
2919 		 * is not set. The counter backwards may be observed in a
2920 		 * PMI handler.
2921 		 *
2922 		 * Since the event value has been updated when processing the
2923 		 * counters-snapshotting record, only needs to set the new
2924 		 * period for the counter.
2925 		 */
2926 		if (is_pebs_counter_event_group(event))
2927 			static_call(x86_pmu_set_period)(event);
2928 		else
2929 			intel_pmu_save_and_restart(event);
2930 	}
2931 }
2932 
2933 static __always_inline void
2934 __intel_pmu_pebs_events(struct perf_event *event,
2935 			struct pt_regs *iregs,
2936 			struct perf_sample_data *data,
2937 			void *base, void *top,
2938 			int bit, int count,
2939 			setup_fn setup_sample)
2940 {
2941 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2942 	struct x86_perf_regs perf_regs;
2943 	struct pt_regs *regs = &perf_regs.regs;
2944 	void *at = get_next_pebs_record_by_bit(base, top, bit);
2945 	int cnt = count;
2946 
2947 	if (!iregs)
2948 		iregs = &dummy_iregs;
2949 
2950 	while (cnt > 1) {
2951 		__intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
2952 		at += cpuc->pebs_record_size;
2953 		at = get_next_pebs_record_by_bit(at, top, bit);
2954 		cnt--;
2955 	}
2956 
2957 	__intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
2958 }
2959 
2960 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
2961 {
2962 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2963 	struct debug_store *ds = cpuc->ds;
2964 	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
2965 	struct pebs_record_core *at, *top;
2966 	int n;
2967 
2968 	if (!x86_pmu.pebs_active)
2969 		return;
2970 
2971 	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
2972 	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
2973 
2974 	/*
2975 	 * Whatever else happens, drain the thing
2976 	 */
2977 	ds->pebs_index = ds->pebs_buffer_base;
2978 
2979 	if (!test_bit(0, cpuc->active_mask))
2980 		return;
2981 
2982 	WARN_ON_ONCE(!event);
2983 
2984 	if (!event->attr.precise_ip)
2985 		return;
2986 
2987 	n = top - at;
2988 	if (n <= 0) {
2989 		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2990 			intel_pmu_save_and_restart_reload(event, 0);
2991 		return;
2992 	}
2993 
2994 	__intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
2995 				setup_pebs_fixed_sample_data);
2996 }
2997 
2998 static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
2999 {
3000 	u64 pebs_enabled = cpuc->pebs_enabled & mask;
3001 	struct perf_event *event;
3002 	int bit;
3003 
3004 	/*
3005 	 * The drain_pebs() could be called twice in a short period
3006 	 * for auto-reload event in pmu::read(). There are no
3007 	 * overflows have happened in between.
3008 	 * It needs to call intel_pmu_save_and_restart_reload() to
3009 	 * update the event->count for this case.
3010 	 */
3011 	for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
3012 		event = cpuc->events[bit];
3013 		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
3014 			intel_pmu_save_and_restart_reload(event, 0);
3015 	}
3016 }
3017 
/*
 * Drain the DS PEBS buffer for the record layouts that carry a per-record
 * status word (installed as x86_pmu.drain_pebs for PEBS formats 1, 2 and 3).
 *
 * Works in two passes:
 *  1) walk every record and count, per counter bit, how many records belong
 *     to it (counts[]) and how many were dropped as collisions (error[]);
 *  2) for every counter with a non-zero tally, log the lost samples and emit
 *     the counted records through __intel_pmu_pebs_events().
 *
 * The buffer index is rewound to the buffer base before any processing.
 */
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	int max_pebs_events = intel_pmu_max_num_pebs(NULL);
	int bit, i, size;
	u64 mask;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	/*
	 * mask/size describe which counter bits are looked at: the PEBS
	 * capable counters by default, extended by the fixed counters when
	 * PMU_FL_PEBS_ALL is set.
	 */
	mask = x86_pmu.pebs_events_mask;
	size = max_pebs_events;
	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
		mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
		size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
	}

	/* Empty buffer: only bring auto-reload event counts up to date. */
	if (unlikely(base >= top)) {
		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
		return;
	}

	/* Pass 1: tally records and collisions per counter bit. */
	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;
		u64 pebs_status;

		pebs_status = p->status & cpuc->pebs_enabled;
		pebs_status &= mask;

		/* PEBS v3 has more accurate status bits */
		if (x86_pmu.intel_cap.pebs_format >= 3) {
			for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
				counts[bit]++;

			continue;
		}

		/*
		 * On some CPUs the PEBS status can be zero when PEBS is
		 * racing with clearing of GLOBAL_STATUS.
		 *
		 * Normally we would drop that record, but in the
		 * case when there is only a single active PEBS event
		 * we can assume it's for that event.
		 */
		if (!pebs_status && cpuc->pebs_enabled &&
			!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
			pebs_status = p->status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
				     max_pebs_events);

		/* Skip the record if that bit is not a PEBS capable counter. */
		if (!(x86_pmu.pebs_events_mask & (1 << bit)))
			continue;

		/*
		 * The PEBS hardware does not deal well with the situation
		 * when events happen near to each other and multiple bits
		 * are set. But it should happen rarely.
		 *
		 * If these events include one PEBS and multiple non-PEBS
		 * events, it doesn't impact PEBS record. The record will
		 * be handled normally. (slow path)
		 *
		 * If these events include two or more PEBS events, the
		 * records for the events can be collapsed into a single
		 * one, and it's not possible to reconstruct all events
		 * that caused the PEBS record. It's called collision.
		 * If collision happened, the record will be dropped.
		 */
		if (pebs_status != (1ULL << bit)) {
			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
				error[i]++;
			continue;
		}

		counts[bit]++;
	}

	/* Pass 2: report drops and emit the counted records per counter. */
	for_each_set_bit(bit, (unsigned long *)&mask, size) {
		if ((counts[bit] == 0) && (error[bit] == 0))
			continue;

		event = cpuc->events[bit];
		if (WARN_ON_ONCE(!event))
			continue;

		if (WARN_ON_ONCE(!event->attr.precise_ip))
			continue;

		/* log dropped samples number */
		if (error[bit]) {
			perf_log_lost_samples(event, error[bit]);

			/* Interrupt accounting is only done when iregs was supplied. */
			if (iregs)
				perf_event_account_interrupt(event);
		}

		if (counts[bit]) {
			__intel_pmu_pebs_events(event, iregs, data, base,
						top, bit, counts[bit],
						setup_pebs_fixed_sample_data);
		}
	}
}
3133 
3134 static __always_inline void
3135 __intel_pmu_handle_pebs_record(struct pt_regs *iregs,
3136 			       struct pt_regs *regs,
3137 			       struct perf_sample_data *data,
3138 			       void *at, u64 pebs_status,
3139 			       short *counts, void **last,
3140 			       setup_fn setup_sample)
3141 {
3142 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3143 	struct perf_event *event;
3144 	int bit;
3145 
3146 	for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
3147 		event = cpuc->events[bit];
3148 
3149 		if (WARN_ON_ONCE(!event) ||
3150 		    WARN_ON_ONCE(!event->attr.precise_ip))
3151 			continue;
3152 
3153 		if (counts[bit]++) {
3154 			__intel_pmu_pebs_event(event, iregs, regs, data,
3155 					       last[bit], setup_sample);
3156 		}
3157 
3158 		last[bit] = at;
3159 	}
3160 }
3161 
3162 static __always_inline void
3163 __intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
3164 				    struct pt_regs *regs,
3165 				    struct perf_sample_data *data,
3166 				    u64 mask, short *counts, void **last,
3167 				    setup_fn setup_sample)
3168 {
3169 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3170 	struct perf_event *event;
3171 	int bit;
3172 
3173 	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
3174 		if (!counts[bit])
3175 			continue;
3176 
3177 		event = cpuc->events[bit];
3178 
3179 		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
3180 					    counts[bit], setup_sample);
3181 	}
3182 
3183 }
3184 
3185 static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
3186 {
3187 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3188 	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3189 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3190 	struct debug_store *ds = cpuc->ds;
3191 	struct x86_perf_regs perf_regs;
3192 	struct pt_regs *regs = &perf_regs.regs;
3193 	struct pebs_basic *basic;
3194 	void *base, *at, *top;
3195 	u64 mask;
3196 
3197 	if (!x86_pmu.pebs_active)
3198 		return;
3199 
3200 	base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
3201 	top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
3202 
3203 	ds->pebs_index = ds->pebs_buffer_base;
3204 
3205 	mask = hybrid(cpuc->pmu, pebs_events_mask) |
3206 	       (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
3207 	mask &= cpuc->pebs_enabled;
3208 
3209 	if (unlikely(base >= top)) {
3210 		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
3211 		return;
3212 	}
3213 
3214 	if (!iregs)
3215 		iregs = &dummy_iregs;
3216 
3217 	/* Process all but the last event for each counter. */
3218 	for (at = base; at < top; at += basic->format_size) {
3219 		u64 pebs_status;
3220 
3221 		basic = at;
3222 		if (basic->format_size != cpuc->pebs_record_size)
3223 			continue;
3224 
3225 		pebs_status = mask & basic->applicable_counters;
3226 		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3227 					       pebs_status, counts, last,
3228 					       setup_pebs_adaptive_sample_data);
3229 	}
3230 
3231 	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
3232 					    setup_pebs_adaptive_sample_data);
3233 }
3234 
3235 static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
3236 				      struct perf_sample_data *data)
3237 {
3238 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
3239 	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
3240 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3241 	union arch_pebs_index index;
3242 	struct x86_perf_regs perf_regs;
3243 	struct pt_regs *regs = &perf_regs.regs;
3244 	void *base, *at, *top;
3245 	u64 mask;
3246 
3247 	rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3248 
3249 	if (unlikely(!index.wr)) {
3250 		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
3251 		return;
3252 	}
3253 
3254 	base = cpuc->pebs_vaddr;
3255 	top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);
3256 
3257 	index.wr = 0;
3258 	index.full = 0;
3259 	index.en = 1;
3260 	if (cpuc->n_pebs == cpuc->n_large_pebs)
3261 		index.thresh = ARCH_PEBS_THRESH_MULTI;
3262 	else
3263 		index.thresh = ARCH_PEBS_THRESH_SINGLE;
3264 	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
3265 
3266 	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
3267 
3268 	if (!iregs)
3269 		iregs = &dummy_iregs;
3270 
3271 	/* Process all but the last event for each counter. */
3272 	for (at = base; at < top;) {
3273 		struct arch_pebs_header *header;
3274 		struct arch_pebs_basic *basic;
3275 		u64 pebs_status;
3276 
3277 		header = at;
3278 
3279 		if (WARN_ON_ONCE(!header->size))
3280 			break;
3281 
3282 		/* 1st fragment or single record must have basic group */
3283 		if (!header->basic) {
3284 			at += header->size;
3285 			continue;
3286 		}
3287 
3288 		basic = at + sizeof(struct arch_pebs_header);
3289 		pebs_status = mask & basic->applicable_counters;
3290 		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
3291 					       pebs_status, counts, last,
3292 					       setup_arch_pebs_sample_data);
3293 
3294 		/* Skip non-last fragments */
3295 		while (arch_pebs_record_continued(header)) {
3296 			if (!header->size)
3297 				break;
3298 			at += header->size;
3299 			header = at;
3300 		}
3301 
3302 		/* Skip last fragment or the single record */
3303 		at += header->size;
3304 	}
3305 
3306 	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
3307 					    counts, last,
3308 					    setup_arch_pebs_sample_data);
3309 }
3310 
/*
 * Set up x86_pmu for arch-PEBS: flag it as available, select the buffer
 * size, install the arch-PEBS drain routine and the enable/disable hooks,
 * and mark every counter as PEBS capable.
 */
static void __init intel_arch_pebs_init(void)
{
	/*
	 * On current hybrid platforms arch-PEBS is either supported on all
	 * kinds of cores or on none of them, so the x86_pmu.arch_pebs flag
	 * is set directly once the boot CPU supports arch-PEBS.
	 */
	x86_pmu.arch_pebs = 1;
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
	/* All bits set: no counter is excluded from PEBS. */
	x86_pmu.pebs_capable = ~0ULL;
	x86_pmu.flags |= PMU_FL_PEBS_ALL;

	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
	x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
}
3327 
3328 /*
3329  * PEBS probe and setup
3330  */
3331 
/*
 * Probe DS based PEBS and configure x86_pmu for the reported record format.
 *
 * Does nothing without X86_FEATURE_DTES64. Otherwise x86_pmu.ds_pebs follows
 * X86_FEATURE_PEBS, and when it is set the record size, the drain routine
 * and the large-PEBS sample flags are chosen from intel_cap.pebs_format.
 * An unknown format turns ds_pebs off again.
 */
static void __init intel_ds_pebs_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	if (x86_pmu.version <= 4)
		x86_pmu.pebs_no_isolation = 1;

	if (x86_pmu.ds_pebs) {
		/* '+' / '-' in the boot message reflects intel_cap.pebs_trap. */
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		char *pebs_qual = "";
		int format = x86_pmu.intel_cap.pebs_format;

		/* The baseline capability is only honoured from format 4 on. */
		if (format < 4)
			x86_pmu.intel_cap.pebs_baseline = 0;

		x86_pmu.pebs_enable = intel_pmu_pebs_enable;
		x86_pmu.pebs_disable = intel_pmu_pebs_disable;
		x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
		x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;

		switch (format) {
		case 0:
			pr_cont("PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			/*
			 * Using >PAGE_SIZE buffers makes the WRMSR to
			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
			 * mysteriously hang on Core2.
			 *
			 * As a workaround, we don't do this.
			 */
			x86_pmu.pebs_buffer_size = PAGE_SIZE;
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			pr_cont("PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 3:
			pr_cont("PEBS fmt3%c, ", pebs_type);
			x86_pmu.pebs_record_size =
						sizeof(struct pebs_record_skl);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
			break;

		/*
		 * Formats 6, 5 and 4 share one setup path: 6 adds its own
		 * bit and falls into 5, which adds its own bit and falls
		 * into 4.
		 */
		case 6:
			if (x86_pmu.intel_cap.pebs_baseline)
				x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
			fallthrough;
		case 5:
			x86_pmu.pebs_ept = 1;
			fallthrough;
		case 4:
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
			if (x86_pmu.intel_cap.pebs_baseline) {
				x86_pmu.large_pebs_flags |=
					PERF_SAMPLE_BRANCH_STACK |
					PERF_SAMPLE_TIME;
				x86_pmu.flags |= PMU_FL_PEBS_ALL;
				x86_pmu.pebs_capable = ~0ULL;
				pebs_qual = "-baseline";
				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
			} else {
				/* Only basic record supported */
				x86_pmu.large_pebs_flags &=
					~(PERF_SAMPLE_ADDR |
					  PERF_SAMPLE_TIME |
					  PERF_SAMPLE_DATA_SRC |
					  PERF_SAMPLE_TRANSACTION |
					  PERF_SAMPLE_REGS_USER |
					  PERF_SAMPLE_REGS_INTR);
			}
			pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);

			/*
			 * The PEBS-via-PT is not supported on hybrid platforms,
			 * because not all CPUs of a hybrid machine support it.
			 * The global x86_pmu.intel_cap, which only contains the
			 * common capabilities, is used to check the availability
			 * of the feature. The per-PMU pebs_output_pt_available
			 * in a hybrid machine should be ignored.
			 */
			if (x86_pmu.intel_cap.pebs_output_pt_available) {
				pr_cont("PEBS-via-PT, ");
				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
			}

			break;

		default:
			/* Unknown record format: report it and turn DS PEBS off. */
			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.ds_pebs = 0;
		}
	}
}
3444 
3445 void __init intel_pebs_init(void)
3446 {
3447 	if (x86_pmu.intel_cap.pebs_format == 0xf)
3448 		intel_arch_pebs_init();
3449 	else
3450 		intel_ds_pebs_init();
3451 }
3452 
3453 void perf_restore_debug_store(void)
3454 {
3455 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
3456 
3457 	if (!x86_pmu.bts && !x86_pmu.ds_pebs)
3458 		return;
3459 
3460 	wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
3461 }
3462