xref: /linux/arch/x86/events/amd/lbr.c (revision 222408cde4d0ab17e54d4db26751c2b5cab9ac2b)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/perf_event.h>
3 #include <asm/perf_event.h>
4 
5 #include "../perf_event.h"
6 
/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bit positions. Each bit, when set, ensures
 * that the corresponding type of branch is not recorded.
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

/* Bit masks for the filter bit positions above, in bit order */
#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)

/* Special values used in the branch sample type to filter mapping */
#define LBR_NOT_SUPP	(-1)	/* unsupported filter */
#define LBR_IGNORE	0	/* filter that contributes no select bit */

/* Every branch type bit, i.e. all filter bits except the privilege ones */
#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
39 
40 struct branch_entry {
41 	union {
42 		struct {
43 			u64	ip:58;
44 			u64	ip_sign_ext:5;
45 			u64	mispredict:1;
46 		} split;
47 		u64		full;
48 	} from;
49 
50 	union {
51 		struct {
52 			u64	ip:58;
53 			u64	ip_sign_ext:3;
54 			u64	reserved:1;
55 			u64	spec:1;
56 			u64	valid:1;
57 		} split;
58 		u64		full;
59 	} to;
60 };
61 
62 static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
63 {
64 	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
65 }
66 
67 static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
68 {
69 	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
70 }
71 
72 static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
73 {
74 	u64 val;
75 
76 	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
77 
78 	return val;
79 }
80 
81 static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
82 {
83 	u64 val;
84 
85 	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
86 
87 	return val;
88 }
89 
90 static __always_inline u64 sign_ext_branch_ip(u64 ip)
91 {
92 	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
93 
94 	return (u64)(((s64)ip << shift) >> shift);
95 }
96 
97 static void amd_pmu_lbr_filter(void)
98 {
99 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
100 	int br_sel = cpuc->br_sel, offset, type, i, j;
101 	bool compress = false;
102 	bool fused_only = false;
103 	u64 from, to;
104 
105 	/* If sampling all branches, there is nothing to filter */
106 	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
107 	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
108 		fused_only = true;
109 
110 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
111 		from = cpuc->lbr_entries[i].from;
112 		to = cpuc->lbr_entries[i].to;
113 		type = branch_type_fused(from, to, 0, &offset);
114 
115 		/*
116 		 * Adjust the branch from address in case of instruction
117 		 * fusion where it points to an instruction preceding the
118 		 * actual branch
119 		 */
120 		if (offset) {
121 			cpuc->lbr_entries[i].from += offset;
122 			if (fused_only)
123 				continue;
124 		}
125 
126 		/* If type does not correspond, then discard */
127 		if (type == X86_BR_NONE || (br_sel & type) != type) {
128 			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
129 			compress = true;
130 		}
131 
132 		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
133 			cpuc->lbr_entries[i].type = common_branch_type(type);
134 	}
135 
136 	if (!compress)
137 		return;
138 
139 	/* Remove all invalid entries */
140 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
141 		if (!cpuc->lbr_entries[i].from) {
142 			j = i;
143 			while (++j < cpuc->lbr_stack.nr)
144 				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
145 			cpuc->lbr_stack.nr--;
146 			if (!cpuc->lbr_entries[i].from)
147 				continue;
148 		}
149 		i++;
150 	}
151 }
152 
153 static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
154 	PERF_BR_SPEC_NA,
155 	PERF_BR_SPEC_WRONG_PATH,
156 	PERF_BR_NON_SPEC_CORRECT_PATH,
157 	PERF_BR_SPEC_CORRECT_PATH,
158 };
159 
160 void amd_pmu_lbr_read(void)
161 {
162 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
163 	struct perf_branch_entry *br = cpuc->lbr_entries;
164 	struct branch_entry entry;
165 	int out = 0, idx, i;
166 
167 	if (!cpuc->lbr_users)
168 		return;
169 
170 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
171 		entry.from.full	= amd_pmu_lbr_get_from(i);
172 		entry.to.full	= amd_pmu_lbr_get_to(i);
173 
174 		/*
175 		 * Check if a branch has been logged; if valid = 0, spec = 0
176 		 * then no branch was recorded; if reserved = 1 then an
177 		 * erroneous branch was recorded (see Erratum 1452)
178 		 */
179 		if ((!entry.to.split.valid && !entry.to.split.spec) ||
180 		    entry.to.split.reserved)
181 			continue;
182 
183 		perf_clear_branch_entry_bitfields(br + out);
184 
185 		br[out].from	= sign_ext_branch_ip(entry.from.split.ip);
186 		br[out].to	= sign_ext_branch_ip(entry.to.split.ip);
187 		br[out].mispred	= entry.from.split.mispredict;
188 		br[out].predicted = !br[out].mispred;
189 
190 		/*
191 		 * Set branch speculation information using the status of
192 		 * the valid and spec bits.
193 		 *
194 		 * When valid = 0, spec = 0, no branch was recorded and the
195 		 * entry is discarded as seen above.
196 		 *
197 		 * When valid = 0, spec = 1, the recorded branch was
198 		 * speculative but took the wrong path.
199 		 *
200 		 * When valid = 1, spec = 0, the recorded branch was
201 		 * non-speculative but took the correct path.
202 		 *
203 		 * When valid = 1, spec = 1, the recorded branch was
204 		 * speculative and took the correct path
205 		 */
206 		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
207 		br[out].spec = lbr_spec_map[idx];
208 		out++;
209 	}
210 
211 	cpuc->lbr_stack.nr = out;
212 
213 	/*
214 	 * Internal register renaming always ensures that LBR From[0] and
215 	 * LBR To[0] always represent the TOS
216 	 */
217 	cpuc->lbr_stack.hw_idx = 0;
218 
219 	/* Perform further software filtering */
220 	amd_pmu_lbr_filter();
221 }
222 
223 static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
224 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
225 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
226 	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,
227 
228 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
229 	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
230 	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
231 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
232 	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
233 	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
234 	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
235 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
236 
237 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
238 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
239 	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
240 
241 	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
242 	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
243 };
244 
245 static int amd_pmu_lbr_setup_filter(struct perf_event *event)
246 {
247 	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
248 	u64 br_type = event->attr.branch_sample_type;
249 	u64 mask = 0, v;
250 	int i;
251 
252 	/* No LBR support */
253 	if (!x86_pmu.lbr_nr)
254 		return -EOPNOTSUPP;
255 
256 	if (br_type & PERF_SAMPLE_BRANCH_USER)
257 		mask |= X86_BR_USER;
258 
259 	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
260 		mask |= X86_BR_KERNEL;
261 
262 	/* Ignore BRANCH_HV here */
263 
264 	if (br_type & PERF_SAMPLE_BRANCH_ANY)
265 		mask |= X86_BR_ANY;
266 
267 	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
268 		mask |= X86_BR_ANY_CALL;
269 
270 	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
271 		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
272 
273 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
274 		mask |= X86_BR_IND_CALL;
275 
276 	if (br_type & PERF_SAMPLE_BRANCH_COND)
277 		mask |= X86_BR_JCC;
278 
279 	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
280 		mask |= X86_BR_IND_JMP;
281 
282 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
283 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
284 
285 	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
286 		mask |= X86_BR_TYPE_SAVE;
287 
288 	reg->reg = mask;
289 	mask = 0;
290 
291 	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
292 		if (!(br_type & BIT_ULL(i)))
293 			continue;
294 
295 		v = lbr_select_map[i];
296 		if (v == LBR_NOT_SUPP)
297 			return -EOPNOTSUPP;
298 
299 		if (v != LBR_IGNORE)
300 			mask |= v;
301 	}
302 
303 	/* Filter bits operate in suppress mode */
304 	reg->config = mask ^ LBR_SELECT_MASK;
305 
306 	return 0;
307 }
308 
309 int amd_pmu_lbr_hw_config(struct perf_event *event)
310 {
311 	int ret = 0;
312 
313 	/* LBR is not recommended in counting mode */
314 	if (!is_sampling_event(event))
315 		return -EINVAL;
316 
317 	ret = amd_pmu_lbr_setup_filter(event);
318 	if (!ret)
319 		event->attach_state |= PERF_ATTACH_SCHED_CB;
320 
321 	return ret;
322 }
323 
324 void amd_pmu_lbr_reset(void)
325 {
326 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
327 	int i;
328 
329 	if (!x86_pmu.lbr_nr)
330 		return;
331 
332 	/* Reset all branch records individually */
333 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
334 		amd_pmu_lbr_set_from(i, 0);
335 		amd_pmu_lbr_set_to(i, 0);
336 	}
337 
338 	cpuc->last_task_ctx = NULL;
339 	cpuc->last_log_id = 0;
340 	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
341 }
342 
343 void amd_pmu_lbr_add(struct perf_event *event)
344 {
345 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
346 	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
347 
348 	if (!x86_pmu.lbr_nr)
349 		return;
350 
351 	if (has_branch_stack(event)) {
352 		cpuc->lbr_select = 1;
353 		cpuc->lbr_sel->config = reg->config;
354 		cpuc->br_sel = reg->reg;
355 	}
356 
357 	perf_sched_cb_inc(event->pmu);
358 
359 	if (!cpuc->lbr_users++ && !event->total_time_running)
360 		amd_pmu_lbr_reset();
361 }
362 
363 void amd_pmu_lbr_del(struct perf_event *event)
364 {
365 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
366 
367 	if (!x86_pmu.lbr_nr)
368 		return;
369 
370 	if (has_branch_stack(event))
371 		cpuc->lbr_select = 0;
372 
373 	cpuc->lbr_users--;
374 	WARN_ON_ONCE(cpuc->lbr_users < 0);
375 	perf_sched_cb_dec(event->pmu);
376 }
377 
378 void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
379 {
380 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
381 
382 	/*
383 	 * A context switch can flip the address space and LBR entries are
384 	 * not tagged with an identifier. Hence, branches cannot be resolved
385 	 * from the old address space and the LBR records should be wiped.
386 	 */
387 	if (cpuc->lbr_users && sched_in)
388 		amd_pmu_lbr_reset();
389 }
390 
391 void amd_pmu_lbr_enable_all(void)
392 {
393 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
394 	u64 lbr_select, dbg_ctl, dbg_extn_cfg;
395 
396 	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
397 		return;
398 
399 	/* Set hardware branch filter */
400 	if (cpuc->lbr_select) {
401 		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
402 		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
403 	}
404 
405 	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
406 		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
407 		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
408 	}
409 
410 	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
411 	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
412 }
413 
414 void amd_pmu_lbr_disable_all(void)
415 {
416 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
417 	u64 dbg_ctl, dbg_extn_cfg;
418 
419 	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
420 		return;
421 
422 	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
423 	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
424 
425 	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
426 		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
427 		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
428 	}
429 }
430 
431 __init int amd_pmu_lbr_init(void)
432 {
433 	union cpuid_0x80000022_ebx ebx;
434 
435 	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
436 		return -EOPNOTSUPP;
437 
438 	/* Set number of entries */
439 	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
440 	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
441 
442 	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
443 
444 	return 0;
445 }
446