xref: /linux/arch/x86/events/amd/lbr.c (revision f9bff0e31881d03badf191d3b0005839391f5f2b)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/perf_event.h>
3 #include <asm/perf_event.h>
4 
5 #include "../perf_event.h"
6 
/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bit positions. Each bit, when set, ensures that
 * the corresponding type of branch is not recorded.
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

/* Single-bit masks built from the filter bit positions above */
#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)

/*
 * Sentinel values used in place of a filter mask. The negative constant is
 * parenthesized so that it expands safely inside any expression.
 */
#define LBR_NOT_SUPP	(-1)	/* unsupported filter */
#define LBR_IGNORE	0	/* accepted, but selects no filter bits */

/* All branch type bits; the CPL bits (LBR_KERNEL, LBR_USER) are excluded */
#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
39 
40 struct branch_entry {
41 	union {
42 		struct {
43 			u64	ip:58;
44 			u64	ip_sign_ext:5;
45 			u64	mispredict:1;
46 		} split;
47 		u64		full;
48 	} from;
49 
50 	union {
51 		struct {
52 			u64	ip:58;
53 			u64	ip_sign_ext:3;
54 			u64	reserved:1;
55 			u64	spec:1;
56 			u64	valid:1;
57 		} split;
58 		u64		full;
59 	} to;
60 };
61 
62 static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
63 {
64 	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
65 }
66 
67 static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
68 {
69 	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
70 }
71 
72 static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
73 {
74 	u64 val;
75 
76 	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
77 
78 	return val;
79 }
80 
81 static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
82 {
83 	u64 val;
84 
85 	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
86 
87 	return val;
88 }
89 
90 static __always_inline u64 sign_ext_branch_ip(u64 ip)
91 {
92 	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
93 
94 	return (u64)(((s64)ip << shift) >> shift);
95 }
96 
97 static void amd_pmu_lbr_filter(void)
98 {
99 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
100 	int br_sel = cpuc->br_sel, offset, type, i, j;
101 	bool compress = false;
102 	bool fused_only = false;
103 	u64 from, to;
104 
105 	/* If sampling all branches, there is nothing to filter */
106 	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
107 	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
108 		fused_only = true;
109 
110 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
111 		from = cpuc->lbr_entries[i].from;
112 		to = cpuc->lbr_entries[i].to;
113 		type = branch_type_fused(from, to, 0, &offset);
114 
115 		/*
116 		 * Adjust the branch from address in case of instruction
117 		 * fusion where it points to an instruction preceding the
118 		 * actual branch
119 		 */
120 		if (offset) {
121 			cpuc->lbr_entries[i].from += offset;
122 			if (fused_only)
123 				continue;
124 		}
125 
126 		/* If type does not correspond, then discard */
127 		if (type == X86_BR_NONE || (br_sel & type) != type) {
128 			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
129 			compress = true;
130 		}
131 
132 		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
133 			cpuc->lbr_entries[i].type = common_branch_type(type);
134 	}
135 
136 	if (!compress)
137 		return;
138 
139 	/* Remove all invalid entries */
140 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
141 		if (!cpuc->lbr_entries[i].from) {
142 			j = i;
143 			while (++j < cpuc->lbr_stack.nr)
144 				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
145 			cpuc->lbr_stack.nr--;
146 			if (!cpuc->lbr_entries[i].from)
147 				continue;
148 		}
149 		i++;
150 	}
151 }
152 
153 static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
154 	PERF_BR_SPEC_NA,
155 	PERF_BR_SPEC_WRONG_PATH,
156 	PERF_BR_NON_SPEC_CORRECT_PATH,
157 	PERF_BR_SPEC_CORRECT_PATH,
158 };
159 
160 void amd_pmu_lbr_read(void)
161 {
162 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
163 	struct perf_branch_entry *br = cpuc->lbr_entries;
164 	struct branch_entry entry;
165 	int out = 0, idx, i;
166 
167 	if (!cpuc->lbr_users)
168 		return;
169 
170 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
171 		entry.from.full	= amd_pmu_lbr_get_from(i);
172 		entry.to.full	= amd_pmu_lbr_get_to(i);
173 
174 		/*
175 		 * Check if a branch has been logged; if valid = 0, spec = 0
176 		 * then no branch was recorded
177 		 */
178 		if (!entry.to.split.valid && !entry.to.split.spec)
179 			continue;
180 
181 		perf_clear_branch_entry_bitfields(br + out);
182 
183 		br[out].from	= sign_ext_branch_ip(entry.from.split.ip);
184 		br[out].to	= sign_ext_branch_ip(entry.to.split.ip);
185 		br[out].mispred	= entry.from.split.mispredict;
186 		br[out].predicted = !br[out].mispred;
187 
188 		/*
189 		 * Set branch speculation information using the status of
190 		 * the valid and spec bits.
191 		 *
192 		 * When valid = 0, spec = 0, no branch was recorded and the
193 		 * entry is discarded as seen above.
194 		 *
195 		 * When valid = 0, spec = 1, the recorded branch was
196 		 * speculative but took the wrong path.
197 		 *
198 		 * When valid = 1, spec = 0, the recorded branch was
199 		 * non-speculative but took the correct path.
200 		 *
201 		 * When valid = 1, spec = 1, the recorded branch was
202 		 * speculative and took the correct path
203 		 */
204 		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
205 		br[out].spec = lbr_spec_map[idx];
206 		out++;
207 	}
208 
209 	cpuc->lbr_stack.nr = out;
210 
211 	/*
212 	 * Internal register renaming always ensures that LBR From[0] and
213 	 * LBR To[0] always represent the TOS
214 	 */
215 	cpuc->lbr_stack.hw_idx = 0;
216 
217 	/* Perform further software filtering */
218 	amd_pmu_lbr_filter();
219 }
220 
221 static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
222 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
223 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
224 	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,
225 
226 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
227 	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
228 	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
229 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
230 	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
231 	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
232 	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
233 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
234 
235 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
236 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
237 	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
238 
239 	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
240 	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
241 };
242 
243 static int amd_pmu_lbr_setup_filter(struct perf_event *event)
244 {
245 	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
246 	u64 br_type = event->attr.branch_sample_type;
247 	u64 mask = 0, v;
248 	int i;
249 
250 	/* No LBR support */
251 	if (!x86_pmu.lbr_nr)
252 		return -EOPNOTSUPP;
253 
254 	if (br_type & PERF_SAMPLE_BRANCH_USER)
255 		mask |= X86_BR_USER;
256 
257 	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
258 		mask |= X86_BR_KERNEL;
259 
260 	/* Ignore BRANCH_HV here */
261 
262 	if (br_type & PERF_SAMPLE_BRANCH_ANY)
263 		mask |= X86_BR_ANY;
264 
265 	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
266 		mask |= X86_BR_ANY_CALL;
267 
268 	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
269 		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
270 
271 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
272 		mask |= X86_BR_IND_CALL;
273 
274 	if (br_type & PERF_SAMPLE_BRANCH_COND)
275 		mask |= X86_BR_JCC;
276 
277 	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
278 		mask |= X86_BR_IND_JMP;
279 
280 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
281 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
282 
283 	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
284 		mask |= X86_BR_TYPE_SAVE;
285 
286 	reg->reg = mask;
287 	mask = 0;
288 
289 	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
290 		if (!(br_type & BIT_ULL(i)))
291 			continue;
292 
293 		v = lbr_select_map[i];
294 		if (v == LBR_NOT_SUPP)
295 			return -EOPNOTSUPP;
296 
297 		if (v != LBR_IGNORE)
298 			mask |= v;
299 	}
300 
301 	/* Filter bits operate in suppress mode */
302 	reg->config = mask ^ LBR_SELECT_MASK;
303 
304 	return 0;
305 }
306 
307 int amd_pmu_lbr_hw_config(struct perf_event *event)
308 {
309 	int ret = 0;
310 
311 	/* LBR is not recommended in counting mode */
312 	if (!is_sampling_event(event))
313 		return -EINVAL;
314 
315 	ret = amd_pmu_lbr_setup_filter(event);
316 	if (!ret)
317 		event->attach_state |= PERF_ATTACH_SCHED_CB;
318 
319 	return ret;
320 }
321 
322 void amd_pmu_lbr_reset(void)
323 {
324 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
325 	int i;
326 
327 	if (!x86_pmu.lbr_nr)
328 		return;
329 
330 	/* Reset all branch records individually */
331 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
332 		amd_pmu_lbr_set_from(i, 0);
333 		amd_pmu_lbr_set_to(i, 0);
334 	}
335 
336 	cpuc->last_task_ctx = NULL;
337 	cpuc->last_log_id = 0;
338 	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
339 }
340 
341 void amd_pmu_lbr_add(struct perf_event *event)
342 {
343 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
344 	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
345 
346 	if (!x86_pmu.lbr_nr)
347 		return;
348 
349 	if (has_branch_stack(event)) {
350 		cpuc->lbr_select = 1;
351 		cpuc->lbr_sel->config = reg->config;
352 		cpuc->br_sel = reg->reg;
353 	}
354 
355 	perf_sched_cb_inc(event->pmu);
356 
357 	if (!cpuc->lbr_users++ && !event->total_time_running)
358 		amd_pmu_lbr_reset();
359 }
360 
361 void amd_pmu_lbr_del(struct perf_event *event)
362 {
363 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
364 
365 	if (!x86_pmu.lbr_nr)
366 		return;
367 
368 	if (has_branch_stack(event))
369 		cpuc->lbr_select = 0;
370 
371 	cpuc->lbr_users--;
372 	WARN_ON_ONCE(cpuc->lbr_users < 0);
373 	perf_sched_cb_dec(event->pmu);
374 }
375 
376 void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
377 {
378 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
379 
380 	/*
381 	 * A context switch can flip the address space and LBR entries are
382 	 * not tagged with an identifier. Hence, branches cannot be resolved
383 	 * from the old address space and the LBR records should be wiped.
384 	 */
385 	if (cpuc->lbr_users && sched_in)
386 		amd_pmu_lbr_reset();
387 }
388 
389 void amd_pmu_lbr_enable_all(void)
390 {
391 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
392 	u64 lbr_select, dbg_ctl, dbg_extn_cfg;
393 
394 	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
395 		return;
396 
397 	/* Set hardware branch filter */
398 	if (cpuc->lbr_select) {
399 		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
400 		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
401 	}
402 
403 	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
404 	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
405 
406 	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
407 	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
408 }
409 
410 void amd_pmu_lbr_disable_all(void)
411 {
412 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
413 	u64 dbg_ctl, dbg_extn_cfg;
414 
415 	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
416 		return;
417 
418 	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
419 	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
420 
421 	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
422 	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
423 }
424 
425 __init int amd_pmu_lbr_init(void)
426 {
427 	union cpuid_0x80000022_ebx ebx;
428 
429 	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
430 		return -EOPNOTSUPP;
431 
432 	/* Set number of entries */
433 	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
434 	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
435 
436 	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
437 
438 	return 0;
439 }
440