xref: /linux/arch/x86/events/intel/lbr.c (revision f42be8651a7a9d5cb165e5d176fc0b09621b4f4d)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/perf_event.h>
3 #include <linux/types.h>
4 
5 #include <asm/perf_event.h>
6 #include <asm/msr.h>
7 #include <asm/insn.h>
8 
9 #include "../perf_event.h"
10 
11 static const enum {
12 	LBR_EIP_FLAGS		= 1,
13 	LBR_TSX			= 2,
14 } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
15 	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
16 	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
17 };
18 
19 /*
20  * Intel LBR_SELECT bits
21  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
22  *
23  * Hardware branch filter (not available on all CPUs)
24  */
25 #define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
26 #define LBR_USER_BIT		1 /* do not capture at ring > 0 */
27 #define LBR_JCC_BIT		2 /* do not capture conditional branches */
28 #define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
29 #define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
30 #define LBR_RETURN_BIT		5 /* do not capture near returns */
31 #define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
32 #define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
33 #define LBR_FAR_BIT		8 /* do not capture far branches */
34 #define LBR_CALL_STACK_BIT	9 /* enable call stack */
35 
36 /*
37  * Following bit only exists in Linux; we mask it out before writing it to
38  * the actual MSR. But it helps the constraint perf code to understand
39  * that this is a separate configuration.
40  */
41 #define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */
42 
43 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
44 #define LBR_USER	(1 << LBR_USER_BIT)
45 #define LBR_JCC		(1 << LBR_JCC_BIT)
46 #define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
47 #define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
48 #define LBR_RETURN	(1 << LBR_RETURN_BIT)
49 #define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
50 #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
51 #define LBR_FAR		(1 << LBR_FAR_BIT)
52 #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
53 #define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)
54 
55 #define LBR_PLM (LBR_KERNEL | LBR_USER)
56 
57 #define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
58 #define LBR_NOT_SUPP	-1	/* LBR filter not supported */
59 #define LBR_IGN		0	/* ignored */
60 
61 #define LBR_ANY		 \
62 	(LBR_JCC	|\
63 	 LBR_REL_CALL	|\
64 	 LBR_IND_CALL	|\
65 	 LBR_RETURN	|\
66 	 LBR_REL_JMP	|\
67 	 LBR_IND_JMP	|\
68 	 LBR_FAR)
69 
70 #define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
71 #define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
72 #define LBR_FROM_FLAG_ABORT	BIT_ULL(61)
73 
74 #define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))
75 
76 /*
77  * x86 control flow change classification
78  * x86 control flow changes include branches, interrupts, traps, faults
79  */
80 enum {
81 	X86_BR_NONE		= 0,      /* unknown */
82 
83 	X86_BR_USER		= 1 << 0, /* branch target is user */
84 	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
85 
86 	X86_BR_CALL		= 1 << 2, /* call */
87 	X86_BR_RET		= 1 << 3, /* return */
88 	X86_BR_SYSCALL		= 1 << 4, /* syscall */
89 	X86_BR_SYSRET		= 1 << 5, /* syscall return */
90 	X86_BR_INT		= 1 << 6, /* sw interrupt */
91 	X86_BR_IRET		= 1 << 7, /* return from interrupt */
92 	X86_BR_JCC		= 1 << 8, /* conditional */
93 	X86_BR_JMP		= 1 << 9, /* jump */
94 	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
95 	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
96 	X86_BR_ABORT		= 1 << 12,/* transaction abort */
97 	X86_BR_IN_TX		= 1 << 13,/* in transaction */
98 	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
99 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
100 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
101 	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
102 
103 	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
104 
105 };
106 
107 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
108 #define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
109 
110 #define X86_BR_ANY       \
111 	(X86_BR_CALL    |\
112 	 X86_BR_RET     |\
113 	 X86_BR_SYSCALL |\
114 	 X86_BR_SYSRET  |\
115 	 X86_BR_INT     |\
116 	 X86_BR_IRET    |\
117 	 X86_BR_JCC     |\
118 	 X86_BR_JMP	 |\
119 	 X86_BR_IRQ	 |\
120 	 X86_BR_ABORT	 |\
121 	 X86_BR_IND_CALL |\
122 	 X86_BR_IND_JMP  |\
123 	 X86_BR_ZERO_CALL)
124 
125 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
126 
127 #define X86_BR_ANY_CALL		 \
128 	(X86_BR_CALL		|\
129 	 X86_BR_IND_CALL	|\
130 	 X86_BR_ZERO_CALL	|\
131 	 X86_BR_SYSCALL		|\
132 	 X86_BR_IRQ		|\
133 	 X86_BR_INT)
134 
135 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
136 
137 /*
138  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
139  * otherwise it becomes nearly impossible to get a reliable stack.
140  */
141 
142 static void __intel_pmu_lbr_enable(bool pmi)
143 {
144 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
145 	u64 debugctl, lbr_select = 0, orig_debugctl;
146 
147 	/*
148 	 * No need to unfreeze manually, as v4 can do that as part
149 	 * of the GLOBAL_STATUS ack.
150 	 */
151 	if (pmi && x86_pmu.version >= 4)
152 		return;
153 
154 	/*
155 	 * No need to reprogram LBR_SELECT in a PMI, as it
156 	 * did not change.
157 	 */
158 	if (cpuc->lbr_sel)
159 		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
160 	if (!pmi && cpuc->lbr_sel)
161 		wrmsrl(MSR_LBR_SELECT, lbr_select);
162 
163 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
164 	orig_debugctl = debugctl;
165 	debugctl |= DEBUGCTLMSR_LBR;
166 	/*
167 	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
168 	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
169 	 * may cause superfluous increase/decrease of LBR_TOS.
170 	 */
171 	if (!(lbr_select & LBR_CALL_STACK))
172 		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
173 	if (orig_debugctl != debugctl)
174 		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
175 }
176 
177 static void __intel_pmu_lbr_disable(void)
178 {
179 	u64 debugctl;
180 
181 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
182 	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
183 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
184 }
185 
186 void intel_pmu_lbr_reset_32(void)
187 {
188 	int i;
189 
190 	for (i = 0; i < x86_pmu.lbr_nr; i++)
191 		wrmsrl(x86_pmu.lbr_from + i, 0);
192 }
193 
194 void intel_pmu_lbr_reset_64(void)
195 {
196 	int i;
197 
198 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
199 		wrmsrl(x86_pmu.lbr_from + i, 0);
200 		wrmsrl(x86_pmu.lbr_to   + i, 0);
201 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
202 			wrmsrl(MSR_LBR_INFO_0 + i, 0);
203 	}
204 }
205 
206 void intel_pmu_lbr_reset(void)
207 {
208 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
209 
210 	if (!x86_pmu.lbr_nr)
211 		return;
212 
213 	x86_pmu.lbr_reset();
214 
215 	cpuc->last_task_ctx = NULL;
216 	cpuc->last_log_id = 0;
217 }
218 
219 /*
220  * TOS = most recently recorded branch
221  */
222 static inline u64 intel_pmu_lbr_tos(void)
223 {
224 	u64 tos;
225 
226 	rdmsrl(x86_pmu.lbr_tos, tos);
227 	return tos;
228 }
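/*
 * Illustrative sketch (assuming x86_pmu.lbr_nr == 16, so mask == 0xf, and a
 * TOS of 3): the save/restore/read paths below walk the LBR ring from the
 * newest entry backwards with (tos - i) & mask:
 *
 *   i = 0 -> idx 3 (newest), i = 1 -> idx 2, ..., i = 3 -> idx 0,
 *   i = 4 -> idx 15 (wrap-around), ..., i = 15 -> idx 4 (oldest)
 *
 * so slot 0 of the saved/decoded stack always holds the most recent branch.
 */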
229 
230 enum {
231 	LBR_NONE,
232 	LBR_VALID,
233 };
234 
235 /*
236  * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
237  * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
238  * TSX is not supported they have no consistent behavior:
239  *
240  *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
241  *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
242  *     part of the sign extension.
243  *
244  * Therefore, if:
245  *
246  *   1) LBR has TSX format
247  *   2) CPU has no TSX support enabled
248  *
249  * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
250  * value from rdmsr() must be converted to have a 61-bit sign extension,
251  * ignoring the TSX flags.
252  */
253 static inline bool lbr_from_signext_quirk_needed(void)
254 {
255 	int lbr_format = x86_pmu.intel_cap.lbr_format;
256 	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
257 			   boot_cpu_has(X86_FEATURE_RTM);
258 
259 	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
260 }
261 
262 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
263 
264 /* If quirk is enabled, ensure sign extension is 63 bits: */
265 inline u64 lbr_from_signext_quirk_wr(u64 val)
266 {
267 	if (static_branch_unlikely(&lbr_from_quirk_key)) {
268 		/*
269 		 * Sign extend into bits 61:62 while preserving bit 63.
270 		 *
271 		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
272 		 * in val are always OFF and must be changed to be sign
273 		 * extension bits. Since bits 59:60 are guaranteed to be
274 		 * part of the sign extension bits, we can just copy them
275 		 * to 61:62.
276 		 */
277 		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
278 	}
279 	return val;
280 }
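/*
 * Worked example (illustrative values): a FROM entry for a kernel branch at
 * 0xffffffffc0001234 with MISPRED set is captured by hardware as
 * 0x9fffffffc0001234 on a TSX-format CPU without TSX (bits 62:61 clear).
 * LBR_FROM_SIGNEXT_2MSB & val selects bits 60:59 (both set here); shifting
 * that left by 2 copies them into bits 62:61, so the value written back is
 * 0xffffffffc0001234 -- bits 62:61 now match the sign bits while bit 63
 * (MISPRED) is preserved, which is what wrmsr() expects.
 */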
281 
282 /*
283  * If quirk is needed, ensure sign extension is 61 bits:
284  */
285 static u64 lbr_from_signext_quirk_rd(u64 val)
286 {
287 	if (static_branch_unlikely(&lbr_from_quirk_key)) {
288 		/*
289 		 * Quirk is on when TSX is not enabled. Therefore TSX
290 		 * flags must be read as OFF.
291 		 */
292 		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
293 	}
294 	return val;
295 }
296 
297 static inline void wrlbr_from(unsigned int idx, u64 val)
298 {
299 	val = lbr_from_signext_quirk_wr(val);
300 	wrmsrl(x86_pmu.lbr_from + idx, val);
301 }
302 
303 static inline void wrlbr_to(unsigned int idx, u64 val)
304 {
305 	wrmsrl(x86_pmu.lbr_to + idx, val);
306 }
307 
308 static inline u64 rdlbr_from(unsigned int idx)
309 {
310 	u64 val;
311 
312 	rdmsrl(x86_pmu.lbr_from + idx, val);
313 
314 	return lbr_from_signext_quirk_rd(val);
315 }
316 
317 static inline u64 rdlbr_to(unsigned int idx)
318 {
319 	u64 val;
320 
321 	rdmsrl(x86_pmu.lbr_to + idx, val);
322 
323 	return val;
324 }
325 
326 void intel_pmu_lbr_restore(void *ctx)
327 {
328 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
329 	struct x86_perf_task_context *task_ctx = ctx;
330 	int i;
331 	unsigned lbr_idx, mask;
332 	u64 tos = task_ctx->tos;
333 
334 	mask = x86_pmu.lbr_nr - 1;
335 	for (i = 0; i < task_ctx->valid_lbrs; i++) {
336 		lbr_idx = (tos - i) & mask;
337 		wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
338 		wrlbr_to  (lbr_idx, task_ctx->lbr_to[i]);
339 
340 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
341 			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
342 	}
343 
344 	for (; i < x86_pmu.lbr_nr; i++) {
345 		lbr_idx = (tos - i) & mask;
346 		wrlbr_from(lbr_idx, 0);
347 		wrlbr_to(lbr_idx, 0);
348 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
349 			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
350 	}
351 
352 	wrmsrl(x86_pmu.lbr_tos, tos);
353 
354 	if (cpuc->lbr_select)
355 		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
356 }
357 
358 static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
359 {
360 	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos);
361 }
362 
363 static void __intel_pmu_lbr_restore(void *ctx)
364 {
365 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
366 
367 	if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
368 	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
369 		intel_pmu_lbr_reset();
370 		return;
371 	}
372 
373 	/*
374 	 * Do not restore the LBR registers if:
375 	 * - no one else touched them, and
376 	 * - they were not cleared in C-state
377 	 */
378 	if ((ctx == cpuc->last_task_ctx) &&
379 	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
380 	    !lbr_is_reset_in_cstate(ctx)) {
381 		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
382 		return;
383 	}
384 
385 	x86_pmu.lbr_restore(ctx);
386 
387 	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
388 }
389 
390 void intel_pmu_lbr_save(void *ctx)
391 {
392 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
393 	struct x86_perf_task_context *task_ctx = ctx;
394 	unsigned lbr_idx, mask;
395 	u64 tos, from;
396 	int i;
397 
398 	mask = x86_pmu.lbr_nr - 1;
399 	tos = intel_pmu_lbr_tos();
400 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
401 		lbr_idx = (tos - i) & mask;
402 		from = rdlbr_from(lbr_idx);
403 		if (!from)
404 			break;
405 		task_ctx->lbr_from[i] = from;
406 		task_ctx->lbr_to[i]   = rdlbr_to(lbr_idx);
407 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
408 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
409 	}
410 	task_ctx->valid_lbrs = i;
411 	task_ctx->tos = tos;
412 
413 	if (cpuc->lbr_select)
414 		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
415 }
416 
417 static void __intel_pmu_lbr_save(void *ctx)
418 {
419 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
420 
421 	if (task_context_opt(ctx)->lbr_callstack_users == 0) {
422 		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
423 		return;
424 	}
425 
426 	x86_pmu.lbr_save(ctx);
427 
428 	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
429 
430 	cpuc->last_task_ctx = ctx;
431 	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
432 }
433 
434 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
435 				 struct perf_event_context *next)
436 {
437 	void *prev_ctx_data, *next_ctx_data;
438 
439 	swap(prev->task_ctx_data, next->task_ctx_data);
440 
441 	/*
442 	 * Architecture-specific synchronization only makes sense when
443 	 * both the prev->task_ctx_data and next->task_ctx_data
444 	 * pointers are allocated.
445 	 */
446 
447 	prev_ctx_data = next->task_ctx_data;
448 	next_ctx_data = prev->task_ctx_data;
449 
450 	if (!prev_ctx_data || !next_ctx_data)
451 		return;
452 
453 	swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
454 	     task_context_opt(next_ctx_data)->lbr_callstack_users);
455 }
456 
457 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
458 {
459 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
460 	void *task_ctx;
461 
462 	if (!cpuc->lbr_users)
463 		return;
464 
465 	/*
466 	 * If LBR callstack feature is enabled and the stack was saved when
467 	 * the task was scheduled out, restore the stack. Otherwise flush
468 	 * the LBR stack.
469 	 */
470 	task_ctx = ctx ? ctx->task_ctx_data : NULL;
471 	if (task_ctx) {
472 		if (sched_in)
473 			__intel_pmu_lbr_restore(task_ctx);
474 		else
475 			__intel_pmu_lbr_save(task_ctx);
476 		return;
477 	}
478 
479 	/*
480 	 * Since a context switch can flip the address space and LBR entries
481 	 * are not tagged with an identifier, we need to wipe the LBR, even for
482 	 * per-cpu events. You simply cannot resolve the branches from the old
483 	 * address space.
484 	 */
485 	if (sched_in)
486 		intel_pmu_lbr_reset();
487 }
488 
489 static inline bool branch_user_callstack(unsigned br_sel)
490 {
491 	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
492 }
493 
494 void intel_pmu_lbr_add(struct perf_event *event)
495 {
496 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
497 
498 	if (!x86_pmu.lbr_nr)
499 		return;
500 
501 	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
502 		cpuc->lbr_select = 1;
503 
504 	cpuc->br_sel = event->hw.branch_reg.reg;
505 
506 	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
507 		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
508 
509 	/*
510 	 * Request pmu::sched_task() callback, which will fire inside the
511 	 * regular perf event scheduling, so that call will:
512 	 *
513 	 *  - restore or wipe; when LBR-callstack,
514 	 *  - wipe; otherwise,
515 	 *
516 	 * when this is from __perf_event_task_sched_in().
517 	 *
518 	 * However, if this is from perf_install_in_context(), no such callback
519 	 * will follow and we'll need to reset the LBR here if this is the
520 	 * first LBR event.
521 	 *
522 	 * The problem is, we cannot tell these cases apart... but we can
523 	 * exclude the biggest chunk of cases by looking at
524 	 * event->total_time_running. An event that has accrued runtime cannot
525 	 * be 'new'. Conversely, a new event can get installed through the
526 	 * context switch path for the first time.
527 	 */
528 	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
529 		cpuc->lbr_pebs_users++;
530 	perf_sched_cb_inc(event->ctx->pmu);
531 	if (!cpuc->lbr_users++ && !event->total_time_running)
532 		intel_pmu_lbr_reset();
533 }
534 
535 void intel_pmu_lbr_del(struct perf_event *event)
536 {
537 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
538 
539 	if (!x86_pmu.lbr_nr)
540 		return;
541 
542 	if (branch_user_callstack(cpuc->br_sel) &&
543 	    event->ctx->task_ctx_data)
544 		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
545 
546 	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
547 		cpuc->lbr_select = 0;
548 
549 	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
550 		cpuc->lbr_pebs_users--;
551 	cpuc->lbr_users--;
552 	WARN_ON_ONCE(cpuc->lbr_users < 0);
553 	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
554 	perf_sched_cb_dec(event->ctx->pmu);
555 }
556 
557 static inline bool vlbr_exclude_host(void)
558 {
559 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
560 
561 	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
562 		(unsigned long *)&cpuc->intel_ctrl_guest_mask);
563 }
564 
565 void intel_pmu_lbr_enable_all(bool pmi)
566 {
567 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
568 
569 	if (cpuc->lbr_users && !vlbr_exclude_host())
570 		__intel_pmu_lbr_enable(pmi);
571 }
572 
573 void intel_pmu_lbr_disable_all(void)
574 {
575 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
576 
577 	if (cpuc->lbr_users && !vlbr_exclude_host())
578 		__intel_pmu_lbr_disable();
579 }
580 
581 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
582 {
583 	unsigned long mask = x86_pmu.lbr_nr - 1;
584 	u64 tos = intel_pmu_lbr_tos();
585 	int i;
586 
587 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
588 		unsigned long lbr_idx = (tos - i) & mask;
589 		union {
590 			struct {
591 				u32 from;
592 				u32 to;
593 			};
594 			u64     lbr;
595 		} msr_lastbranch;
596 
597 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
598 
599 		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
600 		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
601 		cpuc->lbr_entries[i].mispred	= 0;
602 		cpuc->lbr_entries[i].predicted	= 0;
603 		cpuc->lbr_entries[i].in_tx	= 0;
604 		cpuc->lbr_entries[i].abort	= 0;
605 		cpuc->lbr_entries[i].cycles	= 0;
606 		cpuc->lbr_entries[i].type	= 0;
607 		cpuc->lbr_entries[i].reserved	= 0;
608 	}
609 	cpuc->lbr_stack.nr = i;
610 	cpuc->lbr_stack.hw_idx = tos;
611 }
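/*
 * With the 32-bit LBR format each slot packs both addresses into one MSR;
 * the union above takes the low 32 bits as 'from' and the high 32 bits as
 * 'to'. Illustrative value: 0xc0402000c0101000 decodes to from == 0xc0101000
 * and to == 0xc0402000.
 */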
612 
613 /*
614  * Due to the lack of segmentation in Linux, the effective address (offset)
615  * is the same as the linear address, allowing us to merge the LIP and EIP
616  * LBR formats.
617  */
618 void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
619 {
620 	bool need_info = false, call_stack = false;
621 	unsigned long mask = x86_pmu.lbr_nr - 1;
622 	int lbr_format = x86_pmu.intel_cap.lbr_format;
623 	u64 tos = intel_pmu_lbr_tos();
624 	int i;
625 	int out = 0;
626 	int num = x86_pmu.lbr_nr;
627 
628 	if (cpuc->lbr_sel) {
629 		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
630 		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
631 			call_stack = true;
632 	}
633 
634 	for (i = 0; i < num; i++) {
635 		unsigned long lbr_idx = (tos - i) & mask;
636 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
637 		int skip = 0;
638 		u16 cycles = 0;
639 		int lbr_flags = lbr_desc[lbr_format];
640 
641 		from = rdlbr_from(lbr_idx);
642 		to   = rdlbr_to(lbr_idx);
643 
644 		/*
645 		 * Read LBR call stack entries
646 		 * until invalid entry (0s) is detected.
647 		 */
648 		if (call_stack && !from)
649 			break;
650 
651 		if (lbr_format == LBR_FORMAT_INFO && need_info) {
652 			u64 info;
653 
654 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
655 			mis = !!(info & LBR_INFO_MISPRED);
656 			pred = !mis;
657 			in_tx = !!(info & LBR_INFO_IN_TX);
658 			abort = !!(info & LBR_INFO_ABORT);
659 			cycles = (info & LBR_INFO_CYCLES);
660 		}
661 
662 		if (lbr_format == LBR_FORMAT_TIME) {
663 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
664 			pred = !mis;
665 			skip = 1;
666 			cycles = ((to >> 48) & LBR_INFO_CYCLES);
667 
668 			to = (u64)((((s64)to) << 16) >> 16);
669 		}
670 
671 		if (lbr_flags & LBR_EIP_FLAGS) {
672 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
673 			pred = !mis;
674 			skip = 1;
675 		}
676 		if (lbr_flags & LBR_TSX) {
677 			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
678 			abort = !!(from & LBR_FROM_FLAG_ABORT);
679 			skip = 3;
680 		}
681 		from = (u64)((((s64)from) << skip) >> skip);
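		/*
		 * Example of the shift trick (illustrative value): with the
		 * EIP_FLAGS2 format skip == 3, so a raw FROM value of
		 * 0x9fffffffc0001234 has its flag bits 63:61 shifted out and
		 * the arithmetic shift right replicates bit 60, yielding the
		 * canonical kernel address 0xffffffffc0001234.
		 */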
682 
683 		/*
684 		 * Some CPUs report duplicated abort records,
685 		 * with the second entry not having an abort bit set.
686 		 * Skip them here. This loop runs backwards,
687 		 * so we need to undo the previous record.
688 		 * If the abort just happened outside the window,
689 		 * the extra entry cannot be removed.
690 		 */
691 		if (abort && x86_pmu.lbr_double_abort && out > 0)
692 			out--;
693 
694 		cpuc->lbr_entries[out].from	 = from;
695 		cpuc->lbr_entries[out].to	 = to;
696 		cpuc->lbr_entries[out].mispred	 = mis;
697 		cpuc->lbr_entries[out].predicted = pred;
698 		cpuc->lbr_entries[out].in_tx	 = in_tx;
699 		cpuc->lbr_entries[out].abort	 = abort;
700 		cpuc->lbr_entries[out].cycles	 = cycles;
701 		cpuc->lbr_entries[out].type	 = 0;
702 		cpuc->lbr_entries[out].reserved	 = 0;
703 		out++;
704 	}
705 	cpuc->lbr_stack.nr = out;
706 	cpuc->lbr_stack.hw_idx = tos;
707 }
708 
709 void intel_pmu_lbr_read(void)
710 {
711 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
712 
713 	/*
714 	 * Don't read when all LBR users are using adaptive PEBS.
715 	 *
716 	 * This could be smarter and actually check the event,
717 	 * but this simple approach seems to work for now.
718 	 */
719 	if (!cpuc->lbr_users || vlbr_exclude_host() ||
720 	    cpuc->lbr_users == cpuc->lbr_pebs_users)
721 		return;
722 
723 	x86_pmu.lbr_read(cpuc);
724 
725 	intel_pmu_lbr_filter(cpuc);
726 }
727 
728 /*
729  * SW filter is used:
730  * - in case there is no HW filter
731  * - in case the HW filter has errata or limitations
732  */
733 static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
734 {
735 	u64 br_type = event->attr.branch_sample_type;
736 	int mask = 0;
737 
738 	if (br_type & PERF_SAMPLE_BRANCH_USER)
739 		mask |= X86_BR_USER;
740 
741 	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
742 		mask |= X86_BR_KERNEL;
743 
744 	/* we ignore BRANCH_HV here */
745 
746 	if (br_type & PERF_SAMPLE_BRANCH_ANY)
747 		mask |= X86_BR_ANY;
748 
749 	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
750 		mask |= X86_BR_ANY_CALL;
751 
752 	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
753 		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
754 
755 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
756 		mask |= X86_BR_IND_CALL;
757 
758 	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
759 		mask |= X86_BR_ABORT;
760 
761 	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
762 		mask |= X86_BR_IN_TX;
763 
764 	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
765 		mask |= X86_BR_NO_TX;
766 
767 	if (br_type & PERF_SAMPLE_BRANCH_COND)
768 		mask |= X86_BR_JCC;
769 
770 	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
771 		if (!x86_pmu_has_lbr_callstack())
772 			return -EOPNOTSUPP;
773 		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
774 			return -EINVAL;
775 		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
776 			X86_BR_CALL_STACK;
777 	}
778 
779 	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
780 		mask |= X86_BR_IND_JMP;
781 
782 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
783 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
784 
785 	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
786 		mask |= X86_BR_TYPE_SAVE;
787 
788 	/*
789 	 * Stash the actual user request into reg; it may
790 	 * be used by fixup code for some CPUs.
791 	 */
792 	event->hw.branch_reg.reg = mask;
793 	return 0;
794 }
795 
796 /*
797  * Set up the HW LBR filter.
798  * Used only when available; it may not be enough to disambiguate
799  * all branches and may need the help of the SW filter.
800  */
801 static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
802 {
803 	struct hw_perf_event_extra *reg;
804 	u64 br_type = event->attr.branch_sample_type;
805 	u64 mask = 0, v;
806 	int i;
807 
808 	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
809 		if (!(br_type & (1ULL << i)))
810 			continue;
811 
812 		v = x86_pmu.lbr_sel_map[i];
813 		if (v == LBR_NOT_SUPP)
814 			return -EOPNOTSUPP;
815 
816 		if (v != LBR_IGN)
817 			mask |= v;
818 	}
819 
820 	reg = &event->hw.branch_reg;
821 	reg->idx = EXTRA_REG_LBR;
822 
823 	/*
824 	 * The first 9 bits (LBR_SEL_MASK minus LBR_CALL_STACK) in LBR_SELECT
825 	 * operate in suppress mode, so LBR_SELECT should be set to
826 	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK).
827 	 * But the 10th bit, LBR_CALL_STACK, does not operate
828 	 * in suppress mode.
829 	 */
830 	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
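	/*
	 * Worked example (sketch, using a map where IND_CALL selects only
	 * LBR_IND_CALL, as on SNB): a user-only indirect-call request gives
	 * mask == LBR_USER | LBR_IND_CALL == 0x012, and 0x012 ^ 0x1ff ==
	 * 0x1ed -- every suppress bit set except LBR_USER and LBR_IND_CALL,
	 * i.e. ring0 and all other branch types are filtered out while
	 * user-level indirect calls are kept.
	 */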
831 
832 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
833 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
834 	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
835 		reg->config |= LBR_NO_INFO;
836 
837 	return 0;
838 }
839 
840 int intel_pmu_setup_lbr_filter(struct perf_event *event)
841 {
842 	int ret = 0;
843 
844 	/*
845 	 * no LBR on this PMU
846 	 */
847 	if (!x86_pmu.lbr_nr)
848 		return -EOPNOTSUPP;
849 
850 	/*
851 	 * setup SW LBR filter
852 	 */
853 	ret = intel_pmu_setup_sw_lbr_filter(event);
854 	if (ret)
855 		return ret;
856 
857 	/*
858 	 * setup HW LBR filter, if any
859 	 */
860 	if (x86_pmu.lbr_sel_map)
861 		ret = intel_pmu_setup_hw_lbr_filter(event);
862 
863 	return ret;
864 }
865 
866 /*
867  * Return the type of control flow change at address "from".
868  * The instruction there is not necessarily a branch (e.g., in case of an interrupt).
869  *
870  * The branch type returned also includes the priv level of the
871  * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
872  *
873  * If a branch type is unknown OR the instruction cannot be
874  * decoded (e.g., text page not present), then X86_BR_NONE is
875  * returned.
876  */
877 static int branch_type(unsigned long from, unsigned long to, int abort)
878 {
879 	struct insn insn;
880 	void *addr;
881 	int bytes_read, bytes_left;
882 	int ret = X86_BR_NONE;
883 	int ext, to_plm, from_plm;
884 	u8 buf[MAX_INSN_SIZE];
885 	int is64 = 0;
886 
887 	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
888 	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
889 
890 	/*
891 	 * may be zero if the LBR did not fill up after a reset by the time
892 	 * we get a PMU interrupt
893 	 */
894 	if (from == 0 || to == 0)
895 		return X86_BR_NONE;
896 
897 	if (abort)
898 		return X86_BR_ABORT | to_plm;
899 
900 	if (from_plm == X86_BR_USER) {
901 		/*
902 		 * can happen if measuring at the user level only
903 		 * and we interrupt in a kernel thread, e.g., idle.
904 		 */
905 		if (!current->mm)
906 			return X86_BR_NONE;
907 
908 		/* may fail if text not present */
909 		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
910 						MAX_INSN_SIZE);
911 		bytes_read = MAX_INSN_SIZE - bytes_left;
912 		if (!bytes_read)
913 			return X86_BR_NONE;
914 
915 		addr = buf;
916 	} else {
917 		/*
918 		 * The LBR logs any address in the IP, even if the IP just
919 		 * faulted. This means userspace can control the from address.
920 		 * Ensure we don't blindly read any address by validating it is
921 		 * a known text address.
922 		 */
923 		if (kernel_text_address(from)) {
924 			addr = (void *)from;
925 			/*
926 			 * Assume we can get the maximum possible size
927 			 * when grabbing kernel data.  This is not
928 			 * _strictly_ true since we could possibly be
929 			 * executing up next to a memory hole, but
930 			 * it is very unlikely to be a problem.
931 			 */
932 			bytes_read = MAX_INSN_SIZE;
933 		} else {
934 			return X86_BR_NONE;
935 		}
936 	}
937 
938 	/*
939 	 * decoder needs to know the ABI especially
940 	 * on 64-bit systems running 32-bit apps
941 	 */
942 #ifdef CONFIG_X86_64
943 	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
944 #endif
945 	insn_init(&insn, addr, bytes_read, is64);
946 	insn_get_opcode(&insn);
947 	if (!insn.opcode.got)
948 		return X86_BR_ABORT;
949 
950 	switch (insn.opcode.bytes[0]) {
951 	case 0xf:
952 		switch (insn.opcode.bytes[1]) {
953 		case 0x05: /* syscall */
954 		case 0x34: /* sysenter */
955 			ret = X86_BR_SYSCALL;
956 			break;
957 		case 0x07: /* sysret */
958 		case 0x35: /* sysexit */
959 			ret = X86_BR_SYSRET;
960 			break;
961 		case 0x80 ... 0x8f: /* conditional */
962 			ret = X86_BR_JCC;
963 			break;
964 		default:
965 			ret = X86_BR_NONE;
966 		}
967 		break;
968 	case 0x70 ... 0x7f: /* conditional */
969 		ret = X86_BR_JCC;
970 		break;
971 	case 0xc2: /* near ret */
972 	case 0xc3: /* near ret */
973 	case 0xca: /* far ret */
974 	case 0xcb: /* far ret */
975 		ret = X86_BR_RET;
976 		break;
977 	case 0xcf: /* iret */
978 		ret = X86_BR_IRET;
979 		break;
980 	case 0xcc ... 0xce: /* int */
981 		ret = X86_BR_INT;
982 		break;
983 	case 0xe8: /* call near rel */
984 		insn_get_immediate(&insn);
985 		if (insn.immediate1.value == 0) {
986 			/* zero length call */
987 			ret = X86_BR_ZERO_CALL;
988 			break;
989 		}
990 		/* fall through */
991 	case 0x9a: /* call far absolute */
992 		ret = X86_BR_CALL;
993 		break;
994 	case 0xe0 ... 0xe3: /* loop jmp */
995 		ret = X86_BR_JCC;
996 		break;
997 	case 0xe9 ... 0xeb: /* jmp */
998 		ret = X86_BR_JMP;
999 		break;
1000 	case 0xff: /* call near absolute, call far absolute ind */
1001 		insn_get_modrm(&insn);
1002 		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
1003 		switch (ext) {
1004 		case 2: /* near ind call */
1005 		case 3: /* far ind call */
1006 			ret = X86_BR_IND_CALL;
1007 			break;
1008 		case 4:
1009 		case 5:
1010 			ret = X86_BR_IND_JMP;
1011 			break;
1012 		}
1013 		break;
1014 	default:
1015 		ret = X86_BR_NONE;
1016 	}
1017 	/*
1018 	 * Interrupts, traps and faults (and thus ring transitions) may
1019 	 * occur on any instruction. Thus, to classify them correctly,
1020 	 * we need to first look at the from and to priv levels. If they
1021 	 * are different and to is in the kernel, then it indicates
1022 	 * a ring transition. If the from instruction is not a ring
1023 	 * transition instr (syscall, sysenter, int), then it means
1024 	 * it was an irq, trap or fault.
1025 	 *
1026 	 * we have no way of detecting kernel to kernel faults.
1027 	 */
1028 	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
1029 	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
1030 		ret = X86_BR_IRQ;
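	/*
	 * Example: a page fault taken in user mode on a plain mov arrives here
	 * with from_plm == X86_BR_USER, to_plm == X86_BR_KERNEL and
	 * ret == X86_BR_NONE from the decode above; it is therefore reported
	 * as X86_BR_IRQ | X86_BR_KERNEL once the priv level is OR-ed in below.
	 */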
1031 
1032 	/*
1033 	 * The branch priv level is determined by the target, as
1034 	 * is done by HW when LBR_SELECT is implemented.
1035 	 */
1036 	if (ret != X86_BR_NONE)
1037 		ret |= to_plm;
1038 
1039 	return ret;
1040 }
1041 
1042 #define X86_BR_TYPE_MAP_MAX	16
1043 
1044 static int branch_map[X86_BR_TYPE_MAP_MAX] = {
1045 	PERF_BR_CALL,		/* X86_BR_CALL */
1046 	PERF_BR_RET,		/* X86_BR_RET */
1047 	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
1048 	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
1049 	PERF_BR_UNKNOWN,	/* X86_BR_INT */
1050 	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
1051 	PERF_BR_COND,		/* X86_BR_JCC */
1052 	PERF_BR_UNCOND,		/* X86_BR_JMP */
1053 	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
1054 	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
1055 	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
1056 	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
1057 	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
1058 	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
1059 	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
1060 	PERF_BR_IND,		/* X86_BR_IND_JMP */
1061 };
1062 
1063 static int
1064 common_branch_type(int type)
1065 {
1066 	int i;
1067 
1068 	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
1069 
1070 	if (type) {
1071 		i = __ffs(type);
1072 		if (i < X86_BR_TYPE_MAP_MAX)
1073 			return branch_map[i];
1074 	}
1075 
1076 	return PERF_BR_UNKNOWN;
1077 }
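/*
 * Example: for type == X86_BR_JCC | X86_BR_KERNEL, the priv bit is shifted
 * out and X86_BR_JCC (bit 8) becomes bit 6, so __ffs() returns 6 and
 * branch_map[6] yields PERF_BR_COND.
 */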
1078 
1079 /*
1080  * Implement the actual branch filter based on user demand.
1081  * Hardware may not exactly satisfy that request, thus
1082  * we need to inspect opcodes. Mismatched branches are
1083  * discarded. Therefore, the number of branches returned
1084  * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
1085  */
1086 static void
1087 intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1088 {
1089 	u64 from, to;
1090 	int br_sel = cpuc->br_sel;
1091 	int i, j, type;
1092 	bool compress = false;
1093 
1094 	/* if sampling all branches, then nothing to filter */
1095 	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1096 	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1097 		return;
1098 
1099 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1100 
1101 		from = cpuc->lbr_entries[i].from;
1102 		to = cpuc->lbr_entries[i].to;
1103 
1104 		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
1105 		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
1106 			if (cpuc->lbr_entries[i].in_tx)
1107 				type |= X86_BR_IN_TX;
1108 			else
1109 				type |= X86_BR_NO_TX;
1110 		}
1111 
1112 		/* if type does not correspond, then discard */
1113 		if (type == X86_BR_NONE || (br_sel & type) != type) {
1114 			cpuc->lbr_entries[i].from = 0;
1115 			compress = true;
1116 		}
1117 
1118 		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1119 			cpuc->lbr_entries[i].type = common_branch_type(type);
1120 	}
1121 
1122 	if (!compress)
1123 		return;
1124 
1125 	/* remove all entries with from=0 */
1126 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
1127 		if (!cpuc->lbr_entries[i].from) {
1128 			j = i;
1129 			while (++j < cpuc->lbr_stack.nr)
1130 				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
1131 			cpuc->lbr_stack.nr--;
1132 			if (!cpuc->lbr_entries[i].from)
1133 				continue;
1134 		}
1135 		i++;
1136 	}
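	/*
	 * Compression sketch (illustrative): if the filter zeroes the 'from'
	 * of the middle entry of a three-entry stack {A, B, C} -> {A, 0, C},
	 * the loop above shifts C into slot 1 and drops lbr_stack.nr from 3
	 * to 2, so the sample only carries the branches that matched the
	 * user's request.
	 */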
1137 }
1138 
1139 void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
1140 {
1141 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1142 	int i;
1143 
1144 	cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
1145 
1146 	/* Cannot get TOS for large PEBS */
1147 	if (cpuc->n_pebs == cpuc->n_large_pebs)
1148 		cpuc->lbr_stack.hw_idx = -1ULL;
1149 	else
1150 		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
1151 
1152 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
1153 		u64 info = lbr->lbr[i].info;
1154 		struct perf_branch_entry *e = &cpuc->lbr_entries[i];
1155 
1156 		e->from		= lbr->lbr[i].from;
1157 		e->to		= lbr->lbr[i].to;
1158 		e->mispred	= !!(info & LBR_INFO_MISPRED);
1159 		e->predicted	= !(info & LBR_INFO_MISPRED);
1160 		e->in_tx	= !!(info & LBR_INFO_IN_TX);
1161 		e->abort	= !!(info & LBR_INFO_ABORT);
1162 		e->cycles	= info & LBR_INFO_CYCLES;
1163 		e->reserved	= 0;
1164 	}
1165 	intel_pmu_lbr_filter(cpuc);
1166 }
1167 
1168 /*
1169  * Map interface branch filters onto LBR filters
1170  */
1171 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1172 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
1173 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
1174 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
1175 	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
1176 	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
1177 						| LBR_IND_JMP | LBR_FAR,
1178 	/*
1179 	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
1180 	 */
1181 	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
1182 	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1183 	/*
1184 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1185 	 */
1186 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1187 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1188 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1189 };
1190 
1191 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1192 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
1193 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
1194 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
1195 	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
1196 	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
1197 	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
1198 						| LBR_FAR,
1199 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
1200 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
1201 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
1202 	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
1203 };
1204 
1205 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1206 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
1207 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
1208 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
1209 	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
1210 	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
1211 	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
1212 						| LBR_FAR,
1213 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
1214 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
1215 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
1216 						| LBR_RETURN | LBR_CALL_STACK,
1217 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
1218 	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
1219 };
1220 
1221 /* core */
1222 void __init intel_pmu_lbr_init_core(void)
1223 {
1224 	x86_pmu.lbr_nr     = 4;
1225 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
1226 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1227 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1228 
1229 	/*
1230 	 * SW branch filter usage:
1231 	 * - compensate for lack of HW filter
1232 	 */
1233 }
1234 
1235 /* nehalem/westmere */
1236 void __init intel_pmu_lbr_init_nhm(void)
1237 {
1238 	x86_pmu.lbr_nr     = 16;
1239 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
1240 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1241 	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1242 
1243 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1244 	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1245 
1246 	/*
1247 	 * SW branch filter usage:
1248 	 * - workaround LBR_SEL errata (see above)
1249 	 * - support syscall, sysret capture.
1250 	 *   That requires LBR_FAR but that means far
1251  *   jmps need to be filtered out
1252 	 */
1253 }
1254 
1255 /* sandy bridge */
1256 void __init intel_pmu_lbr_init_snb(void)
1257 {
1258 	x86_pmu.lbr_nr	 = 16;
1259 	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
1260 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1261 	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1262 
1263 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1264 	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1265 
1266 	/*
1267 	 * SW branch filter usage:
1268 	 * - support syscall, sysret capture.
1269 	 *   That requires LBR_FAR but that means far
1270  *   jmps need to be filtered out
1271 	 */
1272 }
1273 
1274 /* haswell */
1275 void intel_pmu_lbr_init_hsw(void)
1276 {
1277 	x86_pmu.lbr_nr	 = 16;
1278 	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
1279 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1280 	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1281 
1282 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1283 	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1284 
1285 	if (lbr_from_signext_quirk_needed())
1286 		static_branch_enable(&lbr_from_quirk_key);
1287 }
1288 
1289 /* skylake */
1290 __init void intel_pmu_lbr_init_skl(void)
1291 {
1292 	x86_pmu.lbr_nr	 = 32;
1293 	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
1294 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1295 	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1296 
1297 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1298 	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1299 
1300 	/*
1301 	 * SW branch filter usage:
1302 	 * - support syscall, sysret capture.
1303 	 *   That requires LBR_FAR but that means far
1304  *   jmps need to be filtered out
1305 	 */
1306 }
1307 
1308 /* atom */
1309 void __init intel_pmu_lbr_init_atom(void)
1310 {
1311 	/*
1312 	 * only models starting at stepping 10 seem
1313 	 * to have an operational LBR which can freeze
1314 	 * on PMU interrupt
1315 	 */
1316 	if (boot_cpu_data.x86_model == 28
1317 	    && boot_cpu_data.x86_stepping < 10) {
1318 		pr_cont("LBR disabled due to erratum");
1319 		return;
1320 	}
1321 
1322 	x86_pmu.lbr_nr	   = 8;
1323 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
1324 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1325 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1326 
1327 	/*
1328 	 * SW branch filter usage:
1329 	 * - compensate for lack of HW filter
1330 	 */
1331 }
1332 
1333 /* slm */
1334 void __init intel_pmu_lbr_init_slm(void)
1335 {
1336 	x86_pmu.lbr_nr	   = 8;
1337 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
1338 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1339 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1340 
1341 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1342 	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1343 
1344 	/*
1345 	 * SW branch filter usage:
1346 	 * - compensate for lack of HW filter
1347 	 */
1348 	pr_cont("8-deep LBR, ");
1349 }
1350 
1351 /* Knights Landing */
1352 void intel_pmu_lbr_init_knl(void)
1353 {
1354 	x86_pmu.lbr_nr	   = 8;
1355 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
1356 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1357 	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1358 
1359 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1360 	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1361 
1362 	/* Knights Landing does have the MISPREDICT bit */
1363 	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1364 		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1365 }
1366 
1367 /**
1368  * x86_perf_get_lbr - get the LBR records information
1369  *
1370  * @lbr: the caller's memory to store the LBR records information
1371  *
1372  * Returns: 0 indicates the LBR info has been successfully obtained
1373  */
1374 int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
1375 {
1376 	int lbr_fmt = x86_pmu.intel_cap.lbr_format;
1377 
1378 	lbr->nr = x86_pmu.lbr_nr;
1379 	lbr->from = x86_pmu.lbr_from;
1380 	lbr->to = x86_pmu.lbr_to;
1381 	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? MSR_LBR_INFO_0 : 0;
1382 
1383 	return 0;
1384 }
1385 EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
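/*
 * Minimal usage sketch (hypothetical caller, e.g. a hypervisor that wants to
 * expose the LBR MSRs to a guest):
 *
 *	struct x86_pmu_lbr lbr;
 *
 *	x86_perf_get_lbr(&lbr);
 *
 * After the call, lbr.nr gives the stack depth, lbr.from/lbr.to the base MSRs
 * of the FROM/TO banks, and lbr.info is MSR_LBR_INFO_0 (or 0 when the format
 * has no INFO MSRs).
 */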
1386 
1387 struct event_constraint vlbr_constraint =
1388 	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
1389 			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
1390