xref: /linux/arch/x86/events/amd/core.c (revision e53b20598f394e37951d6355f1c88ae01165b53f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/perf_event.h>
3 #include <linux/jump_label.h>
4 #include <linux/export.h>
5 #include <linux/types.h>
6 #include <linux/init.h>
7 #include <linux/slab.h>
8 #include <linux/delay.h>
9 #include <linux/jiffies.h>
10 #include <asm/apicdef.h>
11 #include <asm/apic.h>
12 #include <asm/nmi.h>
13 
14 #include "../perf_event.h"
15 
/*
 * Per-CPU jiffies value marking the end of the window in which an otherwise
 * unhandled NMI is still claimed by perf; written and read by
 * amd_pmu_adjust_nmi_window().
 */
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
/* Added to jiffies whenever the window above is re-armed */
static unsigned long perf_nmi_window;

/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)

/*
 * PMC Enable and Overflow bits for PerfCntrGlobal* registers; used below to
 * mask reads of the global status and writes to the status-clear register.
 */
static u64 amd_pmu_global_cntr_mask __read_mostly;
25 
/*
 * Raw event encodings for the generic cache events, indexed by
 * [cache][operation][result].
 *
 * NOTE(review): the 0 and -1 entries presumably follow the generic x86 perf
 * convention (0: not supported, -1: invalid combination) - confirm against
 * the consumer of this table, which is not part of this section.
 */
static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
130 
/*
 * Same [cache][operation][result] layout as amd_hw_cache_event_ids, holding
 * a second set of encodings.
 *
 * NOTE(review): going by the _f17h suffix this is the Family 17h variant;
 * the code selecting between the two tables is not part of this section.
 */
static __initconst const u64 amd_hw_cache_event_ids_f17h
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
		[C(RESULT_MISS)]   = 0xc860, /* L2$ access from DC Miss */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
		[C(RESULT_MISS)]   = 0,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches  */
		[C(RESULT_MISS)]   = 0x0081, /* Instruction cache misses   */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
		[C(RESULT_MISS)]   = 0xf045, /* L2 DTLB misses (PT walks) */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
		[C(RESULT_MISS)]   = 0xff85, /* L1 ITLB misses, L2 misses */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr.      */
		[C(RESULT_MISS)]   = 0x00c3, /* Retired Mispredicted BI    */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
};
234 
/*
 * AMD Performance Monitor K7 and later, up to and including Family 16h:
 *
 * Indexed by PERF_COUNT_HW_*; amd_pmu_event_map() returns entries from this
 * table when the boot CPU family is below 0x17.
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x077d,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x077e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};
249 
/*
 * AMD Performance Monitor Family 17h and later:
 *
 * Indexed by PERF_COUNT_HW_*; amd_pmu_event_map() returns entries from this
 * table when the boot CPU family is 0x17 or higher.
 */
static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x0287,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x0187,
};
264 
265 static u64 amd_pmu_event_map(int hw_event)
266 {
267 	if (boot_cpu_data.x86 >= 0x17)
268 		return amd_f17h_perfmon_event_map[hw_event];
269 
270 	return amd_perfmon_event_map[hw_event];
271 }
272 
/*
 * Previously calculated offsets, cached per counter index by
 * amd_pmu_addr_offset(): event_offsets for the event-select registers,
 * count_offsets for the counter registers.  A zero entry means the offset
 * has not been computed yet.
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
278 
279 /*
280  * Legacy CPUs:
281  *   4 counters starting at 0xc0010000 each offset by 1
282  *
283  * CPUs with core performance counter extensions:
284  *   6 counters starting at 0xc0010200 each offset by 2
285  */
286 static inline int amd_pmu_addr_offset(int index, bool eventsel)
287 {
288 	int offset;
289 
290 	if (!index)
291 		return index;
292 
293 	if (eventsel)
294 		offset = event_offsets[index];
295 	else
296 		offset = count_offsets[index];
297 
298 	if (offset)
299 		return offset;
300 
301 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
302 		offset = index;
303 	else
304 		offset = index << 1;
305 
306 	if (eventsel)
307 		event_offsets[index] = offset;
308 	else
309 		count_offsets[index] = offset;
310 
311 	return offset;
312 }
313 
314 /*
315  * AMD64 events are detected based on their event codes.
316  */
317 static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
318 {
319 	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
320 }
321 
322 static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
323 {
324 	if (!(x86_pmu.flags & PMU_FL_PAIR))
325 		return false;
326 
327 	switch (amd_get_event_code(hwc)) {
328 	case 0x003:	return true;	/* Retired SSE/AVX FLOPs */
329 	default:	return false;
330 	}
331 }
332 
333 #define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
334 static inline int amd_is_brs_event(struct perf_event *e)
335 {
336 	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
337 }
338 
339 static int amd_core_hw_config(struct perf_event *event)
340 {
341 	int ret = 0;
342 
343 	if (event->attr.exclude_host && event->attr.exclude_guest)
344 		/*
345 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
346 		 * and will count in both modes. We don't want to count in that
347 		 * case so we emulate no-counting by setting US = OS = 0.
348 		 */
349 		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
350 				      ARCH_PERFMON_EVENTSEL_OS);
351 	else if (event->attr.exclude_host)
352 		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
353 	else if (event->attr.exclude_guest)
354 		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
355 
356 	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
357 		event->hw.flags |= PERF_X86_EVENT_PAIR;
358 
359 	/*
360 	 * if branch stack is requested
361 	 */
362 	if (has_branch_stack(event)) {
363 		/*
364 		 * Due to interrupt holding, BRS is not recommended in
365 		 * counting mode.
366 		 */
367 		if (!is_sampling_event(event))
368 			return -EINVAL;
369 
370 		/*
371 		 * Due to the way BRS operates by holding the interrupt until
372 		 * lbr_nr entries have been captured, it does not make sense
373 		 * to allow sampling on BRS with an event that does not match
374 		 * what BRS is capturing, i.e., retired taken branches.
375 		 * Otherwise the correlation with the event's period is even
376 		 * more loose:
377 		 *
378 		 * With retired taken branch:
379 		 *   Effective P = P + 16 + X
380 		 * With any other event:
381 		 *   Effective P = P + Y + X
382 		 *
383 		 * Where X is the number of taken branches due to interrupt
384 		 * skid. Skid is large.
385 		 *
386 		 * Where Y is the occurences of the event while BRS is
387 		 * capturing the lbr_nr entries.
388 		 *
389 		 * By using retired taken branches, we limit the impact on the
390 		 * Y variable. We know it cannot be more than the depth of
391 		 * BRS.
392 		 */
393 		if (!amd_is_brs_event(event))
394 			return -EINVAL;
395 
396 		/*
397 		 * BRS implementation does not work with frequency mode
398 		 * reprogramming of the period.
399 		 */
400 		if (event->attr.freq)
401 			return -EINVAL;
402 		/*
403 		 * The kernel subtracts BRS depth from period, so it must
404 		 * be big enough.
405 		 */
406 		if (event->attr.sample_period <= x86_pmu.lbr_nr)
407 			return -EINVAL;
408 
409 		/*
410 		 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
411 		 */
412 		ret = amd_brs_setup_filter(event);
413 
414 		/* only set in case of success */
415 		if (!ret)
416 			event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
417 	}
418 	return ret;
419 }
420 
421 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
422 {
423 	return (hwc->config & 0xe0) == 0xe0;
424 }
425 
426 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
427 {
428 	struct amd_nb *nb = cpuc->amd_nb;
429 
430 	return nb && nb->nb_id != -1;
431 }
432 
433 static int amd_pmu_hw_config(struct perf_event *event)
434 {
435 	int ret;
436 
437 	/* pass precise event sampling to ibs: */
438 	if (event->attr.precise_ip && get_ibs_caps())
439 		return -ENOENT;
440 
441 	if (has_branch_stack(event) && !x86_pmu.lbr_nr)
442 		return -EOPNOTSUPP;
443 
444 	ret = x86_pmu_hw_config(event);
445 	if (ret)
446 		return ret;
447 
448 	if (event->attr.type == PERF_TYPE_RAW)
449 		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
450 
451 	return amd_core_hw_config(event);
452 }
453 
454 static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
455 					   struct perf_event *event)
456 {
457 	struct amd_nb *nb = cpuc->amd_nb;
458 	int i;
459 
460 	/*
461 	 * need to scan whole list because event may not have
462 	 * been assigned during scheduling
463 	 *
464 	 * no race condition possible because event can only
465 	 * be removed on one CPU at a time AND PMU is disabled
466 	 * when we come here
467 	 */
468 	for (i = 0; i < x86_pmu.num_counters; i++) {
469 		if (cmpxchg(nb->owners + i, event, NULL) == event)
470 			break;
471 	}
472 }
473 
474  /*
475   * AMD64 NorthBridge events need special treatment because
476   * counter access needs to be synchronized across all cores
477   * of a package. Refer to BKDG section 3.12
478   *
479   * NB events are events measuring L3 cache, Hypertransport
480   * traffic. They are identified by an event code >= 0xe00.
  * They measure events on the NorthBridge which is shared
482   * by all cores on a package. NB events are counted on a
483   * shared set of counters. When a NB event is programmed
484   * in a counter, the data actually comes from a shared
485   * counter. Thus, access to those counters needs to be
486   * synchronized.
487   *
488   * We implement the synchronization such that no two cores
489   * can be measuring NB events using the same counters. Thus,
490   * we maintain a per-NB allocation table. The available slot
491   * is propagated using the event_constraint structure.
492   *
493   * We provide only one choice for each NB event based on
494   * the fact that only NB events have restrictions. Consequently,
495   * if a counter is available, there is a guarantee the NB event
496   * will be assigned to it. If no slot is available, an empty
497   * constraint is returned and scheduling will eventually fail
498   * for this event.
499   *
  * Note that all cores attached to the same NB compete for the same
501   * counters to host NB events, this is why we use atomic ops. Some
502   * multi-chip CPUs may have more than one NB.
503   *
504   * Given that resources are allocated (cmpxchg), they must be
505   * eventually freed for others to use. This is accomplished by
506   * calling __amd_put_nb_event_constraints()
507   *
508   * Non NB events are not impacted by this restriction.
509   */
/*
 * @cpuc:  per-CPU PMU state; cpuc->amd_nb supplies the shared owners[] table
 * @event: NB event that needs a counter slot
 * @c:     candidate constraint whose idxmsk limits the slots tried; NULL
 *         means &unconstrained
 *
 * Returns @c (or &unconstrained) untouched for fake cpuc instances,
 * &emptyconstraint when no slot could be claimed, and otherwise the
 * single-counter constraint nb->event_constraints[] of the claimed slot.
 */
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old;
	int idx, new = -1;	/* new: slot currently held for this event, -1 if none */

	if (!c)
		c = &unconstrained;

	/* Fake cpuc: return before touching the shared owners[] table */
	if (cpuc->is_fake)
		return c;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
		else if (nb->owners[idx] == event)
			/* event already present */
			old = event;
		else
			continue;

		/* Slot is owned by some other event: keep looking */
		if (old && old != event)
			continue;

		/* reassign to this slot */
		if (new != -1)
			cmpxchg(nb->owners + new, event, NULL);
		new = idx;

		/* already present, reuse */
		if (old == event)
			break;
	}

	if (new == -1)
		return &emptyconstraint;

	return &nb->event_constraints[new];
}
563 
564 static struct amd_nb *amd_alloc_nb(int cpu)
565 {
566 	struct amd_nb *nb;
567 	int i;
568 
569 	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
570 	if (!nb)
571 		return NULL;
572 
573 	nb->nb_id = -1;
574 
575 	/*
576 	 * initialize all possible NB constraints
577 	 */
578 	for (i = 0; i < x86_pmu.num_counters; i++) {
579 		__set_bit(i, nb->event_constraints[i].idxmsk);
580 		nb->event_constraints[i].weight = 1;
581 	}
582 	return nb;
583 }
584 
585 static void amd_pmu_cpu_reset(int cpu)
586 {
587 	if (x86_pmu.version < 2)
588 		return;
589 
590 	/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
591 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
592 
593 	/* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */
594 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
595 }
596 
597 static int amd_pmu_cpu_prepare(int cpu)
598 {
599 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
600 
601 	WARN_ON_ONCE(cpuc->amd_nb);
602 
603 	if (!x86_pmu.amd_nb_constraints)
604 		return 0;
605 
606 	cpuc->amd_nb = amd_alloc_nb(cpu);
607 	if (!cpuc->amd_nb)
608 		return -ENOMEM;
609 
610 	return 0;
611 }
612 
613 static void amd_pmu_cpu_starting(int cpu)
614 {
615 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
616 	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
617 	struct amd_nb *nb;
618 	int i, nb_id;
619 
620 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
621 
622 	if (!x86_pmu.amd_nb_constraints)
623 		return;
624 
625 	nb_id = topology_die_id(cpu);
626 	WARN_ON_ONCE(nb_id == BAD_APICID);
627 
628 	for_each_online_cpu(i) {
629 		nb = per_cpu(cpu_hw_events, i).amd_nb;
630 		if (WARN_ON_ONCE(!nb))
631 			continue;
632 
633 		if (nb->nb_id == nb_id) {
634 			*onln = cpuc->amd_nb;
635 			cpuc->amd_nb = nb;
636 			break;
637 		}
638 	}
639 
640 	cpuc->amd_nb->nb_id = nb_id;
641 	cpuc->amd_nb->refcnt++;
642 
643 	amd_brs_reset();
644 	amd_pmu_cpu_reset(cpu);
645 }
646 
647 static void amd_pmu_cpu_dead(int cpu)
648 {
649 	struct cpu_hw_events *cpuhw;
650 
651 	if (!x86_pmu.amd_nb_constraints)
652 		return;
653 
654 	cpuhw = &per_cpu(cpu_hw_events, cpu);
655 
656 	if (cpuhw->amd_nb) {
657 		struct amd_nb *nb = cpuhw->amd_nb;
658 
659 		if (nb->nb_id == -1 || --nb->refcnt == 0)
660 			kfree(nb);
661 
662 		cpuhw->amd_nb = NULL;
663 	}
664 
665 	amd_pmu_cpu_reset(cpu);
666 }
667 
/* Write @ctl to PerfCntrGlobalCtl; callers pass the counter mask or 0 */
static inline void amd_pmu_set_global_ctl(u64 ctl)
{
	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
672 
673 static inline u64 amd_pmu_get_global_status(void)
674 {
675 	u64 status;
676 
677 	/* PerfCntrGlobalStatus is read-only */
678 	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
679 
680 	return status & amd_pmu_global_cntr_mask;
681 }
682 
683 static inline void amd_pmu_ack_global_status(u64 status)
684 {
685 	/*
686 	 * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
687 	 * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
688 	 * clears the same bit in PerfCntrGlobalStatus
689 	 */
690 
691 	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
692 	status &= amd_pmu_global_cntr_mask;
693 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
694 }
695 
696 static bool amd_pmu_test_overflow_topbit(int idx)
697 {
698 	u64 counter;
699 
700 	rdmsrl(x86_pmu_event_addr(idx), counter);
701 
702 	return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
703 }
704 
705 static bool amd_pmu_test_overflow_status(int idx)
706 {
707 	return amd_pmu_get_global_status() & BIT_ULL(idx);
708 }
709 
/*
 * Overflow test invoked by amd_pmu_wait_on_overflow(); the top-bit variant
 * is the default target.
 */
DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
711 
712 /*
713  * When a PMC counter overflows, an NMI is used to process the event and
714  * reset the counter. NMI latency can result in the counter being updated
715  * before the NMI can run, which can result in what appear to be spurious
716  * NMIs. This function is intended to wait for the NMI to run and reset
717  * the counter to avoid possible unhandled NMI messages.
718  */
719 #define OVERFLOW_WAIT_COUNT	50
720 
721 static void amd_pmu_wait_on_overflow(int idx)
722 {
723 	unsigned int i;
724 
725 	/*
726 	 * Wait for the counter to be reset if it has overflowed. This loop
727 	 * should exit very, very quickly, but just in case, don't wait
728 	 * forever...
729 	 */
730 	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
731 		if (!static_call(amd_pmu_test_overflow)(idx))
732 			break;
733 
734 		/* Might be in IRQ context, so can't sleep */
735 		udelay(1);
736 	}
737 }
738 
739 static void amd_pmu_check_overflow(void)
740 {
741 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
742 	int idx;
743 
744 	/*
745 	 * This shouldn't be called from NMI context, but add a safeguard here
746 	 * to return, since if we're in NMI context we can't wait for an NMI
747 	 * to reset an overflowed counter value.
748 	 */
749 	if (in_nmi())
750 		return;
751 
752 	/*
753 	 * Check each counter for overflow and wait for it to be reset by the
754 	 * NMI if it has overflowed. This relies on the fact that all active
755 	 * counters are always enabled when this function is called and
756 	 * ARCH_PERFMON_EVENTSEL_INT is always set.
757 	 */
758 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
759 		if (!test_bit(idx, cpuc->active_mask))
760 			continue;
761 
762 		amd_pmu_wait_on_overflow(idx);
763 	}
764 }
765 
/* Enable @event by handing it to the generic x86_pmu_enable_event() */
static void amd_pmu_enable_event(struct perf_event *event)
{
	x86_pmu_enable_event(event);
}
770 
771 static void amd_pmu_enable_all(int added)
772 {
773 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
774 	int idx;
775 
776 	amd_brs_enable_all();
777 
778 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
779 		/* only activate events which are marked as active */
780 		if (!test_bit(idx, cpuc->active_mask))
781 			continue;
782 
783 		amd_pmu_enable_event(cpuc->events[idx]);
784 	}
785 }
786 
787 static void amd_pmu_v2_enable_event(struct perf_event *event)
788 {
789 	struct hw_perf_event *hwc = &event->hw;
790 
791 	/*
792 	 * Testing cpu_hw_events.enabled should be skipped in this case unlike
793 	 * in x86_pmu_enable_event().
794 	 *
795 	 * Since cpu_hw_events.enabled is set only after returning from
796 	 * x86_pmu_start(), the PMCs must be programmed and kept ready.
797 	 * Counting starts only after x86_pmu_enable_all() is called.
798 	 */
799 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
800 }
801 
/*
 * Write the full counter mask to PerfCntrGlobalCtl in one go.  @added is
 * not used.
 */
static void amd_pmu_v2_enable_all(int added)
{
	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
}
806 
807 static void amd_pmu_disable_event(struct perf_event *event)
808 {
809 	x86_pmu_disable_event(event);
810 
811 	/*
812 	 * This can be called from NMI context (via x86_pmu_stop). The counter
813 	 * may have overflowed, but either way, we'll never see it get reset
814 	 * by the NMI if we're already in the NMI. And the NMI latency support
815 	 * below will take care of any pending NMI that might have been
816 	 * generated by the overflow.
817 	 */
818 	if (in_nmi())
819 		return;
820 
821 	amd_pmu_wait_on_overflow(event->hw.idx);
822 }
823 
/*
 * Stop BRS, disable all counters, then wait for pending overflows on the
 * active counters to be processed (the wait is skipped in NMI context,
 * see amd_pmu_check_overflow()).
 */
static void amd_pmu_disable_all(void)
{
	amd_brs_disable_all();
	x86_pmu_disable_all();
	amd_pmu_check_overflow();
}
830 
/*
 * Clear PerfCntrGlobalCtl, then wait for pending overflows on the active
 * counters to be processed (the wait is skipped in NMI context, see
 * amd_pmu_check_overflow()).
 */
static void amd_pmu_v2_disable_all(void)
{
	/* Disable all PMCs */
	amd_pmu_set_global_ctl(0);
	amd_pmu_check_overflow();
}
837 
/* Register @event with BRS when it needs a branch stack */
static void amd_pmu_add_event(struct perf_event *event)
{
	if (needs_branch_stack(event))
		amd_pmu_brs_add(event);
}
843 
/* Counterpart of amd_pmu_add_event(): unregister @event from BRS */
static void amd_pmu_del_event(struct perf_event *event)
{
	if (needs_branch_stack(event))
		amd_pmu_brs_del(event);
}
849 
850 /*
851  * Because of NMI latency, if multiple PMC counters are active or other sources
852  * of NMIs are received, the perf NMI handler can handle one or more overflowed
853  * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
854  * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
855  * back-to-back NMI support won't be active. This PMC handler needs to take into
856  * account that this can occur, otherwise this could result in unknown NMI
 * messages being issued. Examples of this are PMC overflow while in the NMI
858  * handler when multiple PMCs are active or PMC overflow while handling some
859  * other source of an NMI.
860  *
861  * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
862  * received during this window will be claimed. This prevents extending the
863  * window past when it is possible that latent NMIs should be received. The
864  * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
865  * handled a counter. When an un-handled NMI is received, it will be claimed
866  * only if arriving within that window.
867  */
868 static inline int amd_pmu_adjust_nmi_window(int handled)
869 {
870 	/*
871 	 * If a counter was handled, record a timestamp such that un-handled
872 	 * NMIs will be claimed if arriving within that window.
873 	 */
874 	if (handled) {
875 		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
876 
877 		return handled;
878 	}
879 
880 	if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
881 		return NMI_DONE;
882 
883 	return NMI_HANDLED;
884 }
885 
886 static int amd_pmu_handle_irq(struct pt_regs *regs)
887 {
888 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
889 	int handled;
890 	int pmu_enabled;
891 
892 	/*
893 	 * Save the PMU state.
894 	 * It needs to be restored when leaving the handler.
895 	 */
896 	pmu_enabled = cpuc->enabled;
897 	cpuc->enabled = 0;
898 
899 	/* stop everything (includes BRS) */
900 	amd_pmu_disable_all();
901 
902 	/* Drain BRS is in use (could be inactive) */
903 	if (cpuc->lbr_users)
904 		amd_brs_drain();
905 
906 	/* Process any counter overflows */
907 	handled = x86_pmu_handle_irq(regs);
908 
909 	cpuc->enabled = pmu_enabled;
910 	if (pmu_enabled)
911 		amd_pmu_enable_all(0);
912 
913 	return amd_pmu_adjust_nmi_window(handled);
914 }
915 
916 static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
917 {
918 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
919 	struct perf_sample_data data;
920 	struct hw_perf_event *hwc;
921 	struct perf_event *event;
922 	int handled = 0, idx;
923 	u64 status, mask;
924 	bool pmu_enabled;
925 
926 	/*
927 	 * Save the PMU state as it needs to be restored when leaving the
928 	 * handler
929 	 */
930 	pmu_enabled = cpuc->enabled;
931 	cpuc->enabled = 0;
932 
933 	/* Stop counting */
934 	amd_pmu_v2_disable_all();
935 
936 	status = amd_pmu_get_global_status();
937 
938 	/* Check if any overflows are pending */
939 	if (!status)
940 		goto done;
941 
942 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
943 		if (!test_bit(idx, cpuc->active_mask))
944 			continue;
945 
946 		event = cpuc->events[idx];
947 		hwc = &event->hw;
948 		x86_perf_event_update(event);
949 		mask = BIT_ULL(idx);
950 
951 		if (!(status & mask))
952 			continue;
953 
954 		/* Event overflow */
955 		handled++;
956 		perf_sample_data_init(&data, 0, hwc->last_period);
957 
958 		if (!x86_perf_event_set_period(event))
959 			continue;
960 
961 		if (perf_event_overflow(event, &data, regs))
962 			x86_pmu_stop(event, 0);
963 
964 		status &= ~mask;
965 	}
966 
967 	/*
968 	 * It should never be the case that some overflows are not handled as
969 	 * the corresponding PMCs are expected to be inactive according to the
970 	 * active_mask
971 	 */
972 	WARN_ON(status > 0);
973 
974 	/* Clear overflow bits */
975 	amd_pmu_ack_global_status(~status);
976 
977 	/*
978 	 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
979 	 * PMI entry is not set by the local APIC when a PMC overflow occurs
980 	 */
981 	inc_irq_stat(apic_perf_irqs);
982 
983 done:
984 	cpuc->enabled = pmu_enabled;
985 
986 	/* Resume counting only if PMU is active */
987 	if (pmu_enabled)
988 		amd_pmu_v2_enable_all(0);
989 
990 	return amd_pmu_adjust_nmi_window(handled);
991 }
992 
993 static struct event_constraint *
994 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
995 			  struct perf_event *event)
996 {
997 	/*
998 	 * if not NB event or no NB, then no constraints
999 	 */
1000 	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
1001 		return &unconstrained;
1002 
1003 	return __amd_get_nb_event_constraints(cpuc, event, NULL);
1004 }
1005 
1006 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
1007 				      struct perf_event *event)
1008 {
1009 	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
1010 		__amd_put_nb_event_constraints(cpuc, event);
1011 }
1012 
/*
 * Format attributes naming the config bit ranges of each event field.  The
 * event select is split across bits 0-7 and 32-35, matching the layout
 * decoded by amd_get_event_code().
 */
PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);

/* NULL-terminated list of the format attributes defined above */
static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
1027 
1028 /* AMD Family 15h */
1029 
/* Selects the event type, i.e. bits 7:4 of the event code */
#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

/*
 * Event types.  Most of these expand to "low ... high" case ranges and are
 * therefore only usable as case labels, as done in
 * amd_get_event_constraints_f15h().
 */
#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
1040 
1041 /*
1042  * AMD family 15h event code/PMC mappings:
1043  *
1044  * type = event_code & 0x0F0:
1045  *
1046  * 0x000	FP	PERF_CTL[5:3]
1047  * 0x010	FP	PERF_CTL[5:3]
1048  * 0x020	LS	PERF_CTL[5:0]
1049  * 0x030	LS	PERF_CTL[5:0]
1050  * 0x040	DC	PERF_CTL[5:0]
1051  * 0x050	DC	PERF_CTL[5:0]
1052  * 0x060	CU	PERF_CTL[2:0]
1053  * 0x070	CU	PERF_CTL[2:0]
1054  * 0x080	IC/DE	PERF_CTL[2:0]
1055  * 0x090	IC/DE	PERF_CTL[2:0]
1056  * 0x0A0	---
1057  * 0x0B0	---
1058  * 0x0C0	EX/LS	PERF_CTL[5:0]
1059  * 0x0D0	DE	PERF_CTL[2:0]
1060  * 0x0E0	NB	NB_PERF_CTL[3:0]
1061  * 0x0F0	NB	NB_PERF_CTL[3:0]
1062  *
1063  * Exceptions:
1064  *
1065  * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
1066  * 0x003	FP	PERF_CTL[3]
1067  * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
1068  * 0x00B	FP	PERF_CTL[3]
1069  * 0x00D	FP	PERF_CTL[3]
1070  * 0x023	DE	PERF_CTL[2:0]
1071  * 0x02D	LS	PERF_CTL[3]
1072  * 0x02E	LS	PERF_CTL[3,0]
1073  * 0x031	LS	PERF_CTL[2:0] (**)
1074  * 0x043	CU	PERF_CTL[2:0]
1075  * 0x045	CU	PERF_CTL[2:0]
1076  * 0x046	CU	PERF_CTL[2:0]
1077  * 0x054	CU	PERF_CTL[2:0]
1078  * 0x055	CU	PERF_CTL[2:0]
1079  * 0x08F	IC	PERF_CTL[0]
1080  * 0x187	DE	PERF_CTL[0]
1081  * 0x188	DE	PERF_CTL[0]
1082  * 0x0DB	EX	PERF_CTL[5:0]
1083  * 0x0DC	LS	PERF_CTL[5:0]
1084  * 0x0DD	LS	PERF_CTL[5:0]
1085  * 0x0DE	LS	PERF_CTL[5:0]
1086  * 0x0DF	LS	PERF_CTL[5:0]
1087  * 0x1C0	EX	PERF_CTL[5:3]
1088  * 0x1D6	EX	PERF_CTL[5:0]
1089  * 0x1D8	EX	PERF_CTL[5:0]
1090  *
1091  * (*)  depending on the umask all FPU counters may be used
1092  * (**) only one unitmask enabled at a time
1093  */
1094 
/*
 * Counter-set constraints returned by amd_get_event_constraints_f15h().
 * NOTE(review): the second macro argument is read here as a bitmask of the
 * permitted counter indices, which matches the PERF_CTL[...] ranges listed in
 * the mapping table above - confirm against EVENT_CONSTRAINT() in
 * ../perf_event.h.
 */
1095 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);	/* mask 0x01: PERF_CTL[0] */
1096 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);	/* mask 0x07: PERF_CTL[2:0] */
1097 static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);	/* mask 0x08: PERF_CTL[3] */
1098 static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);	/* mask 0x09: PERF_CTL[3,0] */
1099 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);	/* mask 0x3F: PERF_CTL[5:0] */
1100 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);	/* mask 0x38: PERF_CTL[5:3] */
1101 
1102 static struct event_constraint *
1103 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
1104 			       struct perf_event *event)
1105 {
1106 	struct hw_perf_event *hwc = &event->hw;
1107 	unsigned int event_code = amd_get_event_code(hwc);
1108 
1109 	switch (event_code & AMD_EVENT_TYPE_MASK) {
1110 	case AMD_EVENT_FP:
1111 		switch (event_code) {
1112 		case 0x000:
1113 			if (!(hwc->config & 0x0000F000ULL))
1114 				break;
1115 			if (!(hwc->config & 0x00000F00ULL))
1116 				break;
1117 			return &amd_f15_PMC3;
1118 		case 0x004:
1119 			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1120 				break;
1121 			return &amd_f15_PMC3;
1122 		case 0x003:
1123 		case 0x00B:
1124 		case 0x00D:
1125 			return &amd_f15_PMC3;
1126 		}
1127 		return &amd_f15_PMC53;
1128 	case AMD_EVENT_LS:
1129 	case AMD_EVENT_DC:
1130 	case AMD_EVENT_EX_LS:
1131 		switch (event_code) {
1132 		case 0x023:
1133 		case 0x043:
1134 		case 0x045:
1135 		case 0x046:
1136 		case 0x054:
1137 		case 0x055:
1138 			return &amd_f15_PMC20;
1139 		case 0x02D:
1140 			return &amd_f15_PMC3;
1141 		case 0x02E:
1142 			return &amd_f15_PMC30;
1143 		case 0x031:
1144 			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1145 				return &amd_f15_PMC20;
1146 			return &emptyconstraint;
1147 		case 0x1C0:
1148 			return &amd_f15_PMC53;
1149 		default:
1150 			return &amd_f15_PMC50;
1151 		}
1152 	case AMD_EVENT_CU:
1153 	case AMD_EVENT_IC_DE:
1154 	case AMD_EVENT_DE:
1155 		switch (event_code) {
1156 		case 0x08F:
1157 		case 0x187:
1158 		case 0x188:
1159 			return &amd_f15_PMC0;
1160 		case 0x0DB ... 0x0DF:
1161 		case 0x1D6:
1162 		case 0x1D8:
1163 			return &amd_f15_PMC50;
1164 		default:
1165 			return &amd_f15_PMC20;
1166 		}
1167 	case AMD_EVENT_NB:
1168 		/* moved to uncore.c */
1169 		return &emptyconstraint;
1170 	default:
1171 		return &emptyconstraint;
1172 	}
1173 }
1174 
1175 static struct event_constraint pair_constraint;
1176 
1177 static struct event_constraint *
1178 amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
1179 			       struct perf_event *event)
1180 {
1181 	struct hw_perf_event *hwc = &event->hw;
1182 
1183 	if (amd_is_pair_event_code(hwc))
1184 		return &pair_constraint;
1185 
1186 	return &unconstrained;
1187 }
1188 
1189 static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
1190 					   struct perf_event *event)
1191 {
1192 	struct hw_perf_event *hwc = &event->hw;
1193 
1194 	if (is_counter_pair(hwc))
1195 		--cpuc->n_pair;
1196 }
1197 
1198 /*
1199  * Because of the way BRS operates with inactive and active phases, and
1200  * the link to one counter, it is not possible to have two events using BRS
1201  * scheduled at the same time. There would be an issue with enforcing the
1202  * period of each one and given that the BRS saturates, it would not be possible
1203  * to guarantee correlated content for all events. Therefore, in situations
1204  * where multiple events want to use BRS, the kernel enforces mutual exclusion.
1205  * Exclusion is enforced by choosing only one counter for events using BRS.
1206  * The event scheduling logic will then automatically multiplex the
1207  * events and ensure that at most one event is actively using BRS.
1208  *
1209  * The BRS counter could be any counter, but there is no constraint on Fam19h,
1210  * therefore all counters are equal and thus we pick the first one: PMC0
1211  */
1212 static struct event_constraint amd_fam19h_brs_cntr0_constraint =	/* returned for a BRS event that is not a counter-pair event */
1213 	EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
1214 
1215 static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =	/* returned for a BRS event that is also a counter-pair event */
1216 	__EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
1217 
1218 static struct event_constraint *
1219 amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
1220 			  struct perf_event *event)
1221 {
1222 	struct hw_perf_event *hwc = &event->hw;
1223 	bool has_brs = has_amd_brs(hwc);
1224 
1225 	/*
1226 	 * In case BRS is used with an event requiring a counter pair,
1227 	 * the kernel allows it but only on counter 0 & 1 to enforce
1228 	 * multiplexing requiring to protect BRS in case of multiple
1229 	 * BRS users
1230 	 */
1231 	if (amd_is_pair_event_code(hwc)) {
1232 		return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
1233 			       : &pair_constraint;
1234 	}
1235 
1236 	if (has_brs)
1237 		return &amd_fam19h_brs_cntr0_constraint;
1238 
1239 	return &unconstrained;
1240 }
1241 
1242 
1243 static ssize_t amd_event_sysfs_show(char *page, u64 config)
1244 {
1245 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
1246 		    (config & AMD64_EVENTSEL_EVENT) >> 24;
1247 
1248 	return x86_event_sysfs_show(page, config, event);
1249 }
1250 
1251 static void amd_pmu_sched_task(struct perf_event_context *ctx,
1252 				 bool sched_in)
1253 {
1254 	if (sched_in && x86_pmu.lbr_nr)
1255 		amd_pmu_brs_sched_task(ctx, sched_in);
1256 }
1257 
1258 static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
1259 {
1260 	/*
1261 	 * Decrease period by the depth of the BRS feature to get the last N
1262 	 * taken branches and approximate the desired period
1263 	 */
1264 	if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
1265 		left -= x86_pmu.lbr_nr;
1266 
1267 	return left;
1268 }
1269 
1270 static __initconst const struct x86_pmu amd_pmu = {
1271 	.name			= "AMD",
1272 	.handle_irq		= amd_pmu_handle_irq,
1273 	.disable_all		= amd_pmu_disable_all,
1274 	.enable_all		= amd_pmu_enable_all,
1275 	.enable			= amd_pmu_enable_event,
1276 	.disable		= amd_pmu_disable_event,
1277 	.hw_config		= amd_pmu_hw_config,
1278 	.schedule_events	= x86_schedule_events,
1279 	.eventsel		= MSR_K7_EVNTSEL0,
1280 	.perfctr		= MSR_K7_PERFCTR0,
1281 	.addr_offset            = amd_pmu_addr_offset,
1282 	.event_map		= amd_pmu_event_map,
1283 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
1284 	.num_counters		= AMD64_NUM_COUNTERS,
1285 	.add			= amd_pmu_add_event,
1286 	.del			= amd_pmu_del_event,
1287 	.cntval_bits		= 48,
1288 	.cntval_mask		= (1ULL << 48) - 1,
1289 	.apic			= 1,
1290 	/* use highest bit to detect overflow */
1291 	.max_period		= (1ULL << 47) - 1,
1292 	.get_event_constraints	= amd_get_event_constraints,
1293 	.put_event_constraints	= amd_put_event_constraints,
1294 
1295 	.format_attrs		= amd_format_attr,
1296 	.events_sysfs_show	= amd_event_sysfs_show,
1297 
1298 	.cpu_prepare		= amd_pmu_cpu_prepare,
1299 	.cpu_starting		= amd_pmu_cpu_starting,
1300 	.cpu_dead		= amd_pmu_cpu_dead,
1301 
1302 	.amd_nb_constraints	= 1,
1303 };
1304 
1305 static ssize_t branches_show(struct device *cdev,
1306 			      struct device_attribute *attr,
1307 			      char *buf)
1308 {
1309 	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
1310 }
1311 
1312 static DEVICE_ATTR_RO(branches);	/* provides dev_attr_branches, referenced below */
1313 
1314 static struct attribute *amd_pmu_brs_attrs[] = {	/* attribute list used by group_caps_amd_brs */
1315 	&dev_attr_branches.attr,
1316 	NULL,	/* end of list */
1317 };
1318 
1319 static umode_t
1320 amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
1321 {
1322 	return x86_pmu.lbr_nr ? attr->mode : 0;
1323 }
1324 
1325 static struct attribute_group group_caps_amd_brs = {
1326 	.name  = "caps",
1327 	.attrs = amd_pmu_brs_attrs,
1328 	.is_visible = amd_brs_is_visible,
1329 };
1330 
1331 EVENT_ATTR_STR(branch-brs, amd_branch_brs,	/* event attribute "branch-brs"; its string encodes AMD_FAM19H_BRS_EVENT */
1332 	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
1333 
1334 static struct attribute *amd_brs_events_attrs[] = {	/* attribute list used by group_events_amd_brs */
1335 	EVENT_PTR(amd_branch_brs),
1336 	NULL,	/* end of list */
1337 };
1338 
1339 static struct attribute_group group_events_amd_brs = {
1340 	.name       = "events",
1341 	.attrs      = amd_brs_events_attrs,
1342 	.is_visible = amd_brs_is_visible,
1343 };
1344 
1345 static const struct attribute_group *amd_attr_update[] = {	/* assigned to x86_pmu.attr_update in amd_core_pmu_init() */
1346 	&group_caps_amd_brs,
1347 	&group_events_amd_brs,
1348 	NULL,	/* end of list */
1349 };
1350 
1351 static int __init amd_core_pmu_init(void)
1352 {
1353 	union cpuid_0x80000022_ebx ebx;
1354 	u64 even_ctr_mask = 0ULL;
1355 	int i;
1356 
1357 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
1358 		return 0;
1359 
1360 	/* Avoid calculating the value each time in the NMI handler */
1361 	perf_nmi_window = msecs_to_jiffies(100);
1362 
1363 	/*
1364 	 * If core performance counter extensions exists, we must use
1365 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
1366 	 * amd_pmu_addr_offset().
1367 	 */
1368 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
1369 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
1370 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
1371 
1372 	/* Check for Performance Monitoring v2 support */
1373 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
1374 		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
1375 
1376 		/* Update PMU version for later usage */
1377 		x86_pmu.version = 2;
1378 
1379 		/* Find the number of available Core PMCs */
1380 		x86_pmu.num_counters = ebx.split.num_core_pmc;
1381 
1382 		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
1383 
1384 		/* Update PMC handling functions */
1385 		x86_pmu.enable_all = amd_pmu_v2_enable_all;
1386 		x86_pmu.disable_all = amd_pmu_v2_disable_all;
1387 		x86_pmu.enable = amd_pmu_v2_enable_event;
1388 		x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
1389 		static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
1390 	}
1391 
1392 	/*
1393 	 * AMD Core perfctr has separate MSRs for the NB events, see
1394 	 * the amd/uncore.c driver.
1395 	 */
1396 	x86_pmu.amd_nb_constraints = 0;
1397 
1398 	if (boot_cpu_data.x86 == 0x15) {
1399 		pr_cont("Fam15h ");
1400 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
1401 	}
1402 	if (boot_cpu_data.x86 >= 0x17) {
1403 		pr_cont("Fam17h+ ");
1404 		/*
1405 		 * Family 17h and compatibles have constraints for Large
1406 		 * Increment per Cycle events: they may only be assigned an
1407 		 * even numbered counter that has a consecutive adjacent odd
1408 		 * numbered counter following it.
1409 		 */
1410 		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
1411 			even_ctr_mask |= 1 << i;
1412 
1413 		pair_constraint = (struct event_constraint)
1414 				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
1415 				    x86_pmu.num_counters / 2, 0,
1416 				    PERF_X86_EVENT_PAIR);
1417 
1418 		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
1419 		x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
1420 		x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
1421 		x86_pmu.flags |= PMU_FL_PAIR;
1422 	}
1423 
1424 	/*
1425 	 * BRS requires special event constraints and flushing on ctxsw.
1426 	 */
1427 	if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
1428 		x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
1429 		x86_pmu.sched_task = amd_pmu_sched_task;
1430 		x86_pmu.limit_period = amd_pmu_limit_period;
1431 		/*
1432 		 * put_event_constraints callback same as Fam17h, set above
1433 		 */
1434 
1435 		/* branch sampling must be stopped when entering low power */
1436 		amd_brs_lopwr_init();
1437 	}
1438 
1439 	x86_pmu.attr_update = amd_attr_update;
1440 
1441 	pr_cont("core perfctr, ");
1442 	return 0;
1443 }
1444 
1445 __init int amd_pmu_init(void)
1446 {
1447 	int ret;
1448 
1449 	/* Performance-monitoring supported from K7 and later: */
1450 	if (boot_cpu_data.x86 < 6)
1451 		return -ENODEV;
1452 
1453 	x86_pmu = amd_pmu;
1454 
1455 	ret = amd_core_pmu_init();
1456 	if (ret)
1457 		return ret;
1458 
1459 	if (num_possible_cpus() == 1) {
1460 		/*
1461 		 * No point in allocating data structures to serialize
1462 		 * against other CPUs, when there is only the one CPU.
1463 		 */
1464 		x86_pmu.amd_nb_constraints = 0;
1465 	}
1466 
1467 	if (boot_cpu_data.x86 >= 0x17)
1468 		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
1469 	else
1470 		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
1471 
1472 	return 0;
1473 }
1474 
1475 static inline void amd_pmu_reload_virt(void)
1476 {
1477 	if (x86_pmu.version >= 2) {
1478 		/*
1479 		 * Clear global enable bits, reprogram the PERF_CTL
1480 		 * registers with updated perf_ctr_virt_mask and then
1481 		 * set global enable bits once again
1482 		 */
1483 		amd_pmu_v2_disable_all();
1484 		amd_pmu_enable_all(0);
1485 		amd_pmu_v2_enable_all(0);
1486 		return;
1487 	}
1488 
1489 	amd_pmu_disable_all();
1490 	amd_pmu_enable_all(0);
1491 }
1492 
1493 void amd_pmu_enable_virt(void)
1494 {
1495 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1496 
1497 	cpuc->perf_ctr_virt_mask = 0;
1498 
1499 	/* Reload all events */
1500 	amd_pmu_reload_virt();
1501 }
1502 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
1503 
1504 void amd_pmu_disable_virt(void)
1505 {
1506 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1507 
1508 	/*
1509 	 * We only mask out the Host-only bit so that host-only counting works
1510 	 * when SVM is disabled. If someone sets up a guest-only counter when
1511 	 * SVM is disabled the Guest-only bits still gets set and the counter
1512 	 * will not count anything.
1513 	 */
1514 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
1515 
1516 	/* Reload all events */
1517 	amd_pmu_reload_virt();
1518 }
1519 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
1520