xref: /linux/arch/x86/events/amd/uncore.c (revision 41e0d49104dbff888ef6446ea46842fde66c0a76)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Advanced Micro Devices, Inc.
4  *
5  * Author: Jacob Shin <jacob.shin@amd.com>
6  */
7 
8 #include <linux/perf_event.h>
9 #include <linux/percpu.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/init.h>
13 #include <linux/cpu.h>
14 #include <linux/cpumask.h>
15 #include <linux/cpufeature.h>
16 #include <linux/smp.h>
17 
18 #include <asm/perf_event.h>
19 #include <asm/msr.h>
20 
/* Default counter counts; amd_uncore_init() picks among these per family. */
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6

/* First RDPMC index used for NB/DF counters and for L2/L3 counters. */
#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

/* Shift applied to raw counter values when amd_uncore_read() computes a delta. */
#define COUNTER_SHIFT		16

/* Prefix every pr_*() message emitted from this file. */
#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

/* Set to 2 by amd_uncore_init() when X86_FEATURE_PERFMON_V2 is present. */
static int pmu_version;
/* Counter counts actually in use; filled in by amd_uncore_init(). */
static int num_counters_llc;
static int num_counters_nb;
/* True on family 17h and later: LLC events get thread/slice mask bits added. */
static bool l3_mask;

/*
 * Uncore structures that turned out to duplicate an online sibling's;
 * queued by amd_uncore_find_online_sibling(), freed by uncore_clean_online().
 */
static HLIST_HEAD(uncore_unused_list);
39 
/*
 * State for one uncore PMU instance. CPUs that report the same id end up
 * pointing their per-CPU slot at the same instance (see
 * amd_uncore_find_online_sibling()); refcnt counts those CPUs.
 */
struct amd_uncore {
	int id;				/* -1 after allocation; set in amd_uncore_cpu_starting() */
	int refcnt;			/* incremented per sharing CPU, dropped in uncore_dead() */
	int cpu;			/* CPU that events are remapped to in event_init */
	int num_counters;		/* number of slots in events[] */
	int rdpmc_base;			/* RDPMC index of counter 0 */
	u32 msr_base;			/* MSR of counter 0's control register */
	cpumask_t *active_mask;		/* mask shown through the "cpumask" sysfs file */
	struct pmu *pmu;		/* PMU used when migrating contexts on CPU offline */
	struct perf_event **events;	/* events[i] is the event owning counter i, or NULL */
	struct hlist_node node;		/* link in uncore_unused_list */
};
52 
/*
 * Per-CPU pointers to the uncore instance each CPU uses. Either table is
 * NULL when the corresponding PMU is not set up by amd_uncore_init().
 */
static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_llc;

/* Forward declarations; the PMUs are defined further down in this file. */
static struct pmu amd_nb_pmu;
static struct pmu amd_llc_pmu;

/* CPUs currently designated to service events for each PMU. */
static cpumask_t amd_nb_active_mask;
static cpumask_t amd_llc_active_mask;
61 
62 static bool is_nb_event(struct perf_event *event)
63 {
64 	return event->pmu->type == amd_nb_pmu.type;
65 }
66 
67 static bool is_llc_event(struct perf_event *event)
68 {
69 	return event->pmu->type == amd_llc_pmu.type;
70 }
71 
72 static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
73 {
74 	if (is_nb_event(event) && amd_uncore_nb)
75 		return *per_cpu_ptr(amd_uncore_nb, event->cpu);
76 	else if (is_llc_event(event) && amd_uncore_llc)
77 		return *per_cpu_ptr(amd_uncore_llc, event->cpu);
78 
79 	return NULL;
80 }
81 
82 static void amd_uncore_read(struct perf_event *event)
83 {
84 	struct hw_perf_event *hwc = &event->hw;
85 	u64 prev, new;
86 	s64 delta;
87 
88 	/*
89 	 * since we do not enable counter overflow interrupts,
90 	 * we do not have to worry about prev_count changing on us
91 	 */
92 
93 	prev = local64_read(&hwc->prev_count);
94 	rdpmcl(hwc->event_base_rdpmc, new);
95 	local64_set(&hwc->prev_count, new);
96 	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
97 	delta >>= COUNTER_SHIFT;
98 	local64_add(delta, &event->count);
99 }
100 
101 static void amd_uncore_start(struct perf_event *event, int flags)
102 {
103 	struct hw_perf_event *hwc = &event->hw;
104 
105 	if (flags & PERF_EF_RELOAD)
106 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
107 
108 	hwc->state = 0;
109 	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
110 	perf_event_update_userpage(event);
111 }
112 
113 static void amd_uncore_stop(struct perf_event *event, int flags)
114 {
115 	struct hw_perf_event *hwc = &event->hw;
116 
117 	wrmsrl(hwc->config_base, hwc->config);
118 	hwc->state |= PERF_HES_STOPPED;
119 
120 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
121 		amd_uncore_read(event);
122 		hwc->state |= PERF_HES_UPTODATE;
123 	}
124 }
125 
126 static int amd_uncore_add(struct perf_event *event, int flags)
127 {
128 	int i;
129 	struct amd_uncore *uncore = event_to_amd_uncore(event);
130 	struct hw_perf_event *hwc = &event->hw;
131 
132 	/* are we already assigned? */
133 	if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
134 		goto out;
135 
136 	for (i = 0; i < uncore->num_counters; i++) {
137 		if (uncore->events[i] == event) {
138 			hwc->idx = i;
139 			goto out;
140 		}
141 	}
142 
143 	/* if not, take the first available counter */
144 	hwc->idx = -1;
145 	for (i = 0; i < uncore->num_counters; i++) {
146 		if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
147 			hwc->idx = i;
148 			break;
149 		}
150 	}
151 
152 out:
153 	if (hwc->idx == -1)
154 		return -EBUSY;
155 
156 	hwc->config_base = uncore->msr_base + (2 * hwc->idx);
157 	hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
158 	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
159 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
160 
161 	/*
162 	 * The first four DF counters are accessible via RDPMC index 6 to 9
163 	 * followed by the L3 counters from index 10 to 15. For processors
164 	 * with more than four DF counters, the DF RDPMC assignments become
165 	 * discontiguous as the additional counters are accessible starting
166 	 * from index 16.
167 	 */
168 	if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
169 		hwc->event_base_rdpmc += NUM_COUNTERS_L3;
170 
171 	if (flags & PERF_EF_START)
172 		amd_uncore_start(event, PERF_EF_RELOAD);
173 
174 	return 0;
175 }
176 
177 static void amd_uncore_del(struct perf_event *event, int flags)
178 {
179 	int i;
180 	struct amd_uncore *uncore = event_to_amd_uncore(event);
181 	struct hw_perf_event *hwc = &event->hw;
182 
183 	amd_uncore_stop(event, PERF_EF_UPDATE);
184 
185 	for (i = 0; i < uncore->num_counters; i++) {
186 		if (cmpxchg(&uncore->events[i], event, NULL) == event)
187 			break;
188 	}
189 
190 	hwc->idx = -1;
191 }
192 
193 /*
194  * Return a full thread and slice mask unless user
195  * has provided them
196  */
197 static u64 l3_thread_slice_mask(u64 config)
198 {
199 	if (boot_cpu_data.x86 <= 0x18)
200 		return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
201 		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
202 
203 	/*
204 	 * If the user doesn't specify a threadmask, they're not trying to
205 	 * count core 0, so we enable all cores & threads.
206 	 * We'll also assume that they want to count slice 0 if they specify
207 	 * a threadmask and leave sliceid and enallslices unpopulated.
208 	 */
209 	if (!(config & AMD64_L3_F19H_THREAD_MASK))
210 		return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
211 		       AMD64_L3_EN_ALL_CORES;
212 
213 	return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
214 			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
215 			 AMD64_L3_COREID_MASK);
216 }
217 
218 static int amd_uncore_event_init(struct perf_event *event)
219 {
220 	struct amd_uncore *uncore;
221 	struct hw_perf_event *hwc = &event->hw;
222 	u64 event_mask = AMD64_RAW_EVENT_MASK_NB;
223 
224 	if (event->attr.type != event->pmu->type)
225 		return -ENOENT;
226 
227 	if (pmu_version >= 2 && is_nb_event(event))
228 		event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
229 
230 	/*
231 	 * NB and Last level cache counters (MSRs) are shared across all cores
232 	 * that share the same NB / Last level cache.  On family 16h and below,
233 	 * Interrupts can be directed to a single target core, however, event
234 	 * counts generated by processes running on other cores cannot be masked
235 	 * out. So we do not support sampling and per-thread events via
236 	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
237 	 */
238 	hwc->config = event->attr.config & event_mask;
239 	hwc->idx = -1;
240 
241 	if (event->cpu < 0)
242 		return -EINVAL;
243 
244 	/*
245 	 * SliceMask and ThreadMask need to be set for certain L3 events.
246 	 * For other events, the two fields do not affect the count.
247 	 */
248 	if (l3_mask && is_llc_event(event))
249 		hwc->config |= l3_thread_slice_mask(event->attr.config);
250 
251 	uncore = event_to_amd_uncore(event);
252 	if (!uncore)
253 		return -ENODEV;
254 
255 	/*
256 	 * since request can come in to any of the shared cores, we will remap
257 	 * to a single common cpu.
258 	 */
259 	event->cpu = uncore->cpu;
260 
261 	return 0;
262 }
263 
264 static umode_t
265 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
266 {
267 	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
268 	       attr->mode : 0;
269 }
270 
271 static umode_t
272 amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
273 {
274 	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
275 }
276 
277 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
278 					    struct device_attribute *attr,
279 					    char *buf)
280 {
281 	cpumask_t *active_mask;
282 	struct pmu *pmu = dev_get_drvdata(dev);
283 
284 	if (pmu->type == amd_nb_pmu.type)
285 		active_mask = &amd_nb_active_mask;
286 	else if (pmu->type == amd_llc_pmu.type)
287 		active_mask = &amd_llc_active_mask;
288 	else
289 		return 0;
290 
291 	return cpumap_print_to_pagebuf(true, buf, active_mask);
292 }
293 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
294 
/* Attributes common to both uncore PMUs: just the "cpumask" file. */
static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed group, so "cpumask" sits directly in the PMU's sysfs directory. */
static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};
303 
/*
 * Define a read-only sysfs attribute named @_name whose show routine prints
 * the fixed string @_format followed by a newline. The attribute object is
 * called format_attr_<_var>, which lets several variants share one @_name.
 */
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				struct device_attribute *attr,		\
				char *page)				\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

/* Bit layouts of the perf_event_attr::config fields, one per variant. */
DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3 */
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
328 
/*
 * Common DF and NB attributes. These are the defaults; amd_uncore_init()
 * overwrites the leading entries for F17h+ and PerfMonV2 parts.
 */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/*
 * Common L2 and L3 attributes. The third slot is a placeholder that
 * amd_uncore_init() fills with a threadmask attribute on F17h and later.
 */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};
358 
/* "format" directory contents for the NB/DF PMU. */
static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

/* "format" directory contents for the L2/L3 PMU. */
static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

/* Extra "format" entries whose visibility is gated on families 17h/18h. */
static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

/* Extra "format" entries whose visibility is gated on family 19h and later. */
static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

/* Groups installed as amd_nb_pmu.attr_groups. */
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

/* Groups installed as amd_llc_pmu.attr_groups. */
static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

/* Family-specific groups installed as amd_llc_pmu.attr_update. */
static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};
398 
/*
 * Northbridge / Data Fabric uncore PMU. amd_uncore_init() changes .name to
 * "amd_df" on family 17h and later before registering it.
 */
static struct pmu amd_nb_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_df_attr_groups,
	.name		= "amd_nb",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
	.start		= amd_uncore_start,
	.stop		= amd_uncore_stop,
	.read		= amd_uncore_read,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
	.module		= THIS_MODULE,
};

/*
 * L2 / L3 cache uncore PMU. amd_uncore_init() changes .name to "amd_l3" on
 * family 17h and later before registering it.
 */
static struct pmu amd_llc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_l3_attr_groups,
	.attr_update	= amd_uncore_l3_attr_update,
	.name		= "amd_l2",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
	.start		= amd_uncore_start,
	.stop		= amd_uncore_stop,
	.read		= amd_uncore_read,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
	.module		= THIS_MODULE,
};
427 
428 static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
429 {
430 	return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
431 			cpu_to_node(cpu));
432 }
433 
434 static inline struct perf_event **
435 amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
436 {
437 	return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
438 			    cpu_to_node(cpu));
439 }
440 
441 static int amd_uncore_cpu_up_prepare(unsigned int cpu)
442 {
443 	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
444 
445 	if (amd_uncore_nb) {
446 		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
447 		uncore_nb = amd_uncore_alloc(cpu);
448 		if (!uncore_nb)
449 			goto fail;
450 		uncore_nb->cpu = cpu;
451 		uncore_nb->num_counters = num_counters_nb;
452 		uncore_nb->rdpmc_base = RDPMC_BASE_NB;
453 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
454 		uncore_nb->active_mask = &amd_nb_active_mask;
455 		uncore_nb->pmu = &amd_nb_pmu;
456 		uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
457 		if (!uncore_nb->events)
458 			goto fail;
459 		uncore_nb->id = -1;
460 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
461 	}
462 
463 	if (amd_uncore_llc) {
464 		*per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
465 		uncore_llc = amd_uncore_alloc(cpu);
466 		if (!uncore_llc)
467 			goto fail;
468 		uncore_llc->cpu = cpu;
469 		uncore_llc->num_counters = num_counters_llc;
470 		uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
471 		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
472 		uncore_llc->active_mask = &amd_llc_active_mask;
473 		uncore_llc->pmu = &amd_llc_pmu;
474 		uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
475 		if (!uncore_llc->events)
476 			goto fail;
477 		uncore_llc->id = -1;
478 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
479 	}
480 
481 	return 0;
482 
483 fail:
484 	if (uncore_nb) {
485 		kfree(uncore_nb->events);
486 		kfree(uncore_nb);
487 	}
488 
489 	if (uncore_llc) {
490 		kfree(uncore_llc->events);
491 		kfree(uncore_llc);
492 	}
493 
494 	return -ENOMEM;
495 }
496 
497 static struct amd_uncore *
498 amd_uncore_find_online_sibling(struct amd_uncore *this,
499 			       struct amd_uncore * __percpu *uncores)
500 {
501 	unsigned int cpu;
502 	struct amd_uncore *that;
503 
504 	for_each_online_cpu(cpu) {
505 		that = *per_cpu_ptr(uncores, cpu);
506 
507 		if (!that)
508 			continue;
509 
510 		if (this == that)
511 			continue;
512 
513 		if (this->id == that->id) {
514 			hlist_add_head(&this->node, &uncore_unused_list);
515 			this = that;
516 			break;
517 		}
518 	}
519 
520 	this->refcnt++;
521 	return this;
522 }
523 
524 static int amd_uncore_cpu_starting(unsigned int cpu)
525 {
526 	unsigned int eax, ebx, ecx, edx;
527 	struct amd_uncore *uncore;
528 
529 	if (amd_uncore_nb) {
530 		uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
531 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
532 		uncore->id = ecx & 0xff;
533 
534 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
535 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
536 	}
537 
538 	if (amd_uncore_llc) {
539 		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
540 		uncore->id = get_llc_id(cpu);
541 
542 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
543 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
544 	}
545 
546 	return 0;
547 }
548 
549 static void uncore_clean_online(void)
550 {
551 	struct amd_uncore *uncore;
552 	struct hlist_node *n;
553 
554 	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
555 		hlist_del(&uncore->node);
556 		kfree(uncore->events);
557 		kfree(uncore);
558 	}
559 }
560 
561 static void uncore_online(unsigned int cpu,
562 			  struct amd_uncore * __percpu *uncores)
563 {
564 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
565 
566 	uncore_clean_online();
567 
568 	if (cpu == uncore->cpu)
569 		cpumask_set_cpu(cpu, uncore->active_mask);
570 }
571 
572 static int amd_uncore_cpu_online(unsigned int cpu)
573 {
574 	if (amd_uncore_nb)
575 		uncore_online(cpu, amd_uncore_nb);
576 
577 	if (amd_uncore_llc)
578 		uncore_online(cpu, amd_uncore_llc);
579 
580 	return 0;
581 }
582 
583 static void uncore_down_prepare(unsigned int cpu,
584 				struct amd_uncore * __percpu *uncores)
585 {
586 	unsigned int i;
587 	struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
588 
589 	if (this->cpu != cpu)
590 		return;
591 
592 	/* this cpu is going down, migrate to a shared sibling if possible */
593 	for_each_online_cpu(i) {
594 		struct amd_uncore *that = *per_cpu_ptr(uncores, i);
595 
596 		if (cpu == i)
597 			continue;
598 
599 		if (this == that) {
600 			perf_pmu_migrate_context(this->pmu, cpu, i);
601 			cpumask_clear_cpu(cpu, that->active_mask);
602 			cpumask_set_cpu(i, that->active_mask);
603 			that->cpu = i;
604 			break;
605 		}
606 	}
607 }
608 
609 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
610 {
611 	if (amd_uncore_nb)
612 		uncore_down_prepare(cpu, amd_uncore_nb);
613 
614 	if (amd_uncore_llc)
615 		uncore_down_prepare(cpu, amd_uncore_llc);
616 
617 	return 0;
618 }
619 
620 static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
621 {
622 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
623 
624 	if (cpu == uncore->cpu)
625 		cpumask_clear_cpu(cpu, uncore->active_mask);
626 
627 	if (!--uncore->refcnt) {
628 		kfree(uncore->events);
629 		kfree(uncore);
630 	}
631 
632 	*per_cpu_ptr(uncores, cpu) = NULL;
633 }
634 
635 static int amd_uncore_cpu_dead(unsigned int cpu)
636 {
637 	if (amd_uncore_nb)
638 		uncore_dead(cpu, amd_uncore_nb);
639 
640 	if (amd_uncore_llc)
641 		uncore_dead(cpu, amd_uncore_llc);
642 
643 	return 0;
644 }
645 
646 static int __init amd_uncore_init(void)
647 {
648 	struct attribute **df_attr = amd_uncore_df_format_attr;
649 	struct attribute **l3_attr = amd_uncore_l3_format_attr;
650 	union cpuid_0x80000022_ebx ebx;
651 	int ret = -ENODEV;
652 
653 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
654 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
655 		return -ENODEV;
656 
657 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
658 		return -ENODEV;
659 
660 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
661 		pmu_version = 2;
662 
663 	num_counters_nb	= NUM_COUNTERS_NB;
664 	num_counters_llc = NUM_COUNTERS_L2;
665 	if (boot_cpu_data.x86 >= 0x17) {
666 		/*
667 		 * For F17h and above, the Northbridge counters are
668 		 * repurposed as Data Fabric counters. Also, L3
669 		 * counters are supported too. The PMUs are exported
670 		 * based on family as either L2 or L3 and NB or DF.
671 		 */
672 		num_counters_llc	  = NUM_COUNTERS_L3;
673 		amd_nb_pmu.name		  = "amd_df";
674 		amd_llc_pmu.name	  = "amd_l3";
675 		l3_mask			  = true;
676 	}
677 
678 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
679 		if (pmu_version >= 2) {
680 			*df_attr++ = &format_attr_event14v2.attr;
681 			*df_attr++ = &format_attr_umask12.attr;
682 		} else if (boot_cpu_data.x86 >= 0x17) {
683 			*df_attr = &format_attr_event14.attr;
684 		}
685 
686 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
687 		if (!amd_uncore_nb) {
688 			ret = -ENOMEM;
689 			goto fail_nb;
690 		}
691 		ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
692 		if (ret)
693 			goto fail_nb;
694 
695 		if (pmu_version >= 2) {
696 			ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
697 			num_counters_nb = ebx.split.num_df_pmc;
698 		}
699 
700 		pr_info("%d %s %s counters detected\n", num_counters_nb,
701 			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON" : "",
702 			amd_nb_pmu.name);
703 
704 		ret = 0;
705 	}
706 
707 	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
708 		if (boot_cpu_data.x86 >= 0x19) {
709 			*l3_attr++ = &format_attr_event8.attr;
710 			*l3_attr++ = &format_attr_umask8.attr;
711 			*l3_attr++ = &format_attr_threadmask2.attr;
712 		} else if (boot_cpu_data.x86 >= 0x17) {
713 			*l3_attr++ = &format_attr_event8.attr;
714 			*l3_attr++ = &format_attr_umask8.attr;
715 			*l3_attr++ = &format_attr_threadmask8.attr;
716 		}
717 
718 		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
719 		if (!amd_uncore_llc) {
720 			ret = -ENOMEM;
721 			goto fail_llc;
722 		}
723 		ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
724 		if (ret)
725 			goto fail_llc;
726 
727 		pr_info("%d %s %s counters detected\n", num_counters_llc,
728 			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON" : "",
729 			amd_llc_pmu.name);
730 		ret = 0;
731 	}
732 
733 	/*
734 	 * Install callbacks. Core will call them for each online cpu.
735 	 */
736 	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
737 			      "perf/x86/amd/uncore:prepare",
738 			      amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
739 		goto fail_llc;
740 
741 	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
742 			      "perf/x86/amd/uncore:starting",
743 			      amd_uncore_cpu_starting, NULL))
744 		goto fail_prep;
745 	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
746 			      "perf/x86/amd/uncore:online",
747 			      amd_uncore_cpu_online,
748 			      amd_uncore_cpu_down_prepare))
749 		goto fail_start;
750 	return 0;
751 
752 fail_start:
753 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
754 fail_prep:
755 	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
756 fail_llc:
757 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
758 		perf_pmu_unregister(&amd_nb_pmu);
759 	free_percpu(amd_uncore_llc);
760 fail_nb:
761 	free_percpu(amd_uncore_nb);
762 
763 	return ret;
764 }
765 
766 static void __exit amd_uncore_exit(void)
767 {
768 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
769 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
770 	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
771 
772 	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
773 		perf_pmu_unregister(&amd_llc_pmu);
774 		free_percpu(amd_uncore_llc);
775 		amd_uncore_llc = NULL;
776 	}
777 
778 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
779 		perf_pmu_unregister(&amd_nb_pmu);
780 		free_percpu(amd_uncore_nb);
781 		amd_uncore_nb = NULL;
782 	}
783 }
784 
/* Hook the init/exit routines into the module loader. */
module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");
790