xref: /linux/arch/x86/events/amd/uncore.c (revision ff5ccdb8d5bd242f1064c6f7996603e47e28d095)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Advanced Micro Devices, Inc.
4  *
5  * Author: Jacob Shin <jacob.shin@amd.com>
6  */
7 
8 #include <linux/perf_event.h>
9 #include <linux/percpu.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/init.h>
13 #include <linux/cpu.h>
14 #include <linux/cpumask.h>
15 #include <linux/cpufeature.h>
16 #include <linux/smp.h>
17 
18 #include <asm/perf_event.h>
19 #include <asm/cpuid/api.h>
20 #include <asm/msr.h>
21 
/*
 * Default counter counts per PMU type. NUM_COUNTERS_MAX bounds the
 * per-context bitmap of active counters.
 */
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
#define NUM_COUNTERS_MAX	64

/* RDPMC index bases assigned to the NB/DF PMU and to the L2/L3 PMU */
#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

/*
 * COUNTER_SHIFT:    number of high bits discarded when computing deltas
 * UNCORE_NAME_LEN:  size of the PMU name buffer
 * UNCORE_GROUP_MAX: size of the per-group tables used during UMC setup
 */
#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

/* Prefix every pr_*() message emitted from this file */
#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

/* Set to 2 by amd_uncore_init() when X86_FEATURE_PERFMON_V2 is present */
static int pmu_version;
38 
/*
 * Counter context shared by all CPUs that map to the same context id
 * for a given PMU.
 */
struct amd_uncore_ctx {
	/* Number of CPUs whose per-CPU slot points at this context */
	int refcnt;
	/* CPU that events are remapped to and on which the hrtimer does work */
	int cpu;
	/* One slot per hardware counter, holding the event that owns it */
	struct perf_event **events;
	/* Bit set for every counter index that is currently started */
	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
	/* Number of started events; the hrtimer runs while this is non-zero */
	int nr_active;
	/* Periodic timer that reads all active events */
	struct hrtimer hrtimer;
	/* Timer period in nanoseconds */
	u64 hrtimer_duration;
};
48 
/* One registered perf PMU together with its counter layout. */
struct amd_uncore_pmu {
	/* Backing storage for pmu.name */
	char name[UNCORE_NAME_LEN];
	/* Number of hardware counters, i.e. slots in each context's events[] */
	int num_counters;
	/* RDPMC index of counter 0; negative when counters are read via MSR only */
	int rdpmc_base;
	/* MSR of counter 0's control register; control/count MSRs are interleaved */
	u32 msr_base;
	/* Group id; only CPUs whose info reports the same gid attach to this PMU */
	int group;
	/* CPUs that currently own a context; exported through the cpumask attribute */
	cpumask_t active_mask;
	struct pmu pmu;
	/* Per-CPU pointer to the (possibly shared) context, NULL if none */
	struct amd_uncore_ctx * __percpu *ctx;
};
59 
/* Indices into uncores[]; UNCORE_TYPE_MAX is the number of uncore types. */
enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};
67 
/*
 * Per-CPU description of an uncore, filled in by the scan callbacks and
 * consumed by the init callbacks. "full" aliases the bitfields as one u64.
 */
union amd_uncore_info {
	struct {
		u64	aux_data:32;	/* auxiliary data */
		u64	num_pmcs:8;	/* number of counters */
		u64	gid:8;		/* group id */
		u64	cid:8;		/* context id */
	} split;
	u64		full;
};
77 
/* State and CPU hotplug callbacks for one uncore type. */
struct amd_uncore {
	/* Per-CPU info written by ->scan() */
	union amd_uncore_info  __percpu *info;
	/* Array of num_pmus PMUs, allocated by the first ->init() call */
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	/* Set once the one-time PMU setup has been attempted */
	bool init_done;
	/* Invoked from the cpuhp "starting" step */
	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	/* Invoked from the cpuhp "online" step */
	int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
	/* Invoked from the cpuhp "online" teardown (CPU going down) */
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	/* Invoked from the cpuhp "prepare" teardown (CPU dead) */
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};
88 
/* Tentative definition; the initializer with the callbacks is further down */
static struct amd_uncore uncores[UNCORE_TYPE_MAX];

/* Interval for hrtimer, defaults to 60000 milliseconds */
static unsigned int update_interval = 60 * MSEC_PER_SEC;
/* Mode 0444: the value is picked up when a context is allocated */
module_param(update_interval, uint, 0444);
94 
95 static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
96 {
97 	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
98 }
99 
100 static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
101 {
102 	struct amd_uncore_ctx *ctx;
103 	struct perf_event *event;
104 	int bit;
105 
106 	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
107 
108 	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
109 		return HRTIMER_NORESTART;
110 
111 	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
112 		event = ctx->events[bit];
113 		event->pmu->read(event);
114 	}
115 
116 	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
117 	return HRTIMER_RESTART;
118 }
119 
120 static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
121 {
122 	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
123 		      HRTIMER_MODE_REL_PINNED_HARD);
124 }
125 
126 static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
127 {
128 	hrtimer_cancel(&ctx->hrtimer);
129 }
130 
131 static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
132 {
133 	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
134 }
135 
136 static void amd_uncore_read(struct perf_event *event)
137 {
138 	struct hw_perf_event *hwc = &event->hw;
139 	u64 prev, new;
140 	s64 delta;
141 
142 	/*
143 	 * since we do not enable counter overflow interrupts,
144 	 * we do not have to worry about prev_count changing on us
145 	 */
146 
147 	prev = local64_read(&hwc->prev_count);
148 
149 	/*
150 	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
151 	 * read counts directly from the corresponding PERF_CTR.
152 	 */
153 	if (hwc->event_base_rdpmc < 0)
154 		rdmsrq(hwc->event_base, new);
155 	else
156 		new = rdpmc(hwc->event_base_rdpmc);
157 
158 	local64_set(&hwc->prev_count, new);
159 	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
160 	delta >>= COUNTER_SHIFT;
161 	local64_add(delta, &event->count);
162 }
163 
164 static void amd_uncore_start(struct perf_event *event, int flags)
165 {
166 	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
167 	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
168 	struct hw_perf_event *hwc = &event->hw;
169 
170 	if (!ctx->nr_active++)
171 		amd_uncore_start_hrtimer(ctx);
172 
173 	if (flags & PERF_EF_RELOAD)
174 		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
175 
176 	hwc->state = 0;
177 	__set_bit(hwc->idx, ctx->active_mask);
178 	wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
179 	perf_event_update_userpage(event);
180 }
181 
182 static void amd_uncore_stop(struct perf_event *event, int flags)
183 {
184 	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
185 	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
186 	struct hw_perf_event *hwc = &event->hw;
187 
188 	wrmsrq(hwc->config_base, hwc->config);
189 	hwc->state |= PERF_HES_STOPPED;
190 
191 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
192 		event->pmu->read(event);
193 		hwc->state |= PERF_HES_UPTODATE;
194 	}
195 
196 	if (!--ctx->nr_active)
197 		amd_uncore_cancel_hrtimer(ctx);
198 
199 	__clear_bit(hwc->idx, ctx->active_mask);
200 }
201 
202 static int amd_uncore_add(struct perf_event *event, int flags)
203 {
204 	int i;
205 	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
206 	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
207 	struct hw_perf_event *hwc = &event->hw;
208 
209 	/* are we already assigned? */
210 	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
211 		goto out;
212 
213 	for (i = 0; i < pmu->num_counters; i++) {
214 		if (ctx->events[i] == event) {
215 			hwc->idx = i;
216 			goto out;
217 		}
218 	}
219 
220 	/* if not, take the first available counter */
221 	hwc->idx = -1;
222 	for (i = 0; i < pmu->num_counters; i++) {
223 		struct perf_event *tmp = NULL;
224 
225 		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
226 			hwc->idx = i;
227 			break;
228 		}
229 	}
230 
231 out:
232 	if (hwc->idx == -1)
233 		return -EBUSY;
234 
235 	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
236 	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
237 	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
238 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
239 
240 	if (pmu->rdpmc_base < 0)
241 		hwc->event_base_rdpmc = -1;
242 
243 	if (flags & PERF_EF_START)
244 		event->pmu->start(event, PERF_EF_RELOAD);
245 
246 	return 0;
247 }
248 
249 static void amd_uncore_del(struct perf_event *event, int flags)
250 {
251 	int i;
252 	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
253 	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
254 	struct hw_perf_event *hwc = &event->hw;
255 
256 	event->pmu->stop(event, PERF_EF_UPDATE);
257 
258 	for (i = 0; i < pmu->num_counters; i++) {
259 		struct perf_event *tmp = event;
260 
261 		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
262 			break;
263 	}
264 
265 	hwc->idx = -1;
266 }
267 
268 static int amd_uncore_event_init(struct perf_event *event)
269 {
270 	struct amd_uncore_pmu *pmu;
271 	struct amd_uncore_ctx *ctx;
272 	struct hw_perf_event *hwc = &event->hw;
273 
274 	if (event->attr.type != event->pmu->type)
275 		return -ENOENT;
276 
277 	if (event->cpu < 0)
278 		return -EINVAL;
279 
280 	pmu = event_to_amd_uncore_pmu(event);
281 	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
282 	if (!ctx)
283 		return -ENODEV;
284 
285 	/*
286 	 * NB and Last level cache counters (MSRs) are shared across all cores
287 	 * that share the same NB / Last level cache.  On family 16h and below,
288 	 * Interrupts can be directed to a single target core, however, event
289 	 * counts generated by processes running on other cores cannot be masked
290 	 * out. So we do not support sampling and per-thread events via
291 	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
292 	 */
293 	hwc->config = event->attr.config;
294 	hwc->idx = -1;
295 
296 	/*
297 	 * since request can come in to any of the shared cores, we will remap
298 	 * to a single common cpu.
299 	 */
300 	event->cpu = ctx->cpu;
301 
302 	return 0;
303 }
304 
305 static umode_t
306 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
307 {
308 	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
309 	       attr->mode : 0;
310 }
311 
312 static umode_t
313 amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
314 {
315 	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
316 }
317 
318 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
319 					    struct device_attribute *attr,
320 					    char *buf)
321 {
322 	struct pmu *ptr = dev_get_drvdata(dev);
323 	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);
324 
325 	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
326 }
327 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
328 
/* Attributes common to every uncore PMU: just the "cpumask" file */
static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed group, so the attributes sit directly in the PMU's sysfs directory */
static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};
337 
338 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
339 static ssize_t __uncore_##_var##_show(struct device *dev,		\
340 				struct device_attribute *attr,		\
341 				char *page)				\
342 {									\
343 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
344 	return sprintf(page, _format "\n");				\
345 }									\
346 static struct device_attribute format_attr_##_var =			\
347 	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
348 
349 DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
350 DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
351 DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
352 DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3, PerfMonV2 UMC */
353 DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
354 DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
355 DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
356 DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
357 DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
358 DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");	   /* F19h L3 */
359 DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3 */
360 DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
361 DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
362 DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,	rdwrmask,	"config:8-9");		   /* PerfMonV2 UMC */
363 
/*
 * Common DF and NB attributes.
 *
 * These are the defaults; amd_uncore_df_ctx_init() overwrites the entries
 * in place according to the PMU version and CPU family.
 */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/*
 * Common L2 and L3 attributes.
 *
 * The threadmask slot is left empty here and filled in by
 * amd_uncore_l3_ctx_init() on family 0x17 and later.
 */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,       /* event */
	&format_attr_rdwrmask.attr,     /* rdwrmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};
400 
/* "format" directory contents for the DF/NB PMU */
static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

/* "format" directory contents common to the L2/L3 PMU */
static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

/* Additional L3 "format" entries, visible only on families 0x17-0x18 */
static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

/* Additional L3 "format" entries, visible only on family 0x19 and later */
static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

/* "format" directory contents for the UMC PMUs */
static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};
427 
/* Passed as .attr_groups of the DF/NB PMU */
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

/* Passed as .attr_groups of the L2/L3 PMU */
static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

/* Passed as .attr_update of the L2/L3 PMU: family-specific format entries */
static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

/* Passed as .attr_groups of every UMC PMU */
static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};
451 
452 static __always_inline
453 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
454 {
455 	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
456 	return info->split.cid;
457 }
458 
459 static __always_inline
460 int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
461 {
462 	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
463 	return info->split.gid;
464 }
465 
466 static __always_inline
467 int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
468 {
469 	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
470 	return info->split.num_pmcs;
471 }
472 
473 static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
474 {
475 	struct amd_uncore_pmu *pmu;
476 	struct amd_uncore_ctx *ctx;
477 	int i;
478 
479 	if (!uncore->init_done)
480 		return;
481 
482 	for (i = 0; i < uncore->num_pmus; i++) {
483 		pmu = &uncore->pmus[i];
484 		ctx = *per_cpu_ptr(pmu->ctx, cpu);
485 		if (!ctx)
486 			continue;
487 
488 		if (cpu == ctx->cpu)
489 			cpumask_clear_cpu(cpu, &pmu->active_mask);
490 
491 		if (!--ctx->refcnt) {
492 			kfree(ctx->events);
493 			kfree(ctx);
494 		}
495 
496 		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
497 	}
498 }
499 
500 static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
501 {
502 	struct amd_uncore_ctx *curr, *prev;
503 	struct amd_uncore_pmu *pmu;
504 	int node, cid, gid, i, j;
505 
506 	if (!uncore->init_done || !uncore->num_pmus)
507 		return 0;
508 
509 	cid = amd_uncore_ctx_cid(uncore, cpu);
510 	gid = amd_uncore_ctx_gid(uncore, cpu);
511 
512 	for (i = 0; i < uncore->num_pmus; i++) {
513 		pmu = &uncore->pmus[i];
514 		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
515 		curr = NULL;
516 
517 		/* Check for group exclusivity */
518 		if (gid != pmu->group)
519 			continue;
520 
521 		/* Find a sibling context */
522 		for_each_online_cpu(j) {
523 			if (cpu == j)
524 				continue;
525 
526 			prev = *per_cpu_ptr(pmu->ctx, j);
527 			if (!prev)
528 				continue;
529 
530 			if (cid == amd_uncore_ctx_cid(uncore, j)) {
531 				curr = prev;
532 				break;
533 			}
534 		}
535 
536 		/* Allocate context if sibling does not exist */
537 		if (!curr) {
538 			node = cpu_to_node(cpu);
539 			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
540 			if (!curr)
541 				goto fail;
542 
543 			curr->cpu = cpu;
544 			curr->events = kzalloc_node(sizeof(*curr->events) *
545 						    pmu->num_counters,
546 						    GFP_KERNEL, node);
547 			if (!curr->events) {
548 				kfree(curr);
549 				goto fail;
550 			}
551 
552 			amd_uncore_init_hrtimer(curr);
553 			curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;
554 
555 			cpumask_set_cpu(cpu, &pmu->active_mask);
556 		}
557 
558 		curr->refcnt++;
559 		*per_cpu_ptr(pmu->ctx, cpu) = curr;
560 	}
561 
562 	return 0;
563 
564 fail:
565 	amd_uncore_ctx_free(uncore, cpu);
566 
567 	return -ENOMEM;
568 }
569 
570 static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
571 {
572 	struct amd_uncore_ctx *curr, *next;
573 	struct amd_uncore_pmu *pmu;
574 	int i, j;
575 
576 	if (!uncore->init_done)
577 		return;
578 
579 	for (i = 0; i < uncore->num_pmus; i++) {
580 		pmu = &uncore->pmus[i];
581 		curr = *per_cpu_ptr(pmu->ctx, cpu);
582 		if (!curr)
583 			continue;
584 
585 		/* Migrate to a shared sibling if possible */
586 		for_each_online_cpu(j) {
587 			next = *per_cpu_ptr(pmu->ctx, j);
588 			if (!next || cpu == j)
589 				continue;
590 
591 			if (curr == next) {
592 				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
593 				cpumask_clear_cpu(cpu, &pmu->active_mask);
594 				cpumask_set_cpu(j, &pmu->active_mask);
595 				next->cpu = j;
596 				break;
597 			}
598 		}
599 	}
600 }
601 
602 static int amd_uncore_cpu_starting(unsigned int cpu)
603 {
604 	struct amd_uncore *uncore;
605 	int i;
606 
607 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
608 		uncore = &uncores[i];
609 		uncore->scan(uncore, cpu);
610 	}
611 
612 	return 0;
613 }
614 
615 static int amd_uncore_cpu_online(unsigned int cpu)
616 {
617 	struct amd_uncore *uncore;
618 	int i;
619 
620 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
621 		uncore = &uncores[i];
622 		if (uncore->init(uncore, cpu))
623 			break;
624 	}
625 
626 	return 0;
627 }
628 
629 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
630 {
631 	struct amd_uncore *uncore;
632 	int i;
633 
634 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
635 		uncore = &uncores[i];
636 		uncore->move(uncore, cpu);
637 	}
638 
639 	return 0;
640 }
641 
642 static int amd_uncore_cpu_dead(unsigned int cpu)
643 {
644 	struct amd_uncore *uncore;
645 	int i;
646 
647 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
648 		uncore = &uncores[i];
649 		uncore->free(uncore, cpu);
650 	}
651 
652 	return 0;
653 }
654 
655 static int amd_uncore_df_event_init(struct perf_event *event)
656 {
657 	struct hw_perf_event *hwc = &event->hw;
658 	int ret = amd_uncore_event_init(event);
659 
660 	hwc->config = event->attr.config &
661 		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
662 					  AMD64_RAW_EVENT_MASK_NB);
663 
664 	return ret;
665 }
666 
667 static int amd_uncore_df_add(struct perf_event *event, int flags)
668 {
669 	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
670 	struct hw_perf_event *hwc = &event->hw;
671 
672 	if (ret)
673 		return ret;
674 
675 	/*
676 	 * The first four DF counters are accessible via RDPMC index 6 to 9
677 	 * followed by the L3 counters from index 10 to 15. For processors
678 	 * with more than four DF counters, the DF RDPMC assignments become
679 	 * discontiguous as the additional counters are accessible starting
680 	 * from index 16.
681 	 */
682 	if (hwc->idx >= NUM_COUNTERS_NB)
683 		hwc->event_base_rdpmc += NUM_COUNTERS_L3;
684 
685 	/* Delayed start after rdpmc base update */
686 	if (flags & PERF_EF_START)
687 		amd_uncore_start(event, PERF_EF_RELOAD);
688 
689 	return 0;
690 }
691 
692 static
693 void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
694 {
695 	union cpuid_0x80000022_ebx ebx;
696 	union amd_uncore_info info;
697 
698 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
699 		return;
700 
701 	info.split.aux_data = 0;
702 	info.split.num_pmcs = NUM_COUNTERS_NB;
703 	info.split.gid = 0;
704 	info.split.cid = topology_amd_node_id(cpu);
705 
706 	if (pmu_version >= 2) {
707 		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
708 		info.split.num_pmcs = ebx.split.num_df_pmc;
709 	}
710 
711 	*per_cpu_ptr(uncore->info, cpu) = info;
712 }
713 
/*
 * CPU-online init callback for the DF (NB on older families) uncore.
 *
 * The first call performs the one-time setup: it allocates the single PMU,
 * selects the format attributes for the running CPU, and registers the PMU
 * with perf. Every path through the "done" label marks the setup as
 * attempted, so it is never retried. If it did not complete, num_pmus stays
 * 0 and amd_uncore_ctx_init() has nothing to do.
 *
 * Returns the result of amd_uncore_ctx_init() for @cpu.
 */
static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No counters recorded for this CPU: nothing to register */
	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc_obj(*uncore->pmus);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	/*
	 * Patch the shared format attribute array in place: PerfMonV2
	 * replaces both "event" and "umask", F17h+ only replaces "event".
	 */
	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	/* On registration failure release the per-CPU slots; num_pmus stays 0 */
	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	/* Mark the setup as attempted regardless of its outcome */
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}
789 
790 static int amd_uncore_l3_event_init(struct perf_event *event)
791 {
792 	int ret = amd_uncore_event_init(event);
793 	struct hw_perf_event *hwc = &event->hw;
794 	u64 config = event->attr.config;
795 	u64 mask;
796 
797 	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;
798 
799 	/*
800 	 * SliceMask and ThreadMask need to be set for certain L3 events.
801 	 * For other events, the two fields do not affect the count.
802 	 */
803 	if (ret || boot_cpu_data.x86 < 0x17)
804 		return ret;
805 
806 	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
807 			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
808 			 AMD64_L3_COREID_MASK);
809 
810 	if (boot_cpu_data.x86 <= 0x18)
811 		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
812 		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
813 
814 	/*
815 	 * If the user doesn't specify a ThreadMask, they're not trying to
816 	 * count core 0, so we enable all cores & threads.
817 	 * We'll also assume that they want to count slice 0 if they specify
818 	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
819 	 */
820 	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
821 		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
822 		       AMD64_L3_EN_ALL_CORES;
823 
824 	hwc->config |= mask;
825 
826 	return 0;
827 }
828 
829 static
830 void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
831 {
832 	union amd_uncore_info info;
833 
834 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
835 		return;
836 
837 	info.split.aux_data = 0;
838 	info.split.num_pmcs = NUM_COUNTERS_L2;
839 	info.split.gid = 0;
840 	info.split.cid = per_cpu_llc_id(cpu);
841 
842 	if (boot_cpu_data.x86 >= 0x17)
843 		info.split.num_pmcs = NUM_COUNTERS_L3;
844 
845 	*per_cpu_ptr(uncore->info, cpu) = info;
846 }
847 
/*
 * CPU-online init callback for the L3 (L2 on older families) uncore.
 *
 * The first call performs the one-time setup: it allocates the single PMU,
 * selects the format attributes for the running CPU, and registers the PMU
 * with perf. Every path through the "done" label marks the setup as
 * attempted, so it is never retried. If it did not complete, num_pmus stays
 * 0 and amd_uncore_ctx_init() has nothing to do.
 *
 * Returns the result of amd_uncore_ctx_init() for @cpu.
 */
static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No counters recorded for this CPU: nothing to register */
	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc_obj(*uncore->pmus);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	/*
	 * Patch the shared format attribute array in place for F17h+:
	 * 8-bit event, umask, and the family-specific threadmask width
	 * in the slot reserved for it.
	 */
	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	/* On registration failure release the per-CPU slots; num_pmus stays 0 */
	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	/* Mark the setup as attempted regardless of its outcome */
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}
925 
926 static int amd_uncore_umc_event_init(struct perf_event *event)
927 {
928 	struct hw_perf_event *hwc = &event->hw;
929 	int ret = amd_uncore_event_init(event);
930 
931 	if (ret)
932 		return ret;
933 
934 	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;
935 
936 	return 0;
937 }
938 
939 static void amd_uncore_umc_start(struct perf_event *event, int flags)
940 {
941 	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
942 	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
943 	struct hw_perf_event *hwc = &event->hw;
944 
945 	if (!ctx->nr_active++)
946 		amd_uncore_start_hrtimer(ctx);
947 
948 	if (flags & PERF_EF_RELOAD)
949 		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
950 
951 	hwc->state = 0;
952 	__set_bit(hwc->idx, ctx->active_mask);
953 	wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
954 	perf_event_update_userpage(event);
955 }
956 
957 static void amd_uncore_umc_read(struct perf_event *event)
958 {
959 	struct hw_perf_event *hwc = &event->hw;
960 	u64 prev, new, shift;
961 	s64 delta;
962 
963 	shift = COUNTER_SHIFT + 1;
964 	prev = local64_read(&hwc->prev_count);
965 
966 	/*
967 	 * UMC counters do not have RDPMC assignments. Read counts directly
968 	 * from the corresponding PERF_CTR.
969 	 */
970 	rdmsrq(hwc->event_base, new);
971 
972 	/*
973 	 * Unlike the other uncore counters, UMC counters saturate and set the
974 	 * Overflow bit (bit 48) on overflow. Since they do not roll over,
975 	 * proactively reset the corresponding PERF_CTR when bit 47 is set so
976 	 * that the counter never gets a chance to saturate.
977 	 */
978 	if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
979 		wrmsrq(hwc->event_base, 0);
980 		local64_set(&hwc->prev_count, 0);
981 	} else {
982 		local64_set(&hwc->prev_count, new);
983 	}
984 
985 	delta = (new << shift) - (prev << shift);
986 	delta >>= shift;
987 	local64_add(delta, &event->count);
988 }
989 
990 static
991 void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
992 {
993 	union cpuid_0x80000022_ebx ebx;
994 	union amd_uncore_info info;
995 	unsigned int eax, ecx, edx;
996 
997 	if (pmu_version < 2)
998 		return;
999 
1000 	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
1001 	info.split.aux_data = ecx;	/* stash active mask */
1002 	info.split.num_pmcs = ebx.split.num_umc_pmc;
1003 	info.split.gid = topology_amd_node_id(cpu);
1004 	info.split.cid = topology_amd_node_id(cpu);
1005 	*per_cpu_ptr(uncore->info, cpu) = info;
1006 }
1007 
1008 static
1009 int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
1010 {
1011 	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
1012 	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
1013 	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
1014 	union amd_uncore_info info;
1015 	struct amd_uncore_pmu *pmu;
1016 	int gid, i;
1017 	u16 index = 0;
1018 
1019 	if (pmu_version < 2)
1020 		return 0;
1021 
1022 	/* Run just once */
1023 	if (uncore->init_done)
1024 		return amd_uncore_ctx_init(uncore, cpu);
1025 
1026 	/* Find unique groups */
1027 	for_each_online_cpu(i) {
1028 		info = *per_cpu_ptr(uncore->info, i);
1029 		gid = info.split.gid;
1030 		if (test_bit(gid, gmask))
1031 			continue;
1032 
1033 		__set_bit(gid, gmask);
1034 		group_num_pmus[gid] = hweight32(info.split.aux_data);
1035 		group_num_pmcs[gid] = info.split.num_pmcs;
1036 		uncore->num_pmus += group_num_pmus[gid];
1037 	}
1038 
1039 	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
1040 			       GFP_KERNEL);
1041 	if (!uncore->pmus) {
1042 		uncore->num_pmus = 0;
1043 		goto done;
1044 	}
1045 
1046 	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
1047 		for (i = 0; i < group_num_pmus[gid]; i++) {
1048 			pmu = &uncore->pmus[index];
1049 			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
1050 			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
1051 			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
1052 			pmu->rdpmc_base = -1;
1053 			pmu->group = gid;
1054 
1055 			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
1056 			if (!pmu->ctx)
1057 				goto done;
1058 
1059 			pmu->pmu = (struct pmu) {
1060 				.task_ctx_nr	= perf_invalid_context,
1061 				.attr_groups	= amd_uncore_umc_attr_groups,
1062 				.name		= pmu->name,
1063 				.event_init	= amd_uncore_umc_event_init,
1064 				.add		= amd_uncore_add,
1065 				.del		= amd_uncore_del,
1066 				.start		= amd_uncore_umc_start,
1067 				.stop		= amd_uncore_stop,
1068 				.read		= amd_uncore_umc_read,
1069 				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
1070 				.module		= THIS_MODULE,
1071 			};
1072 
1073 			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
1074 				free_percpu(pmu->ctx);
1075 				pmu->ctx = NULL;
1076 				goto done;
1077 			}
1078 
1079 			pr_info("%d %s counters detected\n", pmu->num_counters,
1080 				pmu->pmu.name);
1081 
1082 			index++;
1083 		}
1084 	}
1085 
1086 done:
1087 	uncore->num_pmus = index;
1088 	uncore->init_done = true;
1089 
1090 	return amd_uncore_ctx_init(uncore, cpu);
1091 }
1092 
/*
 * Callback table, indexed by UNCORE_TYPE_*. Each type has its own scan and
 * init callbacks; move and free are shared by all types.
 */
static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};
1116 
1117 static int __init amd_uncore_init(void)
1118 {
1119 	struct amd_uncore *uncore;
1120 	int ret = -ENODEV;
1121 	int i;
1122 
1123 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
1124 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
1125 		return -ENODEV;
1126 
1127 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
1128 		return -ENODEV;
1129 
1130 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
1131 		pmu_version = 2;
1132 
1133 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1134 		uncore = &uncores[i];
1135 
1136 		BUG_ON(!uncore->scan);
1137 		BUG_ON(!uncore->init);
1138 		BUG_ON(!uncore->move);
1139 		BUG_ON(!uncore->free);
1140 
1141 		uncore->info = alloc_percpu(union amd_uncore_info);
1142 		if (!uncore->info) {
1143 			ret = -ENOMEM;
1144 			goto fail;
1145 		}
1146 	};
1147 
1148 	/*
1149 	 * Install callbacks. Core will call them for each online cpu.
1150 	 */
1151 	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
1152 				"perf/x86/amd/uncore:prepare",
1153 				NULL, amd_uncore_cpu_dead);
1154 	if (ret)
1155 		goto fail;
1156 
1157 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
1158 				"perf/x86/amd/uncore:starting",
1159 				amd_uncore_cpu_starting, NULL);
1160 	if (ret)
1161 		goto fail_prep;
1162 
1163 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
1164 				"perf/x86/amd/uncore:online",
1165 				amd_uncore_cpu_online,
1166 				amd_uncore_cpu_down_prepare);
1167 	if (ret)
1168 		goto fail_start;
1169 
1170 	return 0;
1171 
1172 fail_start:
1173 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1174 fail_prep:
1175 	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1176 fail:
1177 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1178 		uncore = &uncores[i];
1179 		if (uncore->info) {
1180 			free_percpu(uncore->info);
1181 			uncore->info = NULL;
1182 		}
1183 	}
1184 
1185 	return ret;
1186 }
1187 
1188 static void __exit amd_uncore_exit(void)
1189 {
1190 	struct amd_uncore *uncore;
1191 	struct amd_uncore_pmu *pmu;
1192 	int i, j;
1193 
1194 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
1195 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1196 	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1197 
1198 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1199 		uncore = &uncores[i];
1200 		if (!uncore->info)
1201 			continue;
1202 
1203 		free_percpu(uncore->info);
1204 		uncore->info = NULL;
1205 
1206 		for (j = 0; j < uncore->num_pmus; j++) {
1207 			pmu = &uncore->pmus[j];
1208 			if (!pmu->ctx)
1209 				continue;
1210 
1211 			perf_pmu_unregister(&pmu->pmu);
1212 			free_percpu(pmu->ctx);
1213 			pmu->ctx = NULL;
1214 		}
1215 
1216 		kfree(uncore->pmus);
1217 		uncore->pmus = NULL;
1218 	}
1219 }
1220 
/* Register the init and exit entry points defined above */
module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");
1226