// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;

struct amd_uncore_ctx {
	int refcnt;
	int cpu;
	struct perf_event **events;
	struct hlist_node node;
};

struct amd_uncore_pmu {
	char name[UNCORE_NAME_LEN];
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	int group;
	cpumask_t active_mask;
	struct pmu pmu;
	struct amd_uncore_ctx * __percpu *ctx;
};

enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};

union amd_uncore_info {
	struct {
		u64 aux_data:32;	/* auxiliary data */
		u64 num_pmcs:8;		/* number of counters */
		u64 gid:8;		/* group id */
		u64 cid:8;		/* context id */
	} split;
	u64 full;
};

struct amd_uncore {
	union amd_uncore_info __percpu *info;
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	bool init_done;
	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;
	s64 delta;

	/*
	 * since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us
	 */

	prev = local64_read(&hwc->prev_count);

	/*
	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
	 * read counts directly from the corresponding PERF_CTR.
	 */
	if (hwc->event_base_rdpmc < 0)
		rdmsrl(hwc->event_base, new);
	else
		rdpmcl(hwc->event_base_rdpmc, new);

	local64_set(&hwc->prev_count, new);
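	/*
	 * The effective counter width is 64 - COUNTER_SHIFT = 48 bits.
	 * Shifting the raw values up by COUNTER_SHIFT and the difference
	 * back down sign-extends the 48-bit delta, so a counter wraparound
	 * between two reads is still accumulated correctly.
	 */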
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

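	/* PERF_EF_RELOAD: rewrite the saved count so the counter resumes from it */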
	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		event->pmu->read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < pmu->num_counters; i++) {
		if (ctx->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = NULL;

		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (pmu->rdpmc_base < 0)
		hwc->event_base_rdpmc = -1;

	if (flags & PERF_EF_START)
		event->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	event->pmu->stop(event, PERF_EF_UPDATE);

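	/* release the counter slot claimed by this event in amd_uncore_add() */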
	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = event;

		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->cpu < 0)
		return -EINVAL;

	pmu = event_to_amd_uncore_pmu(event);
	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	if (!ctx)
		return -ENODEV;

	/*
	 * NB and last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / last level cache. On family 16h and below,
	 * interrupts can be directed to a single target core; however, event
	 * counts generated by processes running on other cores cannot be
	 * masked out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
	 */
	hwc->config = event->attr.config;
	hwc->idx = -1;

	/*
	 * Since requests can come in on any of the shared cores, we remap
	 * them to a single common cpu.
	 */
	event->cpu = ctx->cpu;

	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pmu *ptr = dev_get_drvdata(dev);
	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				      struct device_attribute *attr,	\
				      char *page)			\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,	rdwrmask,	"config:8-9");		   /* PerfMonV2 UMC */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,	/* event */
	&format_attr_rdwrmask.attr,	/* rdwrmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};

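/*
 * Accessors for the per-CPU uncore info word that the ->scan() callbacks
 * populate: context id (cid), group id (gid) and number of counters.
 */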
static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.num_pmcs;
}

static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	int i;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		ctx = *per_cpu_ptr(pmu->ctx, cpu);
		if (!ctx)
			continue;

		if (cpu == ctx->cpu)
			cpumask_clear_cpu(cpu, &pmu->active_mask);

		if (!--ctx->refcnt) {
			kfree(ctx->events);
			kfree(ctx);
		}

		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
	}
}

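/*
 * Set up the per-CPU context for a newly onlined CPU: reuse the context of
 * an already online CPU that shares the same context id and group, or
 * allocate a fresh one and make this CPU its designated reader.
 */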
static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *prev;
	struct amd_uncore_pmu *pmu;
	int node, cid, gid, i, j;

	if (!uncore->init_done || !uncore->num_pmus)
		return 0;

	cid = amd_uncore_ctx_cid(uncore, cpu);
	gid = amd_uncore_ctx_gid(uncore, cpu);

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
		curr = NULL;

		/* Check for group exclusivity */
		if (gid != pmu->group)
			continue;

		/* Find a sibling context */
		for_each_online_cpu(j) {
			if (cpu == j)
				continue;

			prev = *per_cpu_ptr(pmu->ctx, j);
			if (!prev)
				continue;

			if (cid == amd_uncore_ctx_cid(uncore, j)) {
				curr = prev;
				break;
			}
		}

		/* Allocate context if sibling does not exist */
		if (!curr) {
			node = cpu_to_node(cpu);
			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
			if (!curr)
				goto fail;

			curr->cpu = cpu;
			curr->events = kzalloc_node(sizeof(*curr->events) *
						    pmu->num_counters,
						    GFP_KERNEL, node);
			if (!curr->events) {
				kfree(curr);
				goto fail;
			}

			cpumask_set_cpu(cpu, &pmu->active_mask);
		}

		curr->refcnt++;
		*per_cpu_ptr(pmu->ctx, cpu) = curr;
	}

	return 0;

fail:
	amd_uncore_ctx_free(uncore, cpu);

	return -ENOMEM;
}

static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *next;
	struct amd_uncore_pmu *pmu;
	int i, j;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		curr = *per_cpu_ptr(pmu->ctx, cpu);
		if (!curr)
			continue;

		/* Migrate to a shared sibling if possible */
		for_each_online_cpu(j) {
			next = *per_cpu_ptr(pmu->ctx, j);
			if (!next || cpu == j)
				continue;

			if (curr == next) {
				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
				cpumask_clear_cpu(cpu, &pmu->active_mask);
				cpumask_set_cpu(j, &pmu->active_mask);
				next->cpu = j;
				break;
			}
		}
	}
}

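/*
 * CPU hotplug callbacks: ->scan() runs from the STARTING step on the
 * incoming CPU, ->init() once it is fully online, ->move() when a CPU is
 * about to go down (migrating events to an online sibling), and ->free()
 * after the CPU is dead.
 */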
static int amd_uncore_cpu_starting(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->scan(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->init(uncore, cpu))
			break;
	}

	return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->move(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->free(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret || pmu_version < 2)
		return ret;

	hwc->config = event->attr.config &
		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
					  AMD64_RAW_EVENT_MASK_NB);

	return 0;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
	struct hw_perf_event *hwc = &event->hw;

	if (ret)
		return ret;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;

	/* Delayed start after rdpmc base update */
	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		return;

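	/* NB/DF counters are shared by all CPUs in the same package */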
	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_NB;
	info.split.gid = 0;
	info.split.cid = topology_logical_package_id(cpu);

	if (pmu_version >= 2) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
		info.split.num_pmcs = ebx.split.num_df_pmc;
	}

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
	int ret = amd_uncore_event_init(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 config = event->attr.config;
	u64 mask;

	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

	/*
	 * SliceMask and ThreadMask need to be set for certain L3 events.
	 * For other events, the two fields do not affect the count.
	 */
	if (ret || boot_cpu_data.x86 < 0x17)
		return ret;

	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
			 AMD64_L3_COREID_MASK);

	if (boot_cpu_data.x86 <= 0x18)
		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

	/*
	 * If the user doesn't specify a ThreadMask, they're not trying to
	 * count core 0, so we enable all cores & threads.
	 * We'll also assume that they want to count slice 0 if they specify
	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
	 */
	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
		       AMD64_L3_EN_ALL_CORES;

	hwc->config |= mask;

	return 0;
}

static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		return;

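	/* L2/L3 counters are shared by all CPUs behind the same last level cache */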
	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_L2;
	info.split.gid = 0;
	info.split.cid = per_cpu_llc_id(cpu);

	if (boot_cpu_data.x86 >= 0x17)
		info.split.num_pmcs = NUM_COUNTERS_L3;

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}

static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	if (pmu_version < 2)
		return;

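	/*
	 * CPUID leaf 0x80000022 (EXT_PERFMON_DEBUG_FEATURES): EBX enumerates
	 * the number of UMC counters, ECX the mask of active UMCs in this
	 * package.
	 */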
	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	info.split.gid = topology_logical_package_id(cpu);
	info.split.cid = topology_logical_package_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
	union amd_uncore_info info;
	struct amd_uncore_pmu *pmu;
	int index = 0, gid, i;

	if (pmu_version < 2)
		return 0;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* Find unique groups */
	for_each_online_cpu(i) {
		info = *per_cpu_ptr(uncore->info, i);
		gid = info.split.gid;
		if (test_bit(gid, gmask))
			continue;

		__set_bit(gid, gmask);
		group_num_pmus[gid] = hweight32(info.split.aux_data);
		group_num_pmcs[gid] = info.split.num_pmcs;
		uncore->num_pmus += group_num_pmus[gid];
	}

	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
			       GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

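	/*
	 * Register one PMU per active UMC in each group (package). Control
	 * and counter MSRs for consecutive UMCs are laid out contiguously,
	 * so each PMU's msr_base is offset by its share of CTL/CTR pairs.
	 */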
	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
		for (i = 0; i < group_num_pmus[gid]; i++) {
			pmu = &uncore->pmus[index];
			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
			pmu->rdpmc_base = -1;
			pmu->group = gid;

			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
			if (!pmu->ctx)
				goto done;

			pmu->pmu = (struct pmu) {
				.task_ctx_nr	= perf_invalid_context,
				.attr_groups	= amd_uncore_umc_attr_groups,
				.name		= pmu->name,
				.event_init	= amd_uncore_umc_event_init,
				.add		= amd_uncore_add,
				.del		= amd_uncore_del,
				.start		= amd_uncore_umc_start,
				.stop		= amd_uncore_stop,
				.read		= amd_uncore_read,
				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
				.module		= THIS_MODULE,
			};

			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
				free_percpu(pmu->ctx);
				pmu->ctx = NULL;
				goto done;
			}

			pr_info("%d %s counters detected\n", pmu->num_counters,
				pmu->pmu.name);

			index++;
		}
	}

done:
	uncore->num_pmus = index;
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};

static int __init amd_uncore_init(void)
{
	struct amd_uncore *uncore;
	int ret = -ENODEV;
	int i;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

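	/* PerfMonV2 enumerates DF and UMC counter counts via CPUID leaf 0x80000022 */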
	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];

		BUG_ON(!uncore->scan);
		BUG_ON(!uncore->init);
		BUG_ON(!uncore->move);
		BUG_ON(!uncore->free);

		uncore->info = alloc_percpu(union amd_uncore_info);
		if (!uncore->info) {
			ret = -ENOMEM;
			goto fail;
		}
	};

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				NULL, amd_uncore_cpu_dead);
	if (ret)
		goto fail;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->info) {
			free_percpu(uncore->info);
			uncore->info = NULL;
		}
	}

	return ret;
}

static void __exit amd_uncore_exit(void)
{
	struct amd_uncore *uncore;
	struct amd_uncore_pmu *pmu;
	int i, j;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (!uncore->info)
			continue;

		free_percpu(uncore->info);
		uncore->info = NULL;

		for (j = 0; j < uncore->num_pmus; j++) {
			pmu = &uncore->pmus[j];
			if (!pmu->ctx)
				continue;

			perf_pmu_unregister(&pmu->pmu);
			free_percpu(pmu->ctx);
			pmu->ctx = NULL;
		}

		kfree(uncore->pmus);
		uncore->pmus = NULL;
	}
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");