// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;

struct amd_uncore_ctx {
	int refcnt;
	int cpu;
	struct perf_event **events;
	struct hlist_node node;
};

struct amd_uncore_pmu {
	char name[UNCORE_NAME_LEN];
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	int group;
	cpumask_t active_mask;
	struct pmu pmu;
	struct amd_uncore_ctx * __percpu *ctx;
};

enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};

union amd_uncore_info {
	struct {
		u64 aux_data:32;	/* auxiliary data */
		u64 num_pmcs:8;		/* number of counters */
		u64 gid:8;		/* group id */
		u64 cid:8;		/* context id */
	} split;
	u64 full;
};

struct amd_uncore {
	union amd_uncore_info __percpu *info;
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	bool init_done;
	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;
	s64 delta;

	/*
	 * since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us
	 */

	prev = local64_read(&hwc->prev_count);

	/*
	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
	 * read counts directly from the corresponding PERF_CTR.
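	 * The UMC PMUs, for example, use rdpmc_base = -1, which
	 * amd_uncore_add() propagates as an invalid event_base_rdpmc.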
	 */
	if (hwc->event_base_rdpmc < 0)
		rdmsrl(hwc->event_base, new);
	else
		rdpmcl(hwc->event_base_rdpmc, new);

	local64_set(&hwc->prev_count, new);
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		event->pmu->read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < pmu->num_counters; i++) {
		if (ctx->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = NULL;

		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (pmu->rdpmc_base < 0)
		hwc->event_base_rdpmc = -1;

	if (flags & PERF_EF_START)
		event->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	event->pmu->stop(event, PERF_EF_UPDATE);

	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = event;

		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->cpu < 0)
		return -EINVAL;

	pmu = event_to_amd_uncore_pmu(event);
	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	if (!ctx)
		return -ENODEV;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache. On family 16h and below,
	 * interrupts can be directed to a single target core, however, event
	 * counts generated by processes running on other cores cannot be masked
	 * out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
	 */
	hwc->config = event->attr.config;
	hwc->idx = -1;

	/*
	 * since requests can come in to any of the shared cores, we will remap
	 * to a single common cpu.
	 */
	event->cpu = ctx->cpu;

	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pmu *ptr = dev_get_drvdata(dev);
	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				      struct device_attribute *attr,	\
				      char *page)			\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,	rdwrmask,	"config:8-9");		   /* PerfMonV2 UMC */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,	/* event */
	&format_attr_rdwrmask.attr,	/* rdwrmask */
	NULL,
};

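/*
 * The family-specific L3 format groups below are attached through
 * attr_update and filtered at runtime by their is_visible() callbacks,
 * so only the fields valid for the running CPU family are exposed.
 */
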
/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};

static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.num_pmcs;
}

static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	int i;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		ctx = *per_cpu_ptr(pmu->ctx, cpu);
		if (!ctx)
			continue;

		if (cpu == ctx->cpu)
			cpumask_clear_cpu(cpu, &pmu->active_mask);

		if (!--ctx->refcnt) {
			kfree(ctx->events);
			kfree(ctx);
		}

		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
	}
}

static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *prev;
	struct amd_uncore_pmu *pmu;
	int node, cid, gid, i, j;

	if (!uncore->init_done || !uncore->num_pmus)
		return 0;

	cid = amd_uncore_ctx_cid(uncore, cpu);
	gid = amd_uncore_ctx_gid(uncore, cpu);

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
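
		/*
		 * Start with no context on this CPU; reuse a sibling context
		 * from the same group and context id if one exists, otherwise
		 * allocate a new one below.
		 */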
		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
		curr = NULL;

		/* Check for group exclusivity */
		if (gid != pmu->group)
			continue;

		/* Find a sibling context */
		for_each_online_cpu(j) {
			if (cpu == j)
				continue;

			prev = *per_cpu_ptr(pmu->ctx, j);
			if (!prev)
				continue;

			if (cid == amd_uncore_ctx_cid(uncore, j)) {
				curr = prev;
				break;
			}
		}

		/* Allocate context if sibling does not exist */
		if (!curr) {
			node = cpu_to_node(cpu);
			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
			if (!curr)
				goto fail;

			curr->cpu = cpu;
			curr->events = kzalloc_node(sizeof(*curr->events) *
						    pmu->num_counters,
						    GFP_KERNEL, node);
			if (!curr->events) {
				kfree(curr);
				goto fail;
			}

			cpumask_set_cpu(cpu, &pmu->active_mask);
		}

		curr->refcnt++;
		*per_cpu_ptr(pmu->ctx, cpu) = curr;
	}

	return 0;

fail:
	amd_uncore_ctx_free(uncore, cpu);

	return -ENOMEM;
}

static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *next;
	struct amd_uncore_pmu *pmu;
	int i, j;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		curr = *per_cpu_ptr(pmu->ctx, cpu);
		if (!curr)
			continue;

		/* Migrate to a shared sibling if possible */
		for_each_online_cpu(j) {
			next = *per_cpu_ptr(pmu->ctx, j);
			if (!next || cpu == j)
				continue;

			if (curr == next) {
				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
				cpumask_clear_cpu(cpu, &pmu->active_mask);
				cpumask_set_cpu(j, &pmu->active_mask);
				next->cpu = j;
				break;
			}
		}
	}
}

static int amd_uncore_cpu_starting(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->scan(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->init(uncore, cpu))
			break;
	}

	return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->move(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->free(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret || pmu_version < 2)
		return ret;

	hwc->config = event->attr.config &
		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
					  AMD64_RAW_EVENT_MASK_NB);

	return 0;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
	struct hw_perf_event *hwc = &event->hw;

	if (ret)
		return ret;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;

	/* Delayed start after rdpmc base update */
	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_NB;
	info.split.gid = 0;
	info.split.cid = topology_logical_package_id(cpu);

	if (pmu_version >= 2) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
		info.split.num_pmcs = ebx.split.num_df_pmc;
	}

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
	int ret = amd_uncore_event_init(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 config = event->attr.config;
	u64 mask;

	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

	/*
	 * SliceMask and ThreadMask need to be set for certain L3 events.
	 * For other events, the two fields do not affect the count.
	 */
	if (ret || boot_cpu_data.x86 < 0x17)
		return ret;

	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
			 AMD64_L3_COREID_MASK);

	if (boot_cpu_data.x86 <= 0x18)
		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

	/*
	 * If the user doesn't specify a ThreadMask, they're not trying to
	 * count core 0, so we enable all cores & threads.
	 * We'll also assume that they want to count slice 0 if they specify
	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
	 */
	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
		       AMD64_L3_EN_ALL_CORES;

	hwc->config |= mask;

	return 0;
}

static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_L2;
	info.split.gid = 0;
	info.split.cid = per_cpu_llc_id(cpu);

	if (boot_cpu_data.x86 >= 0x17)
		info.split.num_pmcs = NUM_COUNTERS_L3;

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}

static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	if (pmu_version < 2)
		return;

	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	info.split.gid = topology_logical_package_id(cpu);
	info.split.cid = topology_logical_package_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
	union amd_uncore_info info;
	struct amd_uncore_pmu *pmu;
	int index = 0, gid, i;

	if (pmu_version < 2)
		return 0;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* Find unique groups */
	for_each_online_cpu(i) {
		info = *per_cpu_ptr(uncore->info, i);
		gid = info.split.gid;
		if (test_bit(gid, gmask))
			continue;

		__set_bit(gid, gmask);
		group_num_pmus[gid] = hweight32(info.split.aux_data);
		group_num_pmcs[gid] = info.split.num_pmcs;
		uncore->num_pmus += group_num_pmus[gid];
	}

	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
			       GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
		for (i = 0; i < group_num_pmus[gid]; i++) {
			pmu = &uncore->pmus[index];
			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
			pmu->rdpmc_base = -1;
			pmu->group = gid;

			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
			if (!pmu->ctx)
				goto done;

			pmu->pmu = (struct pmu) {
				.task_ctx_nr	= perf_invalid_context,
				.attr_groups	= amd_uncore_umc_attr_groups,
				.name		= pmu->name,
				.event_init	= amd_uncore_umc_event_init,
				.add		= amd_uncore_add,
				.del		= amd_uncore_del,
				.start		= amd_uncore_umc_start,
				.stop		= amd_uncore_stop,
				.read		= amd_uncore_read,
				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
				.module		= THIS_MODULE,
			};

			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
				free_percpu(pmu->ctx);
				pmu->ctx = NULL;
				goto done;
			}

			pr_info("%d %s counters detected\n", pmu->num_counters,
				pmu->pmu.name);

			index++;
		}
	}

done:
	uncore->num_pmus = index;
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};

static int __init amd_uncore_init(void)
{
	struct amd_uncore *uncore;
	int ret = -ENODEV;
	int i;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];

		BUG_ON(!uncore->scan);
		BUG_ON(!uncore->init);
		BUG_ON(!uncore->move);
		BUG_ON(!uncore->free);

		uncore->info = alloc_percpu(union amd_uncore_info);
		if (!uncore->info) {
			ret = -ENOMEM;
			goto fail;
		}
	};

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				NULL, amd_uncore_cpu_dead);
	if (ret)
		goto fail;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->info) {
			free_percpu(uncore->info);
			uncore->info = NULL;
		}
	}

	return ret;
}

static void __exit amd_uncore_exit(void)
{
	struct amd_uncore *uncore;
	struct amd_uncore_pmu *pmu;
	int i, j;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (!uncore->info)
			continue;

		free_percpu(uncore->info);
		uncore->info = NULL;

		for (j = 0; j < uncore->num_pmus; j++) {
			pmu = &uncore->pmus[j];
			if (!pmu->ctx)
				continue;

			perf_pmu_unregister(&pmu->pmu);
			free_percpu(pmu->ctx);
			pmu->ctx = NULL;
		}

		kfree(uncore->pmus);
		uncore->pmus = NULL;
	}
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");