1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2013 Advanced Micro Devices, Inc. 4 * 5 * Author: Jacob Shin <jacob.shin@amd.com> 6 */ 7 8 #include <linux/perf_event.h> 9 #include <linux/percpu.h> 10 #include <linux/types.h> 11 #include <linux/slab.h> 12 #include <linux/init.h> 13 #include <linux/cpu.h> 14 #include <linux/cpumask.h> 15 #include <linux/cpufeature.h> 16 #include <linux/smp.h> 17 18 #include <asm/perf_event.h> 19 #include <asm/msr.h> 20 21 #define NUM_COUNTERS_NB 4 22 #define NUM_COUNTERS_L2 4 23 #define NUM_COUNTERS_L3 6 24 25 #define RDPMC_BASE_NB 6 26 #define RDPMC_BASE_LLC 10 27 28 #define COUNTER_SHIFT 16 29 #define UNCORE_NAME_LEN 16 30 #define UNCORE_GROUP_MAX 256 31 32 #undef pr_fmt 33 #define pr_fmt(fmt) "amd_uncore: " fmt 34 35 static int pmu_version; 36 37 struct amd_uncore_ctx { 38 int refcnt; 39 int cpu; 40 struct perf_event **events; 41 struct hlist_node node; 42 }; 43 44 struct amd_uncore_pmu { 45 char name[UNCORE_NAME_LEN]; 46 int num_counters; 47 int rdpmc_base; 48 u32 msr_base; 49 int group; 50 cpumask_t active_mask; 51 struct pmu pmu; 52 struct amd_uncore_ctx * __percpu *ctx; 53 }; 54 55 enum { 56 UNCORE_TYPE_DF, 57 UNCORE_TYPE_L3, 58 UNCORE_TYPE_UMC, 59 60 UNCORE_TYPE_MAX 61 }; 62 63 union amd_uncore_info { 64 struct { 65 u64 aux_data:32; /* auxiliary data */ 66 u64 num_pmcs:8; /* number of counters */ 67 u64 gid:8; /* group id */ 68 u64 cid:8; /* context id */ 69 } split; 70 u64 full; 71 }; 72 73 struct amd_uncore { 74 union amd_uncore_info __percpu *info; 75 struct amd_uncore_pmu *pmus; 76 unsigned int num_pmus; 77 bool init_done; 78 void (*scan)(struct amd_uncore *uncore, unsigned int cpu); 79 int (*init)(struct amd_uncore *uncore, unsigned int cpu); 80 void (*move)(struct amd_uncore *uncore, unsigned int cpu); 81 void (*free)(struct amd_uncore *uncore, unsigned int cpu); 82 }; 83 84 static struct amd_uncore uncores[UNCORE_TYPE_MAX]; 85 86 static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) 87 { 88 return container_of(event->pmu, struct amd_uncore_pmu, pmu); 89 } 90 91 static void amd_uncore_read(struct perf_event *event) 92 { 93 struct hw_perf_event *hwc = &event->hw; 94 u64 prev, new; 95 s64 delta; 96 97 /* 98 * since we do not enable counter overflow interrupts, 99 * we do not have to worry about prev_count changing on us 100 */ 101 102 prev = local64_read(&hwc->prev_count); 103 104 /* 105 * Some uncore PMUs do not have RDPMC assignments. In such cases, 106 * read counts directly from the corresponding PERF_CTR. 107 */ 108 if (hwc->event_base_rdpmc < 0) 109 rdmsrl(hwc->event_base, new); 110 else 111 rdpmcl(hwc->event_base_rdpmc, new); 112 113 local64_set(&hwc->prev_count, new); 114 delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); 115 delta >>= COUNTER_SHIFT; 116 local64_add(delta, &event->count); 117 } 118 119 static void amd_uncore_start(struct perf_event *event, int flags) 120 { 121 struct hw_perf_event *hwc = &event->hw; 122 123 if (flags & PERF_EF_RELOAD) 124 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); 125 126 hwc->state = 0; 127 wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); 128 perf_event_update_userpage(event); 129 } 130 131 static void amd_uncore_stop(struct perf_event *event, int flags) 132 { 133 struct hw_perf_event *hwc = &event->hw; 134 135 wrmsrl(hwc->config_base, hwc->config); 136 hwc->state |= PERF_HES_STOPPED; 137 138 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 139 event->pmu->read(event); 140 hwc->state |= PERF_HES_UPTODATE; 141 } 142 } 143 144 static int amd_uncore_add(struct perf_event *event, int flags) 145 { 146 int i; 147 struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); 148 struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); 149 struct hw_perf_event *hwc = &event->hw; 150 151 /* are we already assigned? */ 152 if (hwc->idx != -1 && ctx->events[hwc->idx] == event) 153 goto out; 154 155 for (i = 0; i < pmu->num_counters; i++) { 156 if (ctx->events[i] == event) { 157 hwc->idx = i; 158 goto out; 159 } 160 } 161 162 /* if not, take the first available counter */ 163 hwc->idx = -1; 164 for (i = 0; i < pmu->num_counters; i++) { 165 if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { 166 hwc->idx = i; 167 break; 168 } 169 } 170 171 out: 172 if (hwc->idx == -1) 173 return -EBUSY; 174 175 hwc->config_base = pmu->msr_base + (2 * hwc->idx); 176 hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx); 177 hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; 178 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 179 180 if (pmu->rdpmc_base < 0) 181 hwc->event_base_rdpmc = -1; 182 183 if (flags & PERF_EF_START) 184 event->pmu->start(event, PERF_EF_RELOAD); 185 186 return 0; 187 } 188 189 static void amd_uncore_del(struct perf_event *event, int flags) 190 { 191 int i; 192 struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); 193 struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); 194 struct hw_perf_event *hwc = &event->hw; 195 196 event->pmu->stop(event, PERF_EF_UPDATE); 197 198 for (i = 0; i < pmu->num_counters; i++) { 199 if (cmpxchg(&ctx->events[i], event, NULL) == event) 200 break; 201 } 202 203 hwc->idx = -1; 204 } 205 206 static int amd_uncore_event_init(struct perf_event *event) 207 { 208 struct amd_uncore_pmu *pmu; 209 struct amd_uncore_ctx *ctx; 210 struct hw_perf_event *hwc = &event->hw; 211 212 if (event->attr.type != event->pmu->type) 213 return -ENOENT; 214 215 if (event->cpu < 0) 216 return -EINVAL; 217 218 pmu = event_to_amd_uncore_pmu(event); 219 ctx = *per_cpu_ptr(pmu->ctx, event->cpu); 220 if (!ctx) 221 return -ENODEV; 222 223 /* 224 * NB and Last level cache counters (MSRs) are shared across all cores 225 * that share the same NB / Last level cache. On family 16h and below, 226 * Interrupts can be directed to a single target core, however, event 227 * counts generated by processes running on other cores cannot be masked 228 * out. So we do not support sampling and per-thread events via 229 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: 230 */ 231 hwc->config = event->attr.config; 232 hwc->idx = -1; 233 234 /* 235 * since request can come in to any of the shared cores, we will remap 236 * to a single common cpu. 237 */ 238 event->cpu = ctx->cpu; 239 240 return 0; 241 } 242 243 static umode_t 244 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i) 245 { 246 return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ? 247 attr->mode : 0; 248 } 249 250 static umode_t 251 amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i) 252 { 253 return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0; 254 } 255 256 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, 257 struct device_attribute *attr, 258 char *buf) 259 { 260 struct pmu *ptr = dev_get_drvdata(dev); 261 struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu); 262 263 return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask); 264 } 265 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); 266 267 static struct attribute *amd_uncore_attrs[] = { 268 &dev_attr_cpumask.attr, 269 NULL, 270 }; 271 272 static struct attribute_group amd_uncore_attr_group = { 273 .attrs = amd_uncore_attrs, 274 }; 275 276 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ 277 static ssize_t __uncore_##_var##_show(struct device *dev, \ 278 struct device_attribute *attr, \ 279 char *page) \ 280 { \ 281 BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ 282 return sprintf(page, _format "\n"); \ 283 } \ 284 static struct device_attribute format_attr_##_var = \ 285 __ATTR(_name, 0444, __uncore_##_var##_show, NULL) 286 287 DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35"); 288 DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */ 289 DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */ 290 DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */ 291 DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15"); 292 DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */ 293 DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */ 294 DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */ 295 DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */ 296 DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */ 297 DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */ 298 DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */ 299 DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */ 300 DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */ 301 302 /* Common DF and NB attributes */ 303 static struct attribute *amd_uncore_df_format_attr[] = { 304 &format_attr_event12.attr, /* event */ 305 &format_attr_umask8.attr, /* umask */ 306 NULL, 307 }; 308 309 /* Common L2 and L3 attributes */ 310 static struct attribute *amd_uncore_l3_format_attr[] = { 311 &format_attr_event12.attr, /* event */ 312 &format_attr_umask8.attr, /* umask */ 313 NULL, /* threadmask */ 314 NULL, 315 }; 316 317 /* Common UMC attributes */ 318 static struct attribute *amd_uncore_umc_format_attr[] = { 319 &format_attr_event8.attr, /* event */ 320 &format_attr_rdwrmask.attr, /* rdwrmask */ 321 NULL, 322 }; 323 324 /* F17h unique L3 attributes */ 325 static struct attribute *amd_f17h_uncore_l3_format_attr[] = { 326 &format_attr_slicemask.attr, /* slicemask */ 327 NULL, 328 }; 329 330 /* F19h unique L3 attributes */ 331 static struct attribute *amd_f19h_uncore_l3_format_attr[] = { 332 &format_attr_coreid.attr, /* coreid */ 333 &format_attr_enallslices.attr, /* enallslices */ 334 &format_attr_enallcores.attr, /* enallcores */ 335 &format_attr_sliceid.attr, /* sliceid */ 336 NULL, 337 }; 338 339 static struct attribute_group amd_uncore_df_format_group = { 340 .name = "format", 341 .attrs = amd_uncore_df_format_attr, 342 }; 343 344 static struct attribute_group amd_uncore_l3_format_group = { 345 .name = "format", 346 .attrs = amd_uncore_l3_format_attr, 347 }; 348 349 static struct attribute_group amd_f17h_uncore_l3_format_group = { 350 .name = "format", 351 .attrs = amd_f17h_uncore_l3_format_attr, 352 .is_visible = amd_f17h_uncore_is_visible, 353 }; 354 355 static struct attribute_group amd_f19h_uncore_l3_format_group = { 356 .name = "format", 357 .attrs = amd_f19h_uncore_l3_format_attr, 358 .is_visible = amd_f19h_uncore_is_visible, 359 }; 360 361 static struct attribute_group amd_uncore_umc_format_group = { 362 .name = "format", 363 .attrs = amd_uncore_umc_format_attr, 364 }; 365 366 static const struct attribute_group *amd_uncore_df_attr_groups[] = { 367 &amd_uncore_attr_group, 368 &amd_uncore_df_format_group, 369 NULL, 370 }; 371 372 static const struct attribute_group *amd_uncore_l3_attr_groups[] = { 373 &amd_uncore_attr_group, 374 &amd_uncore_l3_format_group, 375 NULL, 376 }; 377 378 static const struct attribute_group *amd_uncore_l3_attr_update[] = { 379 &amd_f17h_uncore_l3_format_group, 380 &amd_f19h_uncore_l3_format_group, 381 NULL, 382 }; 383 384 static const struct attribute_group *amd_uncore_umc_attr_groups[] = { 385 &amd_uncore_attr_group, 386 &amd_uncore_umc_format_group, 387 NULL, 388 }; 389 390 static __always_inline 391 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) 392 { 393 union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); 394 return info->split.cid; 395 } 396 397 static __always_inline 398 int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu) 399 { 400 union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); 401 return info->split.gid; 402 } 403 404 static __always_inline 405 int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) 406 { 407 union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); 408 return info->split.num_pmcs; 409 } 410 411 static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu) 412 { 413 struct amd_uncore_pmu *pmu; 414 struct amd_uncore_ctx *ctx; 415 int i; 416 417 if (!uncore->init_done) 418 return; 419 420 for (i = 0; i < uncore->num_pmus; i++) { 421 pmu = &uncore->pmus[i]; 422 ctx = *per_cpu_ptr(pmu->ctx, cpu); 423 if (!ctx) 424 continue; 425 426 if (cpu == ctx->cpu) 427 cpumask_clear_cpu(cpu, &pmu->active_mask); 428 429 if (!--ctx->refcnt) { 430 kfree(ctx->events); 431 kfree(ctx); 432 } 433 434 *per_cpu_ptr(pmu->ctx, cpu) = NULL; 435 } 436 } 437 438 static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) 439 { 440 struct amd_uncore_ctx *curr, *prev; 441 struct amd_uncore_pmu *pmu; 442 int node, cid, gid, i, j; 443 444 if (!uncore->init_done || !uncore->num_pmus) 445 return 0; 446 447 cid = amd_uncore_ctx_cid(uncore, cpu); 448 gid = amd_uncore_ctx_gid(uncore, cpu); 449 450 for (i = 0; i < uncore->num_pmus; i++) { 451 pmu = &uncore->pmus[i]; 452 *per_cpu_ptr(pmu->ctx, cpu) = NULL; 453 curr = NULL; 454 455 /* Check for group exclusivity */ 456 if (gid != pmu->group) 457 continue; 458 459 /* Find a sibling context */ 460 for_each_online_cpu(j) { 461 if (cpu == j) 462 continue; 463 464 prev = *per_cpu_ptr(pmu->ctx, j); 465 if (!prev) 466 continue; 467 468 if (cid == amd_uncore_ctx_cid(uncore, j)) { 469 curr = prev; 470 break; 471 } 472 } 473 474 /* Allocate context if sibling does not exist */ 475 if (!curr) { 476 node = cpu_to_node(cpu); 477 curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node); 478 if (!curr) 479 goto fail; 480 481 curr->cpu = cpu; 482 curr->events = kzalloc_node(sizeof(*curr->events) * 483 pmu->num_counters, 484 GFP_KERNEL, node); 485 if (!curr->events) { 486 kfree(curr); 487 goto fail; 488 } 489 490 cpumask_set_cpu(cpu, &pmu->active_mask); 491 } 492 493 curr->refcnt++; 494 *per_cpu_ptr(pmu->ctx, cpu) = curr; 495 } 496 497 return 0; 498 499 fail: 500 amd_uncore_ctx_free(uncore, cpu); 501 502 return -ENOMEM; 503 } 504 505 static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu) 506 { 507 struct amd_uncore_ctx *curr, *next; 508 struct amd_uncore_pmu *pmu; 509 int i, j; 510 511 if (!uncore->init_done) 512 return; 513 514 for (i = 0; i < uncore->num_pmus; i++) { 515 pmu = &uncore->pmus[i]; 516 curr = *per_cpu_ptr(pmu->ctx, cpu); 517 if (!curr) 518 continue; 519 520 /* Migrate to a shared sibling if possible */ 521 for_each_online_cpu(j) { 522 next = *per_cpu_ptr(pmu->ctx, j); 523 if (!next || cpu == j) 524 continue; 525 526 if (curr == next) { 527 perf_pmu_migrate_context(&pmu->pmu, cpu, j); 528 cpumask_clear_cpu(cpu, &pmu->active_mask); 529 cpumask_set_cpu(j, &pmu->active_mask); 530 next->cpu = j; 531 break; 532 } 533 } 534 } 535 } 536 537 static int amd_uncore_cpu_starting(unsigned int cpu) 538 { 539 struct amd_uncore *uncore; 540 int i; 541 542 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 543 uncore = &uncores[i]; 544 uncore->scan(uncore, cpu); 545 } 546 547 return 0; 548 } 549 550 static int amd_uncore_cpu_online(unsigned int cpu) 551 { 552 struct amd_uncore *uncore; 553 int i; 554 555 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 556 uncore = &uncores[i]; 557 if (uncore->init(uncore, cpu)) 558 break; 559 } 560 561 return 0; 562 } 563 564 static int amd_uncore_cpu_down_prepare(unsigned int cpu) 565 { 566 struct amd_uncore *uncore; 567 int i; 568 569 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 570 uncore = &uncores[i]; 571 uncore->move(uncore, cpu); 572 } 573 574 return 0; 575 } 576 577 static int amd_uncore_cpu_dead(unsigned int cpu) 578 { 579 struct amd_uncore *uncore; 580 int i; 581 582 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 583 uncore = &uncores[i]; 584 uncore->free(uncore, cpu); 585 } 586 587 return 0; 588 } 589 590 static int amd_uncore_df_event_init(struct perf_event *event) 591 { 592 struct hw_perf_event *hwc = &event->hw; 593 int ret = amd_uncore_event_init(event); 594 595 if (ret || pmu_version < 2) 596 return ret; 597 598 hwc->config = event->attr.config & 599 (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB : 600 AMD64_RAW_EVENT_MASK_NB); 601 602 return 0; 603 } 604 605 static int amd_uncore_df_add(struct perf_event *event, int flags) 606 { 607 int ret = amd_uncore_add(event, flags & ~PERF_EF_START); 608 struct hw_perf_event *hwc = &event->hw; 609 610 if (ret) 611 return ret; 612 613 /* 614 * The first four DF counters are accessible via RDPMC index 6 to 9 615 * followed by the L3 counters from index 10 to 15. For processors 616 * with more than four DF counters, the DF RDPMC assignments become 617 * discontiguous as the additional counters are accessible starting 618 * from index 16. 619 */ 620 if (hwc->idx >= NUM_COUNTERS_NB) 621 hwc->event_base_rdpmc += NUM_COUNTERS_L3; 622 623 /* Delayed start after rdpmc base update */ 624 if (flags & PERF_EF_START) 625 amd_uncore_start(event, PERF_EF_RELOAD); 626 627 return 0; 628 } 629 630 static 631 void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) 632 { 633 union cpuid_0x80000022_ebx ebx; 634 union amd_uncore_info info; 635 636 if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) 637 return; 638 639 info.split.aux_data = 0; 640 info.split.num_pmcs = NUM_COUNTERS_NB; 641 info.split.gid = 0; 642 info.split.cid = topology_die_id(cpu); 643 644 if (pmu_version >= 2) { 645 ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); 646 info.split.num_pmcs = ebx.split.num_df_pmc; 647 } 648 649 *per_cpu_ptr(uncore->info, cpu) = info; 650 } 651 652 static 653 int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) 654 { 655 struct attribute **df_attr = amd_uncore_df_format_attr; 656 struct amd_uncore_pmu *pmu; 657 658 /* Run just once */ 659 if (uncore->init_done) 660 return amd_uncore_ctx_init(uncore, cpu); 661 662 /* No grouping, single instance for a system */ 663 uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); 664 if (!uncore->pmus) { 665 uncore->num_pmus = 0; 666 goto done; 667 } 668 669 /* 670 * For Family 17h and above, the Northbridge counters are repurposed 671 * as Data Fabric counters. The PMUs are exported based on family as 672 * either NB or DF. 673 */ 674 pmu = &uncore->pmus[0]; 675 strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", 676 sizeof(pmu->name)); 677 pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); 678 pmu->msr_base = MSR_F15H_NB_PERF_CTL; 679 pmu->rdpmc_base = RDPMC_BASE_NB; 680 pmu->group = amd_uncore_ctx_gid(uncore, cpu); 681 682 if (pmu_version >= 2) { 683 *df_attr++ = &format_attr_event14v2.attr; 684 *df_attr++ = &format_attr_umask12.attr; 685 } else if (boot_cpu_data.x86 >= 0x17) { 686 *df_attr = &format_attr_event14.attr; 687 } 688 689 pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); 690 if (!pmu->ctx) 691 goto done; 692 693 pmu->pmu = (struct pmu) { 694 .task_ctx_nr = perf_invalid_context, 695 .attr_groups = amd_uncore_df_attr_groups, 696 .name = pmu->name, 697 .event_init = amd_uncore_df_event_init, 698 .add = amd_uncore_df_add, 699 .del = amd_uncore_del, 700 .start = amd_uncore_start, 701 .stop = amd_uncore_stop, 702 .read = amd_uncore_read, 703 .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, 704 .module = THIS_MODULE, 705 }; 706 707 if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { 708 free_percpu(pmu->ctx); 709 pmu->ctx = NULL; 710 goto done; 711 } 712 713 pr_info("%d %s%s counters detected\n", pmu->num_counters, 714 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", 715 pmu->pmu.name); 716 717 uncore->num_pmus = 1; 718 719 done: 720 uncore->init_done = true; 721 722 return amd_uncore_ctx_init(uncore, cpu); 723 } 724 725 static int amd_uncore_l3_event_init(struct perf_event *event) 726 { 727 int ret = amd_uncore_event_init(event); 728 struct hw_perf_event *hwc = &event->hw; 729 u64 config = event->attr.config; 730 u64 mask; 731 732 hwc->config = config & AMD64_RAW_EVENT_MASK_NB; 733 734 /* 735 * SliceMask and ThreadMask need to be set for certain L3 events. 736 * For other events, the two fields do not affect the count. 737 */ 738 if (ret || boot_cpu_data.x86 < 0x17) 739 return ret; 740 741 mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | 742 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | 743 AMD64_L3_COREID_MASK); 744 745 if (boot_cpu_data.x86 <= 0x18) 746 mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | 747 ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); 748 749 /* 750 * If the user doesn't specify a ThreadMask, they're not trying to 751 * count core 0, so we enable all cores & threads. 752 * We'll also assume that they want to count slice 0 if they specify 753 * a ThreadMask and leave SliceId and EnAllSlices unpopulated. 754 */ 755 else if (!(config & AMD64_L3_F19H_THREAD_MASK)) 756 mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | 757 AMD64_L3_EN_ALL_CORES; 758 759 hwc->config |= mask; 760 761 return 0; 762 } 763 764 static 765 void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) 766 { 767 union amd_uncore_info info; 768 769 if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) 770 return; 771 772 info.split.aux_data = 0; 773 info.split.num_pmcs = NUM_COUNTERS_L2; 774 info.split.gid = 0; 775 info.split.cid = per_cpu_llc_id(cpu); 776 777 if (boot_cpu_data.x86 >= 0x17) 778 info.split.num_pmcs = NUM_COUNTERS_L3; 779 780 *per_cpu_ptr(uncore->info, cpu) = info; 781 } 782 783 static 784 int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) 785 { 786 struct attribute **l3_attr = amd_uncore_l3_format_attr; 787 struct amd_uncore_pmu *pmu; 788 789 /* Run just once */ 790 if (uncore->init_done) 791 return amd_uncore_ctx_init(uncore, cpu); 792 793 /* No grouping, single instance for a system */ 794 uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); 795 if (!uncore->pmus) { 796 uncore->num_pmus = 0; 797 goto done; 798 } 799 800 /* 801 * For Family 17h and above, L3 cache counters are available instead 802 * of L2 cache counters. The PMUs are exported based on family as 803 * either L2 or L3. 804 */ 805 pmu = &uncore->pmus[0]; 806 strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", 807 sizeof(pmu->name)); 808 pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); 809 pmu->msr_base = MSR_F16H_L2I_PERF_CTL; 810 pmu->rdpmc_base = RDPMC_BASE_LLC; 811 pmu->group = amd_uncore_ctx_gid(uncore, cpu); 812 813 if (boot_cpu_data.x86 >= 0x17) { 814 *l3_attr++ = &format_attr_event8.attr; 815 *l3_attr++ = &format_attr_umask8.attr; 816 *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 817 &format_attr_threadmask2.attr : 818 &format_attr_threadmask8.attr; 819 } 820 821 pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); 822 if (!pmu->ctx) 823 goto done; 824 825 pmu->pmu = (struct pmu) { 826 .task_ctx_nr = perf_invalid_context, 827 .attr_groups = amd_uncore_l3_attr_groups, 828 .attr_update = amd_uncore_l3_attr_update, 829 .name = pmu->name, 830 .event_init = amd_uncore_l3_event_init, 831 .add = amd_uncore_add, 832 .del = amd_uncore_del, 833 .start = amd_uncore_start, 834 .stop = amd_uncore_stop, 835 .read = amd_uncore_read, 836 .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, 837 .module = THIS_MODULE, 838 }; 839 840 if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { 841 free_percpu(pmu->ctx); 842 pmu->ctx = NULL; 843 goto done; 844 } 845 846 pr_info("%d %s%s counters detected\n", pmu->num_counters, 847 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", 848 pmu->pmu.name); 849 850 uncore->num_pmus = 1; 851 852 done: 853 uncore->init_done = true; 854 855 return amd_uncore_ctx_init(uncore, cpu); 856 } 857 858 static int amd_uncore_umc_event_init(struct perf_event *event) 859 { 860 struct hw_perf_event *hwc = &event->hw; 861 int ret = amd_uncore_event_init(event); 862 863 if (ret) 864 return ret; 865 866 hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC; 867 868 return 0; 869 } 870 871 static void amd_uncore_umc_start(struct perf_event *event, int flags) 872 { 873 struct hw_perf_event *hwc = &event->hw; 874 875 if (flags & PERF_EF_RELOAD) 876 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); 877 878 hwc->state = 0; 879 wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); 880 perf_event_update_userpage(event); 881 } 882 883 static 884 void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) 885 { 886 union cpuid_0x80000022_ebx ebx; 887 union amd_uncore_info info; 888 unsigned int eax, ecx, edx; 889 890 if (pmu_version < 2) 891 return; 892 893 cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); 894 info.split.aux_data = ecx; /* stash active mask */ 895 info.split.num_pmcs = ebx.split.num_umc_pmc; 896 info.split.gid = topology_die_id(cpu); 897 info.split.cid = topology_die_id(cpu); 898 *per_cpu_ptr(uncore->info, cpu) = info; 899 } 900 901 static 902 int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) 903 { 904 DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 }; 905 u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 }; 906 u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 }; 907 union amd_uncore_info info; 908 struct amd_uncore_pmu *pmu; 909 int index = 0, gid, i; 910 911 if (pmu_version < 2) 912 return 0; 913 914 /* Run just once */ 915 if (uncore->init_done) 916 return amd_uncore_ctx_init(uncore, cpu); 917 918 /* Find unique groups */ 919 for_each_online_cpu(i) { 920 info = *per_cpu_ptr(uncore->info, i); 921 gid = info.split.gid; 922 if (test_bit(gid, gmask)) 923 continue; 924 925 __set_bit(gid, gmask); 926 group_num_pmus[gid] = hweight32(info.split.aux_data); 927 group_num_pmcs[gid] = info.split.num_pmcs; 928 uncore->num_pmus += group_num_pmus[gid]; 929 } 930 931 uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus, 932 GFP_KERNEL); 933 if (!uncore->pmus) { 934 uncore->num_pmus = 0; 935 goto done; 936 } 937 938 for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) { 939 for (i = 0; i < group_num_pmus[gid]; i++) { 940 pmu = &uncore->pmus[index]; 941 snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index); 942 pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid]; 943 pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2; 944 pmu->rdpmc_base = -1; 945 pmu->group = gid; 946 947 pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); 948 if (!pmu->ctx) 949 goto done; 950 951 pmu->pmu = (struct pmu) { 952 .task_ctx_nr = perf_invalid_context, 953 .attr_groups = amd_uncore_umc_attr_groups, 954 .name = pmu->name, 955 .event_init = amd_uncore_umc_event_init, 956 .add = amd_uncore_add, 957 .del = amd_uncore_del, 958 .start = amd_uncore_umc_start, 959 .stop = amd_uncore_stop, 960 .read = amd_uncore_read, 961 .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, 962 .module = THIS_MODULE, 963 }; 964 965 if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { 966 free_percpu(pmu->ctx); 967 pmu->ctx = NULL; 968 goto done; 969 } 970 971 pr_info("%d %s counters detected\n", pmu->num_counters, 972 pmu->pmu.name); 973 974 index++; 975 } 976 } 977 978 done: 979 uncore->num_pmus = index; 980 uncore->init_done = true; 981 982 return amd_uncore_ctx_init(uncore, cpu); 983 } 984 985 static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { 986 /* UNCORE_TYPE_DF */ 987 { 988 .scan = amd_uncore_df_ctx_scan, 989 .init = amd_uncore_df_ctx_init, 990 .move = amd_uncore_ctx_move, 991 .free = amd_uncore_ctx_free, 992 }, 993 /* UNCORE_TYPE_L3 */ 994 { 995 .scan = amd_uncore_l3_ctx_scan, 996 .init = amd_uncore_l3_ctx_init, 997 .move = amd_uncore_ctx_move, 998 .free = amd_uncore_ctx_free, 999 }, 1000 /* UNCORE_TYPE_UMC */ 1001 { 1002 .scan = amd_uncore_umc_ctx_scan, 1003 .init = amd_uncore_umc_ctx_init, 1004 .move = amd_uncore_ctx_move, 1005 .free = amd_uncore_ctx_free, 1006 }, 1007 }; 1008 1009 static int __init amd_uncore_init(void) 1010 { 1011 struct amd_uncore *uncore; 1012 int ret = -ENODEV; 1013 int i; 1014 1015 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && 1016 boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) 1017 return -ENODEV; 1018 1019 if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) 1020 return -ENODEV; 1021 1022 if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) 1023 pmu_version = 2; 1024 1025 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 1026 uncore = &uncores[i]; 1027 1028 BUG_ON(!uncore->scan); 1029 BUG_ON(!uncore->init); 1030 BUG_ON(!uncore->move); 1031 BUG_ON(!uncore->free); 1032 1033 uncore->info = alloc_percpu(union amd_uncore_info); 1034 if (!uncore->info) { 1035 ret = -ENOMEM; 1036 goto fail; 1037 } 1038 }; 1039 1040 /* 1041 * Install callbacks. Core will call them for each online cpu. 1042 */ 1043 ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, 1044 "perf/x86/amd/uncore:prepare", 1045 NULL, amd_uncore_cpu_dead); 1046 if (ret) 1047 goto fail; 1048 1049 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, 1050 "perf/x86/amd/uncore:starting", 1051 amd_uncore_cpu_starting, NULL); 1052 if (ret) 1053 goto fail_prep; 1054 1055 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, 1056 "perf/x86/amd/uncore:online", 1057 amd_uncore_cpu_online, 1058 amd_uncore_cpu_down_prepare); 1059 if (ret) 1060 goto fail_start; 1061 1062 return 0; 1063 1064 fail_start: 1065 cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); 1066 fail_prep: 1067 cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); 1068 fail: 1069 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 1070 uncore = &uncores[i]; 1071 if (uncore->info) { 1072 free_percpu(uncore->info); 1073 uncore->info = NULL; 1074 } 1075 } 1076 1077 return ret; 1078 } 1079 1080 static void __exit amd_uncore_exit(void) 1081 { 1082 struct amd_uncore *uncore; 1083 struct amd_uncore_pmu *pmu; 1084 int i, j; 1085 1086 cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); 1087 cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); 1088 cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); 1089 1090 for (i = 0; i < UNCORE_TYPE_MAX; i++) { 1091 uncore = &uncores[i]; 1092 if (!uncore->info) 1093 continue; 1094 1095 free_percpu(uncore->info); 1096 uncore->info = NULL; 1097 1098 for (j = 0; j < uncore->num_pmus; j++) { 1099 pmu = &uncore->pmus[j]; 1100 if (!pmu->ctx) 1101 continue; 1102 1103 perf_pmu_unregister(&pmu->pmu); 1104 free_percpu(pmu->ctx); 1105 pmu->ctx = NULL; 1106 } 1107 1108 kfree(uncore->pmus); 1109 uncore->pmus = NULL; 1110 } 1111 } 1112 1113 module_init(amd_uncore_init); 1114 module_exit(amd_uncore_exit); 1115 1116 MODULE_DESCRIPTION("AMD Uncore Driver"); 1117 MODULE_LICENSE("GPL v2"); 1118