// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/msr.h>
#include "uncore.h"
#include "uncore_discovery.h"

static bool uncore_no_discover;
module_param(uncore_no_discover, bool, 0);
MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
				     "(default: enable the discovery mechanism).");
struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* The PCI driver for the device which the uncore doesn't own. */
struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
int __uncore_max_dies;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

MODULE_DESCRIPTION("Support for Intel uncore performance events");
MODULE_LICENSE("GPL");

int uncore_pcibus_to_dieid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int die_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			die_id = map->pbus_to_dieid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return die_id;
}

int uncore_die_to_segment(int die)
{
	struct pci_bus *bus = NULL;

	/* Find the first PCI bus which maps to the specified die. */
	while ((bus = pci_find_next_bus(bus)) &&
	       (die != uncore_pcibus_to_dieid(bus)))
		;

	return bus ? pci_domain_nr(bus) : -EINVAL;
}
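/*
 * Translate a PCI device to the logical die id it belongs to by matching
 * the device's NUMA node against the CPUs reachable from its bus.
 * Returns -1 if no initialized CPU on that node is found.
 */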
int uncore_device_to_die(struct pci_dev *dev)
{
	int node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && cpu_to_node(cpu) == node)
			return c->topo.logical_die_id;
	}

	return -1;
}

static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;

	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
		list_del(&map->list);
		kfree(map);
	}
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_dieid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	kfree(alloc);
	return map;
}

ssize_t uncore_event_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	unsigned int dieid = topology_logical_die_id(cpu);

	/*
	 * The unsigned check also catches the '-1' return value for
	 * non-existent mappings in the topology map.
	 */
	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrq(event->hw.event_base, count);

	return count;
}

void uncore_mmio_exit_box(struct intel_uncore_box *box)
{
	if (box->io_addr)
		iounmap(box->io_addr);
}

u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
			     struct perf_event *event)
{
	if (!box->io_addr)
		return 0;

	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
		return 0;

	return readq(box->io_addr + event->hw.event_base);
}
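/*
 * Shared "extra" registers (e.g. match/mask filters) are reference counted
 * per box: an event may claim one only if it is unused or already programmed
 * with the same config1/config2 values.
 */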
/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put constraint if extra reg was actually allocated. Also
	 * takes care of events which do not use an extra shared reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box,
				   struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (uncore_pmc_fixed(hwc->idx)) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	if (intel_generic_uncore_assign_hw_event(event, box))
		return;

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
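/*
 * Update event->count from the hardware counter.  The counters are narrower
 * than 64 bit, so both the previous and the new raw value are shifted up by
 * (64 - width) before subtracting; shifting the difference back down yields
 * a correct delta even when the counter has wrapped.
 */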
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (uncore_pmc_freerunning(event->hw.idx))
		shift = 64 - uncore_freerunning_bits(box, event);
	else if (uncore_pmc_fixed(event->hw.idx))
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
 * for SandyBridge, so we use an hrtimer to periodically poll the counters
 * and avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED_HARD);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
						 int node)
{
	int i, size, numshared = type->num_shared_regs;
	struct intel_uncore_box *box;

	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < numshared; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	box->cpu = -1;
	box->dieid = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Use the uncore_pmu_event_init() pmu::event_init callback as a
 * detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
{
	return &box->pmu->pmu == event->pmu;
}
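/*
 * Collect the leader and, if @dogrp is set, its siblings that belong to this
 * box into box->event_list[].  Returns the new number of events, or -EINVAL
 * if the box would exceed its available counters.
 */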
static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
		      bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_box_event(box, leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_box_event(box, event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box,
					struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}
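/*
 * Assign counters to the @n collected events: first try to keep every event
 * on the counter it already used (fast path), otherwise fall back to the
 * generic perf_assign_events() constraint solver.
 */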
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	/*
	 * A free running counter is read-only and always active.
	 * Use the current counter value as the start point.
	 * There is no overflow interrupt for a free running counter,
	 * so use the hrtimer to periodically poll it and avoid overflow.
	 */
	if (uncore_pmc_freerunning(event->hw.idx)) {
		list_add_tail(&event->active_entry, &box->active_list);
		local64_set(&event->hw.prev_count,
			    uncore_read_counter(box, event));
		if (box->n_active++ == 0)
			uncore_pmu_start_hrtimer(box);
		return;
	}

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1)
		uncore_pmu_start_hrtimer(box);
}

void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	/* Cannot disable a free running counter which is read-only */
	if (uncore_pmc_freerunning(hwc->idx)) {
		list_del(&event->active_entry);
		if (--box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
		uncore_perf_event_update(box, event);
		return;
	}

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}
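/*
 * Add an event to the box: collect it into the event list, rerun counter
 * assignment, stop any events that have to move to a different counter and
 * then (re)start everything that is allowed to run.
 */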
int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	/*
	 * The free running counter is assigned in event_init().
	 * The free running counter event and the free running counter
	 * are 1:1 mapped. It doesn't need to be tracked in the event_list.
	 */
	if (uncore_pmc_freerunning(hwc->idx)) {
		if (flags & PERF_EF_START)
			uncore_pmu_event_start(event, 0);
		return 0;
	}

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
		    hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
		    hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	/*
	 * The event for a free running counter is not tracked by the
	 * event_list. There is no need to force event->hw.idx = -1 to
	 * reassign the counter, because the event and the free running
	 * counter are 1:1 mapped.
	 */
	if (uncore_pmc_freerunning(event->hw.idx))
		return;

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			for (++i; i < box->n_events; i++)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);

	uncore_perf_event_update(box, event);
}
/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				 struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	/* The free running counter is always active. */
	if (uncore_pmc_freerunning(event->hw.idx))
		return 0;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (!pmu->registered)
		return -ENOENT;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;
	event->pmu_private = box;

	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else if (is_freerunning_event(event)) {
		hwc->config = event->attr.config;
		if (!check_valid_freerunning_event(box, event))
			return -EINVAL;
		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
		/*
		 * The free running counter event and free running counter
		 * are always 1:1 mapped.
		 * The free running counter is always active.
		 * Assign the free running counter here.
		 */
		event->hw.event_base = uncore_freerunning_counter(box, event);
	} else {
		hwc->config = event->attr.config &
			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}
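/*
 * pmu_enable/pmu_disable callbacks: enable or disable the whole box on the
 * CPU that currently owns it, using the type's enable_box/disable_box
 * operations when they are provided.
 */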
static void uncore_pmu_enable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->enable_box)
		uncore_pmu->type->ops->enable_box(box);
}

static void uncore_pmu_disable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->disable_box)
		uncore_pmu->type->ops->disable_box(box);
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

static inline int uncore_get_box_id(struct intel_uncore_type *type,
				    struct intel_uncore_pmu *pmu)
{
	if (type->boxes)
		return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx);

	return pmu->pmu_idx;
}

void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	if (type->num_boxes == 1)
		sprintf(pmu_name, "uncore_type_%u", type->type_id);
	else {
		sprintf(pmu_name, "uncore_type_%u_%d",
			type->type_id, uncore_get_box_id(type, pmu));
	}
}

static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	/*
	 * No uncore block name in the discovery table.
	 * Use uncore_type_<typeid>_<boxid> as the name.
	 */
	if (!type->name) {
		uncore_get_alias_name(pmu->name, pmu);
		return;
	}

	if (type->num_boxes == 1) {
		if (strlen(type->name) > 0)
			sprintf(pmu->name, "uncore_%s", type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		/*
		 * Use the box ID from the discovery table if applicable.
		 */
		sprintf(pmu->name, "uncore_%s_%d", type->name,
			uncore_get_box_id(type, pmu));
	}
}
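/*
 * Illustrative (hardware dependent) examples of the resulting PMU names:
 * a named type with several boxes becomes e.g. "uncore_cha_0",
 * "uncore_cha_1", ..., a single-box type becomes e.g. "uncore_imc", and a
 * nameless type from the discovery table becomes "uncore_type_<id>_<box>".
 * The registered PMUs appear under /sys/bus/event_source/devices/ and can
 * be used from perf, e.g. "perf stat -a -e uncore_<name>/<event>/".
 */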
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups = pmu->type->attr_groups,
			.task_ctx_nr = perf_invalid_context,
			.pmu_enable = uncore_pmu_enable,
			.pmu_disable = uncore_pmu_disable,
			.event_init = uncore_pmu_event_init,
			.add = uncore_pmu_event_add,
			.del = uncore_pmu_event_del,
			.start = uncore_pmu_event_start,
			.stop = uncore_pmu_event_stop,
			.read = uncore_pmu_event_read,
			.module = THIS_MODULE,
			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
			.attr_update = pmu->type->attr_update,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
		pmu->pmu.attr_update = pmu->type->attr_update;
	}

	uncore_get_pmu_name(pmu);

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	if (!ret)
		pmu->registered = true;
	return ret;
}

static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
{
	if (!pmu->registered)
		return;
	perf_pmu_unregister(&pmu->pmu);
	pmu->registered = false;
}

static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
	int die;

	for (die = 0; die < uncore_max_dies(); die++)
		kfree(pmu->boxes[die]);
	kfree(pmu->boxes);
}

static void uncore_type_exit(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	int i;

	if (type->cleanup_mapping)
		type->cleanup_mapping(type);

	if (type->cleanup_extra_boxes)
		type->cleanup_extra_boxes(type);

	if (pmu) {
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			uncore_pmu_unregister(pmu);
			uncore_free_boxes(pmu);
		}
		kfree(type->pmus);
		type->pmus = NULL;
	}

	kfree(type->events_group);
	type->events_group = NULL;
}

static void uncore_types_exit(struct intel_uncore_type **types)
{
	for (; *types; types++)
		uncore_type_exit(*types);
}

static int __init uncore_type_init(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmus;
	size_t size;
	int i, j;

	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		pmus[i].boxes = kzalloc(size, GFP_KERNEL);
		if (!pmus[i].boxes)
			goto err;
	}

	type->pmus = pmus;
	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				   0, type->num_counters, 0, 0);

	if (type->event_descs) {
		struct {
			struct attribute_group group;
			struct attribute *attrs[];
		} *attr_group;

		for (i = 0; type->event_descs[i].attr.attr.name; i++);

		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
				     GFP_KERNEL);
		if (!attr_group)
			goto err;

		attr_group->group.name = "events";
		attr_group->group.attrs = attr_group->attrs;

		for (j = 0; j < i; j++)
			attr_group->attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = &attr_group->group;
	}

	type->pmu_group = &uncore_pmu_attr_group;

	if (type->set_mapping)
		type->set_mapping(type);

	return 0;

err:
	for (i = 0; i < type->num_boxes; i++)
		kfree(pmus[i].boxes);
	kfree(pmus);

	return -ENOMEM;
}

static int __init
uncore_types_init(struct intel_uncore_type **types)
{
	int ret;

	for (; *types; types++) {
		ret = uncore_type_init(*types);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Get the die information of a PCI device.
 * @pdev: The PCI device.
 * @die: The die id which the device maps to.
 */
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
{
	*die = uncore_pcibus_to_dieid(pdev->bus);
	if (*die < 0)
		return -EINVAL;

	return 0;
}
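/*
 * Find the PMU of a PCI device from the discovery tables by matching the
 * device's PCI domain/bus/devfn against every unit of every PCI uncore type.
 */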
static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{
	struct intel_uncore_type **types = uncore_pci_uncores;
	struct intel_uncore_discovery_unit *unit;
	struct intel_uncore_type *type;
	struct rb_node *node;

	for (; *types; types++) {
		type = *types;

		for (node = rb_first(type->boxes); node; node = rb_next(node)) {
			unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
			if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) &&
			    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) &&
			    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr))
				return &type->pmus[unit->pmu_idx];
		}
	}

	return NULL;
}

/*
 * Find the PMU of a PCI device.
 * @pdev: The PCI device.
 * @ids: The ID table of the available PCI devices with a PMU.
 *       If NULL, search the whole uncore_pci_uncores.
 */
static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
{
	struct intel_uncore_pmu *pmu = NULL;
	struct intel_uncore_type *type;
	kernel_ulong_t data;
	unsigned int devfn;

	if (!ids)
		return uncore_pci_find_dev_pmu_from_types(pdev);

	while (ids && ids->vendor) {
		if ((ids->vendor == pdev->vendor) &&
		    (ids->device == pdev->device)) {
			data = ids->driver_data;
			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
					  UNCORE_PCI_DEV_FUNC(data));
			if (devfn == pdev->devfn) {
				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
				break;
			}
		}
		ids++;
	}
	return pmu;
}
/*
 * Register the PMU for a PCI device
 * @pdev: The PCI device.
 * @type: The corresponding PMU type of the device.
 * @pmu: The corresponding PMU of the device.
 * @die: The die id which the device maps to.
 */
static int uncore_pci_pmu_register(struct pci_dev *pdev,
				   struct intel_uncore_type *type,
				   struct intel_uncore_pmu *pmu,
				   int die)
{
	struct intel_uncore_box *box;
	int ret;

	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
		return -EINVAL;

	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	atomic_inc(&box->refcnt);
	box->dieid = die;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);

	pmu->boxes[die] = box;
	if (atomic_inc_return(&pmu->activeboxes) > 1)
		return 0;

	/* First active box registers the pmu */
	ret = uncore_pmu_register(pmu);
	if (ret) {
		pmu->boxes[die] = NULL;
		uncore_box_exit(box);
		kfree(box);
	}
	return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu = NULL;
	int die, ret;

	ret = uncore_pci_get_dev_die_info(pdev, &die);
	if (ret)
		return ret;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);

		uncore_extra_pci_dev[die].dev[idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];

	/*
	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
	 * for multiple instances of an uncore PMU device type. We should check
	 * PCI slot and func to identify the uncore box.
	 */
	if (id->driver_data & ~0xffff) {
		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);

		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
		if (pmu == NULL)
			return -ENODEV;
	} else {
		/*
		 * for performance monitoring unit with multiple boxes,
		 * each box has a different function id.
		 */
		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	}

	ret = uncore_pci_pmu_register(pdev, type, pmu, die);

	pci_set_drvdata(pdev, pmu->boxes[die]);

	return ret;
}
/*
 * Unregister the PMU of a PCI device
 * @pmu: The corresponding PMU is unregistered.
 * @die: The die id which the device maps to.
 */
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
{
	struct intel_uncore_box *box = pmu->boxes[die];

	pmu->boxes[die] = NULL;
	if (atomic_dec_return(&pmu->activeboxes) == 0)
		uncore_pmu_unregister(pmu);
	uncore_box_exit(box);
	kfree(box);
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box;
	struct intel_uncore_pmu *pmu;
	int i, die;

	if (uncore_pci_get_dev_die_info(pdev, &die))
		return;

	box = pci_get_drvdata(pdev);
	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
				uncore_extra_pci_dev[die].dev[i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;

	pci_set_drvdata(pdev, NULL);

	uncore_pci_pmu_unregister(pmu, die);
}

static int uncore_bus_notify(struct notifier_block *nb,
			     unsigned long action, void *data,
			     const struct pci_device_id *ids)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_uncore_pmu *pmu;
	int die;

	/* Unregister the PMU when the device is going to be deleted. */
	if (action != BUS_NOTIFY_DEL_DEVICE)
		return NOTIFY_DONE;

	pmu = uncore_pci_find_dev_pmu(pdev, ids);
	if (!pmu)
		return NOTIFY_DONE;

	if (uncore_pci_get_dev_die_info(pdev, &die))
		return NOTIFY_DONE;

	uncore_pci_pmu_unregister(pmu, die);

	return NOTIFY_OK;
}

static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	return uncore_bus_notify(nb, action, data,
				 uncore_pci_sub_driver->id_table);
}

static struct notifier_block uncore_pci_sub_notifier = {
	.notifier_call = uncore_pci_sub_bus_notify,
};
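/*
 * Register PMUs for PCI devices which are owned by another driver: walk the
 * sub driver's id table with pci_get_device() and, if at least one PMU was
 * registered, hook a bus notifier so the PMU is unregistered when the device
 * goes away.
 */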
static void uncore_pci_sub_driver_init(void)
{
	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct pci_dev *pci_sub_dev;
	bool notify = false;
	unsigned int devfn;
	int die;

	while (ids && ids->vendor) {
		pci_sub_dev = NULL;
		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
		/*
		 * Search for the available devices and register the
		 * corresponding PMUs.
		 */
		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
						     ids->device, pci_sub_dev))) {
			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
			if (devfn != pci_sub_dev->devfn)
				continue;

			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
			if (!pmu)
				continue;

			if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
				continue;

			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
						     die))
				notify = true;
		}
		ids++;
	}

	if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
		notify = false;

	if (!notify)
		uncore_pci_sub_driver = NULL;
}

static int uncore_pci_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	return uncore_bus_notify(nb, action, data, NULL);
}

static struct notifier_block uncore_pci_notifier = {
	.notifier_call = uncore_pci_bus_notify,
};

static void uncore_pci_pmus_register(void)
{
	struct intel_uncore_type **types = uncore_pci_uncores;
	struct intel_uncore_discovery_unit *unit;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct rb_node *node;
	struct pci_dev *pdev;

	for (; *types; types++) {
		type = *types;

		for (node = rb_first(type->boxes); node; node = rb_next(node)) {
			unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
			pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr),
							   UNCORE_DISCOVERY_PCI_BUS(unit->addr),
							   UNCORE_DISCOVERY_PCI_DEVFN(unit->addr));

			if (!pdev)
				continue;
			pmu = &type->pmus[unit->pmu_idx];
			uncore_pci_pmu_register(pdev, type, pmu, unit->die);
		}
	}

	bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
}

static int __init uncore_pci_init(void)
{
	size_t size;
	int ret;

	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
	if (!uncore_extra_pci_dev) {
		ret = -ENOMEM;
		goto err;
	}

	ret = uncore_types_init(uncore_pci_uncores);
	if (ret)
		goto errtype;

	if (uncore_pci_driver) {
		uncore_pci_driver->probe = uncore_pci_probe;
		uncore_pci_driver->remove = uncore_pci_remove;

		ret = pci_register_driver(uncore_pci_driver);
		if (ret)
			goto errtype;
	} else
		uncore_pci_pmus_register();

	if (uncore_pci_sub_driver)
		uncore_pci_sub_driver_init();

	pcidrv_registered = true;
	return 0;

errtype:
	uncore_types_exit(uncore_pci_uncores);
	kfree(uncore_extra_pci_dev);
	uncore_extra_pci_dev = NULL;
	uncore_free_pcibus_map();
err:
	uncore_pci_uncores = empty_uncore;
	return ret;
}

static void uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		if (uncore_pci_sub_driver)
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
		if (uncore_pci_driver)
			pci_unregister_driver(uncore_pci_driver);
		else
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
		uncore_types_exit(uncore_pci_uncores);
		kfree(uncore_extra_pci_dev);
		uncore_free_pcibus_map();
	}
}

static bool uncore_die_has_box(struct intel_uncore_type *type,
			       int die, unsigned int pmu_idx)
{
	if (!type->boxes)
		return true;

	if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0)
		return false;

	return true;
}
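/*
 * Move the boxes of an uncore type from @old_cpu to @new_cpu for the die
 * those CPUs belong to.  When events are active, the perf context is
 * migrated so counting continues on the new CPU.
 */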
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
				   int new_cpu)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	struct intel_uncore_box *box;
	int i, die;

	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
	for (i = 0; i < type->num_boxes; i++, pmu++) {
		box = pmu->boxes[die];
		if (!box)
			continue;

		if (old_cpu < 0) {
			WARN_ON_ONCE(box->cpu != -1);
			if (uncore_die_has_box(type, die, pmu->pmu_idx)) {
				box->cpu = new_cpu;
				cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
			}
			continue;
		}

		WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu);
		box->cpu = -1;
		cpumask_clear_cpu(old_cpu, &pmu->cpu_mask);
		if (new_cpu < 0)
			continue;

		if (!uncore_die_has_box(type, die, pmu->pmu_idx))
			continue;
		uncore_pmu_cancel_hrtimer(box);
		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
		box->cpu = new_cpu;
		cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
	}
}

static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	for (; *uncores; uncores++)
		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}

static void uncore_box_unref(struct intel_uncore_type **types, int id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i;

	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[id];
			if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0)
				uncore_box_exit(box);
		}
	}
}

static int uncore_event_cpu_offline(unsigned int cpu)
{
	int die, target;

	/* Check if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		goto unref;
	/* Find a new cpu to collect uncore events */
	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);

	/* Migrate uncore events to the new target */
	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &uncore_cpu_mask);
	else
		target = -1;

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_mmio_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);

unref:
	/* Clear the references */
	die = topology_logical_die_id(cpu);
	uncore_box_unref(uncore_msr_uncores, die);
	uncore_box_unref(uncore_mmio_uncores, die);
	return 0;
}
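/*
 * Allocate the boxes which are missing for @die on the NUMA node of @cpu.
 * Newly allocated boxes are parked on a local list and only installed in
 * the pmus once every allocation has succeeded; on failure everything
 * allocated in this call is freed again.
 */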
static int allocate_boxes(struct intel_uncore_type **types,
			  unsigned int die, unsigned int cpu)
{
	struct intel_uncore_box *box, *tmp;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	LIST_HEAD(allocated);
	int i;

	/* Try to allocate all required boxes */
	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			if (pmu->boxes[die])
				continue;
			box = uncore_alloc_box(type, cpu_to_node(cpu));
			if (!box)
				goto cleanup;
			box->pmu = pmu;
			box->dieid = die;
			list_add(&box->active_list, &allocated);
		}
	}
	/* Install them in the pmus */
	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
		list_del_init(&box->active_list);
		box->pmu->boxes[die] = box;
	}
	return 0;

cleanup:
	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
		list_del_init(&box->active_list);
		kfree(box);
	}
	return -ENOMEM;
}

static int uncore_box_ref(struct intel_uncore_type **types,
			  int id, unsigned int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, ret;

	ret = allocate_boxes(types, id, cpu);
	if (ret)
		return ret;

	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[id];
			if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1)
				uncore_box_init(box);
		}
	}
	return 0;
}

static int uncore_event_cpu_online(unsigned int cpu)
{
	int die, target, msr_ret, mmio_ret;

	die = topology_logical_die_id(cpu);
	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
	if (msr_ret && mmio_ret)
		return -ENOMEM;

	/*
	 * Check if there is an online cpu in the package
	 * which collects uncore events already.
	 */
	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
	if (target < nr_cpu_ids)
		return 0;

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	if (!msr_ret)
		uncore_change_context(uncore_msr_uncores, -1, cpu);
	if (!mmio_ret)
		uncore_change_context(uncore_mmio_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
	return 0;
}

static int __init type_pmu_register(struct intel_uncore_type *type)
{
	int i, ret;

	for (i = 0; i < type->num_boxes; i++) {
		ret = uncore_pmu_register(&type->pmus[i]);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_msr_pmus_register(void)
{
	struct intel_uncore_type **types = uncore_msr_uncores;
	int ret;

	for (; *types; types++) {
		ret = type_pmu_register(*types);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_cpu_init(void)
{
	int ret;

	ret = uncore_types_init(uncore_msr_uncores);
	if (ret)
		goto err;

	ret = uncore_msr_pmus_register();
	if (ret)
		goto err;
	return 0;
err:
	uncore_types_exit(uncore_msr_uncores);
	uncore_msr_uncores = empty_uncore;
	return ret;
}

static int __init uncore_mmio_init(void)
{
	struct intel_uncore_type **types = uncore_mmio_uncores;
	int ret;

	ret = uncore_types_init(types);
	if (ret)
		goto err;

	for (; *types; types++) {
		ret = type_pmu_register(*types);
		if (ret)
			goto err;
	}
	return 0;
err:
	uncore_types_exit(uncore_mmio_uncores);
	uncore_mmio_uncores = empty_uncore;
	return ret;
}

struct intel_uncore_init_fun {
	void (*cpu_init)(void);
	int (*pci_init)(void);
	void (*mmio_init)(void);
	/* Discovery table is required */
	bool use_discovery;
	/* The units in the discovery table should be ignored. */
	int *uncore_units_ignore;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
	.cpu_init = nhm_uncore_cpu_init,
};

static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = snb_uncore_pci_init,
};

static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = ivb_uncore_pci_init,
};

static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = hsw_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = bdw_uncore_pci_init,
};

static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
	.cpu_init = snbep_uncore_cpu_init,
	.pci_init = snbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
	.cpu_init = nhmex_uncore_cpu_init,
};

static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
	.cpu_init = ivbep_uncore_cpu_init,
	.pci_init = ivbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
	.cpu_init = hswep_uncore_cpu_init,
	.pci_init = hswep_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
	.cpu_init = bdx_uncore_cpu_init,
	.pci_init = bdx_uncore_pci_init,
};

static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
	.cpu_init = knl_uncore_cpu_init,
	.pci_init = knl_uncore_pci_init,
};

static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
	.cpu_init = skl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
	.cpu_init = skx_uncore_cpu_init,
	.pci_init = skx_uncore_pci_init,
};

static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
	.cpu_init = icl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.mmio_init = tgl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.mmio_init = tgl_l_uncore_mmio_init,
};

static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
	.cpu_init = adl_uncore_cpu_init,
	.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
	.cpu_init = mtl_uncore_cpu_init,
	.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun lnl_uncore_init __initconst = {
	.cpu_init = lnl_uncore_cpu_init,
	.mmio_init = lnl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
	.cpu_init = icx_uncore_cpu_init,
	.pci_init = icx_uncore_pci_init,
	.mmio_init = icx_uncore_mmio_init,
};
static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
	.cpu_init = snr_uncore_cpu_init,
	.pci_init = snr_uncore_pci_init,
	.mmio_init = snr_uncore_mmio_init,
};

static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
	.cpu_init = spr_uncore_cpu_init,
	.pci_init = spr_uncore_pci_init,
	.mmio_init = spr_uncore_mmio_init,
	.use_discovery = true,
	.uncore_units_ignore = spr_uncore_units_ignore,
};

static const struct intel_uncore_init_fun gnr_uncore_init __initconst = {
	.cpu_init = gnr_uncore_cpu_init,
	.pci_init = gnr_uncore_pci_init,
	.mmio_init = gnr_uncore_mmio_init,
	.use_discovery = true,
	.uncore_units_ignore = gnr_uncore_units_ignore,
};

static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
	.cpu_init = intel_uncore_generic_uncore_cpu_init,
	.pci_init = intel_uncore_generic_uncore_pci_init,
	.mmio_init = intel_uncore_generic_uncore_mmio_init,
};

static const struct x86_cpu_id intel_uncore_match[] __initconst = {
	X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init),
	X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init),
	X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init),
	X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init),
	X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init),
	X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init),
	X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init),
	X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init),
	X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init),
	X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init),
	X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init),
	X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init),
	X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init),
	X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init),
	X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init),
	X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init),
	X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init),
	X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init),
	X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init),
	X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init),
	X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init),
	X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init),
	X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init),
	X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init),
	X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init),
	X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init),
	X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init),
	X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init),
	X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init),
	X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init),
	X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init),
	X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init),
	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init),
	{},
};
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);

static int __init intel_uncore_init(void)
{
	const struct x86_cpu_id *id;
	struct intel_uncore_init_fun *uncore_init;
	int pret = 0, cret = 0, mret = 0, ret;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	__uncore_max_dies =
		topology_max_packages() * topology_max_dies_per_package();

	id = x86_match_cpu(intel_uncore_match);
	if (!id) {
		if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
			uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
		else
			return -ENODEV;
	} else {
		uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
		if (uncore_no_discover && uncore_init->use_discovery)
			return -ENODEV;
		if (uncore_init->use_discovery &&
		    !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
			return -ENODEV;
	}

	if (uncore_init->pci_init) {
		pret = uncore_init->pci_init();
		if (!pret)
			pret = uncore_pci_init();
	}

	if (uncore_init->cpu_init) {
		uncore_init->cpu_init();
		cret = uncore_cpu_init();
	}

	if (uncore_init->mmio_init) {
		uncore_init->mmio_init();
		mret = uncore_mmio_init();
	}

	if (cret && pret && mret) {
		ret = -ENODEV;
		goto free_discovery;
	}

	/* Install hotplug callbacks to setup the targets for each package */
	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
				"perf/x86/intel/uncore:online",
				uncore_event_cpu_online,
				uncore_event_cpu_offline);
	if (ret)
		goto err;
	return 0;

err:
	uncore_types_exit(uncore_msr_uncores);
	uncore_types_exit(uncore_mmio_uncores);
	uncore_pci_exit();
free_discovery:
	intel_uncore_clear_discovery_tables();
	return ret;
}
module_init(intel_uncore_init);

static void __exit intel_uncore_exit(void)
{
	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
	uncore_types_exit(uncore_msr_uncores);
	uncore_types_exit(uncore_mmio_uncores);
	uncore_pci_exit();
	intel_uncore_clear_discovery_tables();
}
module_exit(intel_uncore_exit);