// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "uncore.h"
#include "uncore_discovery.h"

static bool uncore_no_discover;
module_param(uncore_no_discover, bool, 0);
MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
				     "(default: enable the discovery mechanism).");
/* NULL-terminated empty type list; the default for the three lists below. */
struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* The PCI driver for the device which the uncore doesn't own. */
struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
int __uncore_max_dies;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

MODULE_DESCRIPTION("Support for Intel uncore performance events");
MODULE_LICENSE("GPL");

/*
 * Look up the die id recorded for @bus in the pci2phy map of the bus's
 * PCI segment. The list is walked under pci2phy_map_lock.
 *
 * Returns the stored pbus_to_dieid[] entry for the bus number, or -1 when
 * no map exists for the segment.
 */
int uncore_pcibus_to_dieid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int die_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			die_id = map->pbus_to_dieid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return die_id;
}

/*
 * Return the PCI domain number of the first bus that maps to @die, or
 * -EINVAL when pci_find_next_bus() runs out of buses without a match.
 */
int uncore_die_to_segment(int die)
{
	struct pci_bus *bus = NULL;

	/* Find first pci bus which attributes to specified die. */
	while ((bus = pci_find_next_bus(bus)) &&
	       (die != uncore_pcibus_to_dieid(bus)))
		;

	return bus ? pci_domain_nr(bus) : -EINVAL;
}

/*
 * Derive a logical die id for @dev from the CPUs local to its bus.
 *
 * Returns the logical_die_id of the first initialized CPU in
 * cpumask_of_pcibus() whose node equals pcibus_to_node() of the bus,
 * or -1 when no such CPU is found.
 */
int uncore_device_to_die(struct pci_dev *dev)
{
	int node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && cpu_to_node(cpu) == node)
			return c->topo.logical_die_id;
	}

	return -1;
}

/*
 * Unlink and free every entry on pci2phy_map_head.
 * Note: the list is walked here without taking pci2phy_map_lock.
 */
static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;

	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
		list_del(&map->list);
		kfree(map);
	}
}

/*
 * Find the pci2phy map for @segment, creating it when it does not exist.
 *
 * Must be called with pci2phy_map_lock held. The lock is dropped around
 * the GFP_KERNEL allocation and re-taken, so the list is searched again
 * afterwards in case an entry for @segment appeared in the meantime; the
 * unused allocation is then freed. A new map has every pbus_to_dieid[]
 * slot set to -1.
 *
 * Returns the map, or NULL when the allocation fails.
 */
struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	/* Second pass found nothing: publish the freshly allocated map. */
	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_dieid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	/* Non-NULL only when the re-lookup found an existing entry. */
	kfree(alloc);
	return map;
}

/*
 * Device attribute show callback: print the config string of the
 * uncore_event_desc that embeds @attr into @buf.
 */
ssize_t uncore_event_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

/*
 * Return the box of @pmu for the die that @cpu belongs to, or NULL when
 * the die id is out of range.
 */
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	unsigned int dieid = topology_logical_die_id(cpu);

	/*
	 * The unsigned check also catches the '-1' return value for non
	 * existent mappings in the topology map.
	 */
	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
}

/* Read the counter MSR whose address is event->hw.event_base. @box is unused. */
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/* Unmap the box's MMIO window if one was mapped. */
void uncore_mmio_exit_box(struct intel_uncore_box *box)
{
	if (box->io_addr)
		iounmap(box->io_addr);
}

/*
 * Read a 64-bit MMIO counter at offset event->hw.event_base.
 * Returns 0 when the box has no mapping or the offset fails
 * uncore_mmio_is_valid_offset().
 */
u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
			     struct perf_event *event)
{
	if (!box->io_addr)
		return 0;

	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
		return 0;

	return readq(box->io_addr + event->hw.event_base);
}

/*
 * generic get constraint function for shared match/mask registers.
 *
 * Returns NULL when the event needs no shared register, already holds
 * one, or could take a reference on it (the register was unused or is
 * programmed with the same config1/config2). Returns
 * &uncore_constraint_empty when the register is in use with a different
 * configuration.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		/* Remember the reference so uncore_put_constraint() drops it. */
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

/*
 * Drop the shared register reference taken by uncore_get_constraint()
 * and clear the event's alloc flag.
 */
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put constraint if extra reg was actually allocated. Also
	 * takes care of event which do not use an extra shared reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

/* Return the config value of shared register @idx, read under its lock. */
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

/*
 * Bind @event to counter @idx of @box: record the index, bump the
 * per-counter tag and fill in the control/counter register bases.
 */
static void uncore_assign_hw_event(struct intel_uncore_box *box,
				   struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (uncore_pmc_fixed(hwc->idx)) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	/* A non-zero return means the generic helper already set the bases. */
	if (intel_generic_uncore_assign_hw_event(event, box))
		return;

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
}

/*
 * Fold the counter's progress since the last read into event->count.
 *
 * The counter is narrower than 64 bits, so both samples are shifted up
 * by (64 - counter width) before subtracting and the difference is
 * shifted back down; this discards the bits above the counter width.
 */
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (uncore_pmc_freerunning(event->hw.idx))
		shift = 64 - uncore_freerunning_bits(box, event);
	else if (uncore_pmc_fixed(event->hw.idx))
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	/* Retry if prev_count changed between the read and the exchange. */
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
 * for SandyBridge.
So we use hrtimer to periodically poll the counter 302 * to avoid overflow. 303 */ 304 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) 305 { 306 struct intel_uncore_box *box; 307 struct perf_event *event; 308 unsigned long flags; 309 int bit; 310 311 box = container_of(hrtimer, struct intel_uncore_box, hrtimer); 312 if (!box->n_active || box->cpu != smp_processor_id()) 313 return HRTIMER_NORESTART; 314 /* 315 * disable local interrupt to prevent uncore_pmu_event_start/stop 316 * to interrupt the update process 317 */ 318 local_irq_save(flags); 319 320 /* 321 * handle boxes with an active event list as opposed to active 322 * counters 323 */ 324 list_for_each_entry(event, &box->active_list, active_entry) { 325 uncore_perf_event_update(box, event); 326 } 327 328 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) 329 uncore_perf_event_update(box, box->events[bit]); 330 331 local_irq_restore(flags); 332 333 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); 334 return HRTIMER_RESTART; 335 } 336 337 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 338 { 339 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 340 HRTIMER_MODE_REL_PINNED); 341 } 342 343 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) 344 { 345 hrtimer_cancel(&box->hrtimer); 346 } 347 348 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) 349 { 350 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 351 box->hrtimer.function = uncore_pmu_hrtimer; 352 } 353 354 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, 355 int node) 356 { 357 int i, size, numshared = type->num_shared_regs ; 358 struct intel_uncore_box *box; 359 360 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); 361 362 box = kzalloc_node(size, GFP_KERNEL, node); 363 if (!box) 364 return NULL; 365 366 for (i = 0; i < numshared; i++) 367 raw_spin_lock_init(&box->shared_regs[i].lock); 
368 369 uncore_pmu_init_hrtimer(box); 370 box->cpu = -1; 371 box->dieid = -1; 372 373 /* set default hrtimer timeout */ 374 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; 375 376 INIT_LIST_HEAD(&box->active_list); 377 378 return box; 379 } 380 381 /* 382 * Using uncore_pmu_event_init pmu event_init callback 383 * as a detection point for uncore events. 384 */ 385 static int uncore_pmu_event_init(struct perf_event *event); 386 387 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) 388 { 389 return &box->pmu->pmu == event->pmu; 390 } 391 392 static int 393 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, 394 bool dogrp) 395 { 396 struct perf_event *event; 397 int n, max_count; 398 399 max_count = box->pmu->type->num_counters; 400 if (box->pmu->type->fixed_ctl) 401 max_count++; 402 403 if (box->n_events >= max_count) 404 return -EINVAL; 405 406 n = box->n_events; 407 408 if (is_box_event(box, leader)) { 409 box->event_list[n] = leader; 410 n++; 411 } 412 413 if (!dogrp) 414 return n; 415 416 for_each_sibling_event(event, leader) { 417 if (!is_box_event(box, event) || 418 event->state <= PERF_EVENT_STATE_OFF) 419 continue; 420 421 if (n >= max_count) 422 return -EINVAL; 423 424 box->event_list[n] = event; 425 n++; 426 } 427 return n; 428 } 429 430 static struct event_constraint * 431 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) 432 { 433 struct intel_uncore_type *type = box->pmu->type; 434 struct event_constraint *c; 435 436 if (type->ops->get_constraint) { 437 c = type->ops->get_constraint(box, event); 438 if (c) 439 return c; 440 } 441 442 if (event->attr.config == UNCORE_FIXED_EVENT) 443 return &uncore_constraint_fixed; 444 445 if (type->constraints) { 446 for_each_event_constraint(c, type->constraints) { 447 if ((event->hw.config & c->cmask) == c->code) 448 return c; 449 } 450 } 451 452 return &type->unconstrainted; 453 } 454 455 static void 
uncore_put_event_constraint(struct intel_uncore_box *box, 456 struct perf_event *event) 457 { 458 if (box->pmu->type->ops->put_constraint) 459 box->pmu->type->ops->put_constraint(box, event); 460 } 461 462 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 463 { 464 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 465 struct event_constraint *c; 466 int i, wmin, wmax, ret = 0; 467 struct hw_perf_event *hwc; 468 469 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 470 471 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 472 c = uncore_get_event_constraint(box, box->event_list[i]); 473 box->event_constraint[i] = c; 474 wmin = min(wmin, c->weight); 475 wmax = max(wmax, c->weight); 476 } 477 478 /* fastpath, try to reuse previous register */ 479 for (i = 0; i < n; i++) { 480 hwc = &box->event_list[i]->hw; 481 c = box->event_constraint[i]; 482 483 /* never assigned */ 484 if (hwc->idx == -1) 485 break; 486 487 /* constraint still honored */ 488 if (!test_bit(hwc->idx, c->idxmsk)) 489 break; 490 491 /* not already used */ 492 if (test_bit(hwc->idx, used_mask)) 493 break; 494 495 __set_bit(hwc->idx, used_mask); 496 if (assign) 497 assign[i] = hwc->idx; 498 } 499 /* slow path */ 500 if (i != n) 501 ret = perf_assign_events(box->event_constraint, n, 502 wmin, wmax, n, assign); 503 504 if (!assign || ret) { 505 for (i = 0; i < n; i++) 506 uncore_put_event_constraint(box, box->event_list[i]); 507 } 508 return ret ? -EINVAL : 0; 509 } 510 511 void uncore_pmu_event_start(struct perf_event *event, int flags) 512 { 513 struct intel_uncore_box *box = uncore_event_to_box(event); 514 int idx = event->hw.idx; 515 516 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) 517 return; 518 519 /* 520 * Free running counter is read-only and always active. 521 * Use the current counter value as start point. 522 * There is no overflow interrupt for free running counter. 
523 * Use hrtimer to periodically poll the counter to avoid overflow. 524 */ 525 if (uncore_pmc_freerunning(event->hw.idx)) { 526 list_add_tail(&event->active_entry, &box->active_list); 527 local64_set(&event->hw.prev_count, 528 uncore_read_counter(box, event)); 529 if (box->n_active++ == 0) 530 uncore_pmu_start_hrtimer(box); 531 return; 532 } 533 534 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) 535 return; 536 537 event->hw.state = 0; 538 box->events[idx] = event; 539 box->n_active++; 540 __set_bit(idx, box->active_mask); 541 542 local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); 543 uncore_enable_event(box, event); 544 545 if (box->n_active == 1) 546 uncore_pmu_start_hrtimer(box); 547 } 548 549 void uncore_pmu_event_stop(struct perf_event *event, int flags) 550 { 551 struct intel_uncore_box *box = uncore_event_to_box(event); 552 struct hw_perf_event *hwc = &event->hw; 553 554 /* Cannot disable free running counter which is read-only */ 555 if (uncore_pmc_freerunning(hwc->idx)) { 556 list_del(&event->active_entry); 557 if (--box->n_active == 0) 558 uncore_pmu_cancel_hrtimer(box); 559 uncore_perf_event_update(box, event); 560 return; 561 } 562 563 if (__test_and_clear_bit(hwc->idx, box->active_mask)) { 564 uncore_disable_event(box, event); 565 box->n_active--; 566 box->events[hwc->idx] = NULL; 567 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 568 hwc->state |= PERF_HES_STOPPED; 569 570 if (box->n_active == 0) 571 uncore_pmu_cancel_hrtimer(box); 572 } 573 574 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 575 /* 576 * Drain the remaining delta count out of a event 577 * that we are disabling: 578 */ 579 uncore_perf_event_update(box, event); 580 hwc->state |= PERF_HES_UPTODATE; 581 } 582 } 583 584 int uncore_pmu_event_add(struct perf_event *event, int flags) 585 { 586 struct intel_uncore_box *box = uncore_event_to_box(event); 587 struct hw_perf_event *hwc = &event->hw; 588 int assign[UNCORE_PMC_IDX_MAX]; 589 int i, 
n, ret; 590 591 if (!box) 592 return -ENODEV; 593 594 /* 595 * The free funning counter is assigned in event_init(). 596 * The free running counter event and free running counter 597 * are 1:1 mapped. It doesn't need to be tracked in event_list. 598 */ 599 if (uncore_pmc_freerunning(hwc->idx)) { 600 if (flags & PERF_EF_START) 601 uncore_pmu_event_start(event, 0); 602 return 0; 603 } 604 605 ret = n = uncore_collect_events(box, event, false); 606 if (ret < 0) 607 return ret; 608 609 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 610 if (!(flags & PERF_EF_START)) 611 hwc->state |= PERF_HES_ARCH; 612 613 ret = uncore_assign_events(box, assign, n); 614 if (ret) 615 return ret; 616 617 /* save events moving to new counters */ 618 for (i = 0; i < box->n_events; i++) { 619 event = box->event_list[i]; 620 hwc = &event->hw; 621 622 if (hwc->idx == assign[i] && 623 hwc->last_tag == box->tags[assign[i]]) 624 continue; 625 /* 626 * Ensure we don't accidentally enable a stopped 627 * counter simply because we rescheduled. 628 */ 629 if (hwc->state & PERF_HES_STOPPED) 630 hwc->state |= PERF_HES_ARCH; 631 632 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 633 } 634 635 /* reprogram moved events into new counters */ 636 for (i = 0; i < n; i++) { 637 event = box->event_list[i]; 638 hwc = &event->hw; 639 640 if (hwc->idx != assign[i] || 641 hwc->last_tag != box->tags[assign[i]]) 642 uncore_assign_hw_event(box, event, assign[i]); 643 else if (i < box->n_events) 644 continue; 645 646 if (hwc->state & PERF_HES_ARCH) 647 continue; 648 649 uncore_pmu_event_start(event, 0); 650 } 651 box->n_events = n; 652 653 return 0; 654 } 655 656 void uncore_pmu_event_del(struct perf_event *event, int flags) 657 { 658 struct intel_uncore_box *box = uncore_event_to_box(event); 659 int i; 660 661 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 662 663 /* 664 * The event for free running counter is not tracked by event_list. 665 * It doesn't need to force event->hw.idx = -1 to reassign the counter. 
666 * Because the event and the free running counter are 1:1 mapped. 667 */ 668 if (uncore_pmc_freerunning(event->hw.idx)) 669 return; 670 671 for (i = 0; i < box->n_events; i++) { 672 if (event == box->event_list[i]) { 673 uncore_put_event_constraint(box, event); 674 675 for (++i; i < box->n_events; i++) 676 box->event_list[i - 1] = box->event_list[i]; 677 678 --box->n_events; 679 break; 680 } 681 } 682 683 event->hw.idx = -1; 684 event->hw.last_tag = ~0ULL; 685 } 686 687 void uncore_pmu_event_read(struct perf_event *event) 688 { 689 struct intel_uncore_box *box = uncore_event_to_box(event); 690 uncore_perf_event_update(box, event); 691 } 692 693 /* 694 * validation ensures the group can be loaded onto the 695 * PMU if it was the only group available. 696 */ 697 static int uncore_validate_group(struct intel_uncore_pmu *pmu, 698 struct perf_event *event) 699 { 700 struct perf_event *leader = event->group_leader; 701 struct intel_uncore_box *fake_box; 702 int ret = -EINVAL, n; 703 704 /* The free running counter is always active. 
*/ 705 if (uncore_pmc_freerunning(event->hw.idx)) 706 return 0; 707 708 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); 709 if (!fake_box) 710 return -ENOMEM; 711 712 fake_box->pmu = pmu; 713 /* 714 * the event is not yet connected with its 715 * siblings therefore we must first collect 716 * existing siblings, then add the new event 717 * before we can simulate the scheduling 718 */ 719 n = uncore_collect_events(fake_box, leader, true); 720 if (n < 0) 721 goto out; 722 723 fake_box->n_events = n; 724 n = uncore_collect_events(fake_box, event, false); 725 if (n < 0) 726 goto out; 727 728 fake_box->n_events = n; 729 730 ret = uncore_assign_events(fake_box, NULL, n); 731 out: 732 kfree(fake_box); 733 return ret; 734 } 735 736 static int uncore_pmu_event_init(struct perf_event *event) 737 { 738 struct intel_uncore_pmu *pmu; 739 struct intel_uncore_box *box; 740 struct hw_perf_event *hwc = &event->hw; 741 int ret; 742 743 if (event->attr.type != event->pmu->type) 744 return -ENOENT; 745 746 pmu = uncore_event_to_pmu(event); 747 /* no device found for this pmu */ 748 if (pmu->func_id < 0) 749 return -ENOENT; 750 751 /* Sampling not supported yet */ 752 if (hwc->sample_period) 753 return -EINVAL; 754 755 /* 756 * Place all uncore events for a particular physical package 757 * onto a single cpu 758 */ 759 if (event->cpu < 0) 760 return -EINVAL; 761 box = uncore_pmu_to_box(pmu, event->cpu); 762 if (!box || box->cpu < 0) 763 return -EINVAL; 764 event->cpu = box->cpu; 765 event->pmu_private = box; 766 767 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; 768 769 event->hw.idx = -1; 770 event->hw.last_tag = ~0ULL; 771 event->hw.extra_reg.idx = EXTRA_REG_NONE; 772 event->hw.branch_reg.idx = EXTRA_REG_NONE; 773 774 if (event->attr.config == UNCORE_FIXED_EVENT) { 775 /* no fixed counter */ 776 if (!pmu->type->fixed_ctl) 777 return -EINVAL; 778 /* 779 * if there is only one fixed counter, only the first pmu 780 * can access the fixed counter 781 */ 782 if 
(pmu->type->single_fixed && pmu->pmu_idx > 0) 783 return -EINVAL; 784 785 /* fixed counters have event field hardcoded to zero */ 786 hwc->config = 0ULL; 787 } else if (is_freerunning_event(event)) { 788 hwc->config = event->attr.config; 789 if (!check_valid_freerunning_event(box, event)) 790 return -EINVAL; 791 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; 792 /* 793 * The free running counter event and free running counter 794 * are always 1:1 mapped. 795 * The free running counter is always active. 796 * Assign the free running counter here. 797 */ 798 event->hw.event_base = uncore_freerunning_counter(box, event); 799 } else { 800 hwc->config = event->attr.config & 801 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); 802 if (pmu->type->ops->hw_config) { 803 ret = pmu->type->ops->hw_config(box, event); 804 if (ret) 805 return ret; 806 } 807 } 808 809 if (event->group_leader != event) 810 ret = uncore_validate_group(pmu, event); 811 else 812 ret = 0; 813 814 return ret; 815 } 816 817 static void uncore_pmu_enable(struct pmu *pmu) 818 { 819 struct intel_uncore_pmu *uncore_pmu; 820 struct intel_uncore_box *box; 821 822 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 823 824 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 825 if (!box) 826 return; 827 828 if (uncore_pmu->type->ops->enable_box) 829 uncore_pmu->type->ops->enable_box(box); 830 } 831 832 static void uncore_pmu_disable(struct pmu *pmu) 833 { 834 struct intel_uncore_pmu *uncore_pmu; 835 struct intel_uncore_box *box; 836 837 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 838 839 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 840 if (!box) 841 return; 842 843 if (uncore_pmu->type->ops->disable_box) 844 uncore_pmu->type->ops->disable_box(box); 845 } 846 847 static ssize_t uncore_get_attr_cpumask(struct device *dev, 848 struct device_attribute *attr, char *buf) 849 { 850 struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct 
intel_uncore_pmu, pmu); 851 852 return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask); 853 } 854 855 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); 856 857 static struct attribute *uncore_pmu_attrs[] = { 858 &dev_attr_cpumask.attr, 859 NULL, 860 }; 861 862 static const struct attribute_group uncore_pmu_attr_group = { 863 .attrs = uncore_pmu_attrs, 864 }; 865 866 static inline int uncore_get_box_id(struct intel_uncore_type *type, 867 struct intel_uncore_pmu *pmu) 868 { 869 if (type->boxes) 870 return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx); 871 872 return pmu->pmu_idx; 873 } 874 875 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) 876 { 877 struct intel_uncore_type *type = pmu->type; 878 879 if (type->num_boxes == 1) 880 sprintf(pmu_name, "uncore_type_%u", type->type_id); 881 else { 882 sprintf(pmu_name, "uncore_type_%u_%d", 883 type->type_id, uncore_get_box_id(type, pmu)); 884 } 885 } 886 887 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) 888 { 889 struct intel_uncore_type *type = pmu->type; 890 891 /* 892 * No uncore block name in discovery table. 893 * Use uncore_type_&typeid_&boxid as name. 894 */ 895 if (!type->name) { 896 uncore_get_alias_name(pmu->name, pmu); 897 return; 898 } 899 900 if (type->num_boxes == 1) { 901 if (strlen(type->name) > 0) 902 sprintf(pmu->name, "uncore_%s", type->name); 903 else 904 sprintf(pmu->name, "uncore"); 905 } else { 906 /* 907 * Use the box ID from the discovery table if applicable. 
908 */ 909 sprintf(pmu->name, "uncore_%s_%d", type->name, 910 uncore_get_box_id(type, pmu)); 911 } 912 } 913 914 static int uncore_pmu_register(struct intel_uncore_pmu *pmu) 915 { 916 int ret; 917 918 if (!pmu->type->pmu) { 919 pmu->pmu = (struct pmu) { 920 .attr_groups = pmu->type->attr_groups, 921 .task_ctx_nr = perf_invalid_context, 922 .pmu_enable = uncore_pmu_enable, 923 .pmu_disable = uncore_pmu_disable, 924 .event_init = uncore_pmu_event_init, 925 .add = uncore_pmu_event_add, 926 .del = uncore_pmu_event_del, 927 .start = uncore_pmu_event_start, 928 .stop = uncore_pmu_event_stop, 929 .read = uncore_pmu_event_read, 930 .module = THIS_MODULE, 931 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 932 .attr_update = pmu->type->attr_update, 933 }; 934 } else { 935 pmu->pmu = *pmu->type->pmu; 936 pmu->pmu.attr_groups = pmu->type->attr_groups; 937 pmu->pmu.attr_update = pmu->type->attr_update; 938 } 939 940 uncore_get_pmu_name(pmu); 941 942 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); 943 if (!ret) 944 pmu->registered = true; 945 return ret; 946 } 947 948 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) 949 { 950 if (!pmu->registered) 951 return; 952 perf_pmu_unregister(&pmu->pmu); 953 pmu->registered = false; 954 } 955 956 static void uncore_free_boxes(struct intel_uncore_pmu *pmu) 957 { 958 int die; 959 960 for (die = 0; die < uncore_max_dies(); die++) 961 kfree(pmu->boxes[die]); 962 kfree(pmu->boxes); 963 } 964 965 static void uncore_type_exit(struct intel_uncore_type *type) 966 { 967 struct intel_uncore_pmu *pmu = type->pmus; 968 int i; 969 970 if (type->cleanup_mapping) 971 type->cleanup_mapping(type); 972 973 if (type->cleanup_extra_boxes) 974 type->cleanup_extra_boxes(type); 975 976 if (pmu) { 977 for (i = 0; i < type->num_boxes; i++, pmu++) { 978 uncore_pmu_unregister(pmu); 979 uncore_free_boxes(pmu); 980 } 981 kfree(type->pmus); 982 type->pmus = NULL; 983 } 984 985 kfree(type->events_group); 986 type->events_group = NULL; 987 } 988 989 static 
void uncore_types_exit(struct intel_uncore_type **types) 990 { 991 for (; *types; types++) 992 uncore_type_exit(*types); 993 } 994 995 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) 996 { 997 struct intel_uncore_pmu *pmus; 998 size_t size; 999 int i, j; 1000 1001 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); 1002 if (!pmus) 1003 return -ENOMEM; 1004 1005 size = uncore_max_dies() * sizeof(struct intel_uncore_box *); 1006 1007 for (i = 0; i < type->num_boxes; i++) { 1008 pmus[i].func_id = setid ? i : -1; 1009 pmus[i].pmu_idx = i; 1010 pmus[i].type = type; 1011 pmus[i].boxes = kzalloc(size, GFP_KERNEL); 1012 if (!pmus[i].boxes) 1013 goto err; 1014 } 1015 1016 type->pmus = pmus; 1017 type->unconstrainted = (struct event_constraint) 1018 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 1019 0, type->num_counters, 0, 0); 1020 1021 if (type->event_descs) { 1022 struct { 1023 struct attribute_group group; 1024 struct attribute *attrs[]; 1025 } *attr_group; 1026 for (i = 0; type->event_descs[i].attr.attr.name; i++); 1027 1028 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), 1029 GFP_KERNEL); 1030 if (!attr_group) 1031 goto err; 1032 1033 attr_group->group.name = "events"; 1034 attr_group->group.attrs = attr_group->attrs; 1035 1036 for (j = 0; j < i; j++) 1037 attr_group->attrs[j] = &type->event_descs[j].attr.attr; 1038 1039 type->events_group = &attr_group->group; 1040 } 1041 1042 type->pmu_group = &uncore_pmu_attr_group; 1043 1044 if (type->set_mapping) 1045 type->set_mapping(type); 1046 1047 return 0; 1048 1049 err: 1050 for (i = 0; i < type->num_boxes; i++) 1051 kfree(pmus[i].boxes); 1052 kfree(pmus); 1053 1054 return -ENOMEM; 1055 } 1056 1057 static int __init 1058 uncore_types_init(struct intel_uncore_type **types, bool setid) 1059 { 1060 int ret; 1061 1062 for (; *types; types++) { 1063 ret = uncore_type_init(*types, setid); 1064 if (ret) 1065 return ret; 1066 } 1067 return 0; 1068 } 1069 1070 /* 1071 * 
Get the die information of a PCI device. 1072 * @pdev: The PCI device. 1073 * @die: The die id which the device maps to. 1074 */ 1075 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) 1076 { 1077 *die = uncore_pcibus_to_dieid(pdev->bus); 1078 if (*die < 0) 1079 return -EINVAL; 1080 1081 return 0; 1082 } 1083 1084 static struct intel_uncore_pmu * 1085 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) 1086 { 1087 struct intel_uncore_type **types = uncore_pci_uncores; 1088 struct intel_uncore_discovery_unit *unit; 1089 struct intel_uncore_type *type; 1090 struct rb_node *node; 1091 1092 for (; *types; types++) { 1093 type = *types; 1094 1095 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1096 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1097 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) && 1098 pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) && 1099 pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr)) 1100 return &type->pmus[unit->pmu_idx]; 1101 } 1102 } 1103 1104 return NULL; 1105 } 1106 1107 /* 1108 * Find the PMU of a PCI device. 1109 * @pdev: The PCI device. 1110 * @ids: The ID table of the available PCI devices with a PMU. 1111 * If NULL, search the whole uncore_pci_uncores. 
1112 */ 1113 static struct intel_uncore_pmu * 1114 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) 1115 { 1116 struct intel_uncore_pmu *pmu = NULL; 1117 struct intel_uncore_type *type; 1118 kernel_ulong_t data; 1119 unsigned int devfn; 1120 1121 if (!ids) 1122 return uncore_pci_find_dev_pmu_from_types(pdev); 1123 1124 while (ids && ids->vendor) { 1125 if ((ids->vendor == pdev->vendor) && 1126 (ids->device == pdev->device)) { 1127 data = ids->driver_data; 1128 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), 1129 UNCORE_PCI_DEV_FUNC(data)); 1130 if (devfn == pdev->devfn) { 1131 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; 1132 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; 1133 break; 1134 } 1135 } 1136 ids++; 1137 } 1138 return pmu; 1139 } 1140 1141 /* 1142 * Register the PMU for a PCI device 1143 * @pdev: The PCI device. 1144 * @type: The corresponding PMU type of the device. 1145 * @pmu: The corresponding PMU of the device. 1146 * @die: The die id which the device maps to. 
1147 */ 1148 static int uncore_pci_pmu_register(struct pci_dev *pdev, 1149 struct intel_uncore_type *type, 1150 struct intel_uncore_pmu *pmu, 1151 int die) 1152 { 1153 struct intel_uncore_box *box; 1154 int ret; 1155 1156 if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) 1157 return -EINVAL; 1158 1159 box = uncore_alloc_box(type, NUMA_NO_NODE); 1160 if (!box) 1161 return -ENOMEM; 1162 1163 if (pmu->func_id < 0) 1164 pmu->func_id = pdev->devfn; 1165 else 1166 WARN_ON_ONCE(pmu->func_id != pdev->devfn); 1167 1168 atomic_inc(&box->refcnt); 1169 box->dieid = die; 1170 box->pci_dev = pdev; 1171 box->pmu = pmu; 1172 uncore_box_init(box); 1173 1174 pmu->boxes[die] = box; 1175 if (atomic_inc_return(&pmu->activeboxes) > 1) 1176 return 0; 1177 1178 /* First active box registers the pmu */ 1179 ret = uncore_pmu_register(pmu); 1180 if (ret) { 1181 pmu->boxes[die] = NULL; 1182 uncore_box_exit(box); 1183 kfree(box); 1184 } 1185 return ret; 1186 } 1187 1188 /* 1189 * add a pci uncore device 1190 */ 1191 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1192 { 1193 struct intel_uncore_type *type; 1194 struct intel_uncore_pmu *pmu = NULL; 1195 int die, ret; 1196 1197 ret = uncore_pci_get_dev_die_info(pdev, &die); 1198 if (ret) 1199 return ret; 1200 1201 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { 1202 int idx = UNCORE_PCI_DEV_IDX(id->driver_data); 1203 1204 uncore_extra_pci_dev[die].dev[idx] = pdev; 1205 pci_set_drvdata(pdev, NULL); 1206 return 0; 1207 } 1208 1209 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 1210 1211 /* 1212 * Some platforms, e.g. Knights Landing, use a common PCI device ID 1213 * for multiple instances of an uncore PMU device type. We should check 1214 * PCI slot and func to indicate the uncore box. 
1215 */ 1216 if (id->driver_data & ~0xffff) { 1217 struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); 1218 1219 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); 1220 if (pmu == NULL) 1221 return -ENODEV; 1222 } else { 1223 /* 1224 * for performance monitoring unit with multiple boxes, 1225 * each box has a different function id. 1226 */ 1227 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; 1228 } 1229 1230 ret = uncore_pci_pmu_register(pdev, type, pmu, die); 1231 1232 pci_set_drvdata(pdev, pmu->boxes[die]); 1233 1234 return ret; 1235 } 1236 1237 /* 1238 * Unregister the PMU of a PCI device 1239 * @pmu: The corresponding PMU is unregistered. 1240 * @die: The die id which the device maps to. 1241 */ 1242 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) 1243 { 1244 struct intel_uncore_box *box = pmu->boxes[die]; 1245 1246 pmu->boxes[die] = NULL; 1247 if (atomic_dec_return(&pmu->activeboxes) == 0) 1248 uncore_pmu_unregister(pmu); 1249 uncore_box_exit(box); 1250 kfree(box); 1251 } 1252 1253 static void uncore_pci_remove(struct pci_dev *pdev) 1254 { 1255 struct intel_uncore_box *box; 1256 struct intel_uncore_pmu *pmu; 1257 int i, die; 1258 1259 if (uncore_pci_get_dev_die_info(pdev, &die)) 1260 return; 1261 1262 box = pci_get_drvdata(pdev); 1263 if (!box) { 1264 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { 1265 if (uncore_extra_pci_dev[die].dev[i] == pdev) { 1266 uncore_extra_pci_dev[die].dev[i] = NULL; 1267 break; 1268 } 1269 } 1270 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); 1271 return; 1272 } 1273 1274 pmu = box->pmu; 1275 1276 pci_set_drvdata(pdev, NULL); 1277 1278 uncore_pci_pmu_unregister(pmu, die); 1279 } 1280 1281 static int uncore_bus_notify(struct notifier_block *nb, 1282 unsigned long action, void *data, 1283 const struct pci_device_id *ids) 1284 { 1285 struct device *dev = data; 1286 struct pci_dev *pdev = to_pci_dev(dev); 1287 struct intel_uncore_pmu *pmu; 1288 int die; 1289 1290 /* Unregister the PMU 
when the device is going to be deleted. */ 1291 if (action != BUS_NOTIFY_DEL_DEVICE) 1292 return NOTIFY_DONE; 1293 1294 pmu = uncore_pci_find_dev_pmu(pdev, ids); 1295 if (!pmu) 1296 return NOTIFY_DONE; 1297 1298 if (uncore_pci_get_dev_die_info(pdev, &die)) 1299 return NOTIFY_DONE; 1300 1301 uncore_pci_pmu_unregister(pmu, die); 1302 1303 return NOTIFY_OK; 1304 } 1305 1306 static int uncore_pci_sub_bus_notify(struct notifier_block *nb, 1307 unsigned long action, void *data) 1308 { 1309 return uncore_bus_notify(nb, action, data, 1310 uncore_pci_sub_driver->id_table); 1311 } 1312 1313 static struct notifier_block uncore_pci_sub_notifier = { 1314 .notifier_call = uncore_pci_sub_bus_notify, 1315 }; 1316 1317 static void uncore_pci_sub_driver_init(void) 1318 { 1319 const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; 1320 struct intel_uncore_type *type; 1321 struct intel_uncore_pmu *pmu; 1322 struct pci_dev *pci_sub_dev; 1323 bool notify = false; 1324 unsigned int devfn; 1325 int die; 1326 1327 while (ids && ids->vendor) { 1328 pci_sub_dev = NULL; 1329 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; 1330 /* 1331 * Search the available device, and register the 1332 * corresponding PMU. 
1333 */ 1334 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 1335 ids->device, pci_sub_dev))) { 1336 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), 1337 UNCORE_PCI_DEV_FUNC(ids->driver_data)); 1338 if (devfn != pci_sub_dev->devfn) 1339 continue; 1340 1341 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; 1342 if (!pmu) 1343 continue; 1344 1345 if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) 1346 continue; 1347 1348 if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, 1349 die)) 1350 notify = true; 1351 } 1352 ids++; 1353 } 1354 1355 if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) 1356 notify = false; 1357 1358 if (!notify) 1359 uncore_pci_sub_driver = NULL; 1360 } 1361 1362 static int uncore_pci_bus_notify(struct notifier_block *nb, 1363 unsigned long action, void *data) 1364 { 1365 return uncore_bus_notify(nb, action, data, NULL); 1366 } 1367 1368 static struct notifier_block uncore_pci_notifier = { 1369 .notifier_call = uncore_pci_bus_notify, 1370 }; 1371 1372 1373 static void uncore_pci_pmus_register(void) 1374 { 1375 struct intel_uncore_type **types = uncore_pci_uncores; 1376 struct intel_uncore_discovery_unit *unit; 1377 struct intel_uncore_type *type; 1378 struct intel_uncore_pmu *pmu; 1379 struct rb_node *node; 1380 struct pci_dev *pdev; 1381 1382 for (; *types; types++) { 1383 type = *types; 1384 1385 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1386 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1387 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr), 1388 UNCORE_DISCOVERY_PCI_BUS(unit->addr), 1389 UNCORE_DISCOVERY_PCI_DEVFN(unit->addr)); 1390 1391 if (!pdev) 1392 continue; 1393 pmu = &type->pmus[unit->pmu_idx]; 1394 uncore_pci_pmu_register(pdev, type, pmu, unit->die); 1395 } 1396 } 1397 1398 bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); 1399 } 1400 1401 static int __init uncore_pci_init(void) 1402 { 1403 size_t size; 1404 
int ret; 1405 1406 size = uncore_max_dies() * sizeof(struct pci_extra_dev); 1407 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); 1408 if (!uncore_extra_pci_dev) { 1409 ret = -ENOMEM; 1410 goto err; 1411 } 1412 1413 ret = uncore_types_init(uncore_pci_uncores, false); 1414 if (ret) 1415 goto errtype; 1416 1417 if (uncore_pci_driver) { 1418 uncore_pci_driver->probe = uncore_pci_probe; 1419 uncore_pci_driver->remove = uncore_pci_remove; 1420 1421 ret = pci_register_driver(uncore_pci_driver); 1422 if (ret) 1423 goto errtype; 1424 } else 1425 uncore_pci_pmus_register(); 1426 1427 if (uncore_pci_sub_driver) 1428 uncore_pci_sub_driver_init(); 1429 1430 pcidrv_registered = true; 1431 return 0; 1432 1433 errtype: 1434 uncore_types_exit(uncore_pci_uncores); 1435 kfree(uncore_extra_pci_dev); 1436 uncore_extra_pci_dev = NULL; 1437 uncore_free_pcibus_map(); 1438 err: 1439 uncore_pci_uncores = empty_uncore; 1440 return ret; 1441 } 1442 1443 static void uncore_pci_exit(void) 1444 { 1445 if (pcidrv_registered) { 1446 pcidrv_registered = false; 1447 if (uncore_pci_sub_driver) 1448 bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); 1449 if (uncore_pci_driver) 1450 pci_unregister_driver(uncore_pci_driver); 1451 else 1452 bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); 1453 uncore_types_exit(uncore_pci_uncores); 1454 kfree(uncore_extra_pci_dev); 1455 uncore_free_pcibus_map(); 1456 } 1457 } 1458 1459 static bool uncore_die_has_box(struct intel_uncore_type *type, 1460 int die, unsigned int pmu_idx) 1461 { 1462 if (!type->boxes) 1463 return true; 1464 1465 if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) 1466 return false; 1467 1468 return true; 1469 } 1470 1471 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, 1472 int new_cpu) 1473 { 1474 struct intel_uncore_pmu *pmu = type->pmus; 1475 struct intel_uncore_box *box; 1476 int i, die; 1477 1478 die = topology_logical_die_id(old_cpu < 0 ? 
new_cpu : old_cpu); 1479 for (i = 0; i < type->num_boxes; i++, pmu++) { 1480 box = pmu->boxes[die]; 1481 if (!box) 1482 continue; 1483 1484 if (old_cpu < 0) { 1485 WARN_ON_ONCE(box->cpu != -1); 1486 if (uncore_die_has_box(type, die, pmu->pmu_idx)) { 1487 box->cpu = new_cpu; 1488 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1489 } 1490 continue; 1491 } 1492 1493 WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu); 1494 box->cpu = -1; 1495 cpumask_clear_cpu(old_cpu, &pmu->cpu_mask); 1496 if (new_cpu < 0) 1497 continue; 1498 1499 if (!uncore_die_has_box(type, die, pmu->pmu_idx)) 1500 continue; 1501 uncore_pmu_cancel_hrtimer(box); 1502 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); 1503 box->cpu = new_cpu; 1504 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1505 } 1506 } 1507 1508 static void uncore_change_context(struct intel_uncore_type **uncores, 1509 int old_cpu, int new_cpu) 1510 { 1511 for (; *uncores; uncores++) 1512 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1513 } 1514 1515 static void uncore_box_unref(struct intel_uncore_type **types, int id) 1516 { 1517 struct intel_uncore_type *type; 1518 struct intel_uncore_pmu *pmu; 1519 struct intel_uncore_box *box; 1520 int i; 1521 1522 for (; *types; types++) { 1523 type = *types; 1524 pmu = type->pmus; 1525 for (i = 0; i < type->num_boxes; i++, pmu++) { 1526 box = pmu->boxes[id]; 1527 if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0) 1528 uncore_box_exit(box); 1529 } 1530 } 1531 } 1532 1533 static int uncore_event_cpu_offline(unsigned int cpu) 1534 { 1535 int die, target; 1536 1537 /* Check if exiting cpu is used for collecting uncore events */ 1538 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) 1539 goto unref; 1540 /* Find a new cpu to collect uncore events */ 1541 target = cpumask_any_but(topology_die_cpumask(cpu), cpu); 1542 1543 /* Migrate uncore events to the new target */ 1544 if (target < nr_cpu_ids) 1545 cpumask_set_cpu(target, &uncore_cpu_mask); 1546 else 1547 target = -1; 
1548 1549 uncore_change_context(uncore_msr_uncores, cpu, target); 1550 uncore_change_context(uncore_mmio_uncores, cpu, target); 1551 uncore_change_context(uncore_pci_uncores, cpu, target); 1552 1553 unref: 1554 /* Clear the references */ 1555 die = topology_logical_die_id(cpu); 1556 uncore_box_unref(uncore_msr_uncores, die); 1557 uncore_box_unref(uncore_mmio_uncores, die); 1558 return 0; 1559 } 1560 1561 static int allocate_boxes(struct intel_uncore_type **types, 1562 unsigned int die, unsigned int cpu) 1563 { 1564 struct intel_uncore_box *box, *tmp; 1565 struct intel_uncore_type *type; 1566 struct intel_uncore_pmu *pmu; 1567 LIST_HEAD(allocated); 1568 int i; 1569 1570 /* Try to allocate all required boxes */ 1571 for (; *types; types++) { 1572 type = *types; 1573 pmu = type->pmus; 1574 for (i = 0; i < type->num_boxes; i++, pmu++) { 1575 if (pmu->boxes[die]) 1576 continue; 1577 box = uncore_alloc_box(type, cpu_to_node(cpu)); 1578 if (!box) 1579 goto cleanup; 1580 box->pmu = pmu; 1581 box->dieid = die; 1582 list_add(&box->active_list, &allocated); 1583 } 1584 } 1585 /* Install them in the pmus */ 1586 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1587 list_del_init(&box->active_list); 1588 box->pmu->boxes[die] = box; 1589 } 1590 return 0; 1591 1592 cleanup: 1593 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1594 list_del_init(&box->active_list); 1595 kfree(box); 1596 } 1597 return -ENOMEM; 1598 } 1599 1600 static int uncore_box_ref(struct intel_uncore_type **types, 1601 int id, unsigned int cpu) 1602 { 1603 struct intel_uncore_type *type; 1604 struct intel_uncore_pmu *pmu; 1605 struct intel_uncore_box *box; 1606 int i, ret; 1607 1608 ret = allocate_boxes(types, id, cpu); 1609 if (ret) 1610 return ret; 1611 1612 for (; *types; types++) { 1613 type = *types; 1614 pmu = type->pmus; 1615 for (i = 0; i < type->num_boxes; i++, pmu++) { 1616 box = pmu->boxes[id]; 1617 if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1) 1618 
uncore_box_init(box); 1619 } 1620 } 1621 return 0; 1622 } 1623 1624 static int uncore_event_cpu_online(unsigned int cpu) 1625 { 1626 int die, target, msr_ret, mmio_ret; 1627 1628 die = topology_logical_die_id(cpu); 1629 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); 1630 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); 1631 if (msr_ret && mmio_ret) 1632 return -ENOMEM; 1633 1634 /* 1635 * Check if there is an online cpu in the package 1636 * which collects uncore events already. 1637 */ 1638 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); 1639 if (target < nr_cpu_ids) 1640 return 0; 1641 1642 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1643 1644 if (!msr_ret) 1645 uncore_change_context(uncore_msr_uncores, -1, cpu); 1646 if (!mmio_ret) 1647 uncore_change_context(uncore_mmio_uncores, -1, cpu); 1648 uncore_change_context(uncore_pci_uncores, -1, cpu); 1649 return 0; 1650 } 1651 1652 static int __init type_pmu_register(struct intel_uncore_type *type) 1653 { 1654 int i, ret; 1655 1656 for (i = 0; i < type->num_boxes; i++) { 1657 ret = uncore_pmu_register(&type->pmus[i]); 1658 if (ret) 1659 return ret; 1660 } 1661 return 0; 1662 } 1663 1664 static int __init uncore_msr_pmus_register(void) 1665 { 1666 struct intel_uncore_type **types = uncore_msr_uncores; 1667 int ret; 1668 1669 for (; *types; types++) { 1670 ret = type_pmu_register(*types); 1671 if (ret) 1672 return ret; 1673 } 1674 return 0; 1675 } 1676 1677 static int __init uncore_cpu_init(void) 1678 { 1679 int ret; 1680 1681 ret = uncore_types_init(uncore_msr_uncores, true); 1682 if (ret) 1683 goto err; 1684 1685 ret = uncore_msr_pmus_register(); 1686 if (ret) 1687 goto err; 1688 return 0; 1689 err: 1690 uncore_types_exit(uncore_msr_uncores); 1691 uncore_msr_uncores = empty_uncore; 1692 return ret; 1693 } 1694 1695 static int __init uncore_mmio_init(void) 1696 { 1697 struct intel_uncore_type **types = uncore_mmio_uncores; 1698 int ret; 1699 1700 ret = uncore_types_init(types, 
true); 1701 if (ret) 1702 goto err; 1703 1704 for (; *types; types++) { 1705 ret = type_pmu_register(*types); 1706 if (ret) 1707 goto err; 1708 } 1709 return 0; 1710 err: 1711 uncore_types_exit(uncore_mmio_uncores); 1712 uncore_mmio_uncores = empty_uncore; 1713 return ret; 1714 } 1715 1716 struct intel_uncore_init_fun { 1717 void (*cpu_init)(void); 1718 int (*pci_init)(void); 1719 void (*mmio_init)(void); 1720 /* Discovery table is required */ 1721 bool use_discovery; 1722 /* The units in the discovery table should be ignored. */ 1723 int *uncore_units_ignore; 1724 }; 1725 1726 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { 1727 .cpu_init = nhm_uncore_cpu_init, 1728 }; 1729 1730 static const struct intel_uncore_init_fun snb_uncore_init __initconst = { 1731 .cpu_init = snb_uncore_cpu_init, 1732 .pci_init = snb_uncore_pci_init, 1733 }; 1734 1735 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { 1736 .cpu_init = snb_uncore_cpu_init, 1737 .pci_init = ivb_uncore_pci_init, 1738 }; 1739 1740 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { 1741 .cpu_init = snb_uncore_cpu_init, 1742 .pci_init = hsw_uncore_pci_init, 1743 }; 1744 1745 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { 1746 .cpu_init = snb_uncore_cpu_init, 1747 .pci_init = bdw_uncore_pci_init, 1748 }; 1749 1750 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { 1751 .cpu_init = snbep_uncore_cpu_init, 1752 .pci_init = snbep_uncore_pci_init, 1753 }; 1754 1755 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { 1756 .cpu_init = nhmex_uncore_cpu_init, 1757 }; 1758 1759 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { 1760 .cpu_init = ivbep_uncore_cpu_init, 1761 .pci_init = ivbep_uncore_pci_init, 1762 }; 1763 1764 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { 1765 .cpu_init = hswep_uncore_cpu_init, 1766 .pci_init 
= hswep_uncore_pci_init, 1767 }; 1768 1769 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { 1770 .cpu_init = bdx_uncore_cpu_init, 1771 .pci_init = bdx_uncore_pci_init, 1772 }; 1773 1774 static const struct intel_uncore_init_fun knl_uncore_init __initconst = { 1775 .cpu_init = knl_uncore_cpu_init, 1776 .pci_init = knl_uncore_pci_init, 1777 }; 1778 1779 static const struct intel_uncore_init_fun skl_uncore_init __initconst = { 1780 .cpu_init = skl_uncore_cpu_init, 1781 .pci_init = skl_uncore_pci_init, 1782 }; 1783 1784 static const struct intel_uncore_init_fun skx_uncore_init __initconst = { 1785 .cpu_init = skx_uncore_cpu_init, 1786 .pci_init = skx_uncore_pci_init, 1787 }; 1788 1789 static const struct intel_uncore_init_fun icl_uncore_init __initconst = { 1790 .cpu_init = icl_uncore_cpu_init, 1791 .pci_init = skl_uncore_pci_init, 1792 }; 1793 1794 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1795 .cpu_init = tgl_uncore_cpu_init, 1796 .mmio_init = tgl_uncore_mmio_init, 1797 }; 1798 1799 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1800 .cpu_init = tgl_uncore_cpu_init, 1801 .mmio_init = tgl_l_uncore_mmio_init, 1802 }; 1803 1804 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { 1805 .cpu_init = tgl_uncore_cpu_init, 1806 .pci_init = skl_uncore_pci_init, 1807 }; 1808 1809 static const struct intel_uncore_init_fun adl_uncore_init __initconst = { 1810 .cpu_init = adl_uncore_cpu_init, 1811 .mmio_init = adl_uncore_mmio_init, 1812 }; 1813 1814 static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { 1815 .cpu_init = mtl_uncore_cpu_init, 1816 .mmio_init = adl_uncore_mmio_init, 1817 }; 1818 1819 static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { 1820 .cpu_init = lnl_uncore_cpu_init, 1821 .mmio_init = lnl_uncore_mmio_init, 1822 }; 1823 1824 static const struct intel_uncore_init_fun icx_uncore_init __initconst = { 1825 .cpu_init = 
icx_uncore_cpu_init, 1826 .pci_init = icx_uncore_pci_init, 1827 .mmio_init = icx_uncore_mmio_init, 1828 }; 1829 1830 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1831 .cpu_init = snr_uncore_cpu_init, 1832 .pci_init = snr_uncore_pci_init, 1833 .mmio_init = snr_uncore_mmio_init, 1834 }; 1835 1836 static const struct intel_uncore_init_fun spr_uncore_init __initconst = { 1837 .cpu_init = spr_uncore_cpu_init, 1838 .pci_init = spr_uncore_pci_init, 1839 .mmio_init = spr_uncore_mmio_init, 1840 .use_discovery = true, 1841 .uncore_units_ignore = spr_uncore_units_ignore, 1842 }; 1843 1844 static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1845 .cpu_init = gnr_uncore_cpu_init, 1846 .pci_init = gnr_uncore_pci_init, 1847 .mmio_init = gnr_uncore_mmio_init, 1848 .use_discovery = true, 1849 .uncore_units_ignore = gnr_uncore_units_ignore, 1850 }; 1851 1852 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1853 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1854 .pci_init = intel_uncore_generic_uncore_pci_init, 1855 .mmio_init = intel_uncore_generic_uncore_mmio_init, 1856 }; 1857 1858 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1859 X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), 1860 X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), 1861 X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), 1862 X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), 1863 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), 1864 X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), 1865 X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), 1866 X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), 1867 X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), 1868 X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), 1869 X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), 1870 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), 1871 X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), 1872 X86_MATCH_VFM(INTEL_WESTMERE_EX, 
&nhmex_uncore_init), 1873 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), 1874 X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), 1875 X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), 1876 X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), 1877 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), 1878 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), 1879 X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), 1880 X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init), 1881 X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), 1882 X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), 1883 X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), 1884 X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), 1885 X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), 1886 X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), 1887 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), 1888 X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), 1889 X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), 1890 X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), 1891 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), 1892 X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), 1893 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), 1894 X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), 1895 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), 1896 X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), 1897 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), 1898 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), 1899 X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), 1900 X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), 1901 X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), 1902 X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), 1903 X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), 1904 X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), 1905 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), 1906 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), 1907 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 
&gnr_uncore_init), 1908 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), 1909 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), 1910 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), 1911 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), 1912 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), 1913 {}, 1914 }; 1915 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1916 1917 static int __init intel_uncore_init(void) 1918 { 1919 const struct x86_cpu_id *id; 1920 struct intel_uncore_init_fun *uncore_init; 1921 int pret = 0, cret = 0, mret = 0, ret; 1922 1923 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1924 return -ENODEV; 1925 1926 __uncore_max_dies = 1927 topology_max_packages() * topology_max_dies_per_package(); 1928 1929 id = x86_match_cpu(intel_uncore_match); 1930 if (!id) { 1931 if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) 1932 uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; 1933 else 1934 return -ENODEV; 1935 } else { 1936 uncore_init = (struct intel_uncore_init_fun *)id->driver_data; 1937 if (uncore_no_discover && uncore_init->use_discovery) 1938 return -ENODEV; 1939 if (uncore_init->use_discovery && 1940 !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) 1941 return -ENODEV; 1942 } 1943 1944 if (uncore_init->pci_init) { 1945 pret = uncore_init->pci_init(); 1946 if (!pret) 1947 pret = uncore_pci_init(); 1948 } 1949 1950 if (uncore_init->cpu_init) { 1951 uncore_init->cpu_init(); 1952 cret = uncore_cpu_init(); 1953 } 1954 1955 if (uncore_init->mmio_init) { 1956 uncore_init->mmio_init(); 1957 mret = uncore_mmio_init(); 1958 } 1959 1960 if (cret && pret && mret) { 1961 ret = -ENODEV; 1962 goto free_discovery; 1963 } 1964 1965 /* Install hotplug callbacks to setup the targets for each package */ 1966 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, 1967 "perf/x86/intel/uncore:online", 1968 uncore_event_cpu_online, 1969 uncore_event_cpu_offline); 1970 if (ret) 1971 goto err; 
1972 return 0; 1973 1974 err: 1975 uncore_types_exit(uncore_msr_uncores); 1976 uncore_types_exit(uncore_mmio_uncores); 1977 uncore_pci_exit(); 1978 free_discovery: 1979 intel_uncore_clear_discovery_tables(); 1980 return ret; 1981 } 1982 module_init(intel_uncore_init); 1983 1984 static void __exit intel_uncore_exit(void) 1985 { 1986 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1987 uncore_types_exit(uncore_msr_uncores); 1988 uncore_types_exit(uncore_mmio_uncores); 1989 uncore_pci_exit(); 1990 intel_uncore_clear_discovery_tables(); 1991 } 1992 module_exit(intel_uncore_exit); 1993