1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/module.h> 3 4 #include <asm/cpu_device_id.h> 5 #include <asm/intel-family.h> 6 #include "uncore.h" 7 #include "uncore_discovery.h" 8 9 static bool uncore_no_discover; 10 module_param(uncore_no_discover, bool, 0); 11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " 12 "(default: enable the discovery mechanism)."); 13 struct intel_uncore_type *empty_uncore[] = { NULL, }; 14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore; 15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore; 16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; 17 18 static bool pcidrv_registered; 19 struct pci_driver *uncore_pci_driver; 20 /* The PCI driver for the device which the uncore doesn't own. */ 21 struct pci_driver *uncore_pci_sub_driver; 22 /* pci bus to socket mapping */ 23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock); 24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); 25 struct pci_extra_dev *uncore_extra_pci_dev; 26 int __uncore_max_dies; 27 28 /* mask of cpus that collect uncore events */ 29 static cpumask_t uncore_cpu_mask; 30 31 /* constraint for the fixed counter */ 32 static struct event_constraint uncore_constraint_fixed = 33 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); 34 struct event_constraint uncore_constraint_empty = 35 EVENT_CONSTRAINT(0, 0, 0); 36 37 MODULE_DESCRIPTION("Support for Intel uncore performance events"); 38 MODULE_LICENSE("GPL"); 39 40 int uncore_pcibus_to_dieid(struct pci_bus *bus) 41 { 42 struct pci2phy_map *map; 43 int die_id = -1; 44 45 raw_spin_lock(&pci2phy_map_lock); 46 list_for_each_entry(map, &pci2phy_map_head, list) { 47 if (map->segment == pci_domain_nr(bus)) { 48 die_id = map->pbus_to_dieid[bus->number]; 49 break; 50 } 51 } 52 raw_spin_unlock(&pci2phy_map_lock); 53 54 return die_id; 55 } 56 57 int uncore_die_to_segment(int die) 58 { 59 struct pci_bus *bus = NULL; 60 61 /* Find first pci bus which attributes to specified die. */ 62 while ((bus = pci_find_next_bus(bus)) && 63 (die != uncore_pcibus_to_dieid(bus))) 64 ; 65 66 return bus ? pci_domain_nr(bus) : -EINVAL; 67 } 68 69 int uncore_device_to_die(struct pci_dev *dev) 70 { 71 int node = pcibus_to_node(dev->bus); 72 int cpu; 73 74 for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) { 75 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 77 if (c->initialized && cpu_to_node(cpu) == node) 78 return c->topo.logical_die_id; 79 } 80 81 return -1; 82 } 83 84 static void uncore_free_pcibus_map(void) 85 { 86 struct pci2phy_map *map, *tmp; 87 88 list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { 89 list_del(&map->list); 90 kfree(map); 91 } 92 } 93 94 struct pci2phy_map *__find_pci2phy_map(int segment) 95 { 96 struct pci2phy_map *map, *alloc = NULL; 97 int i; 98 99 lockdep_assert_held(&pci2phy_map_lock); 100 101 lookup: 102 list_for_each_entry(map, &pci2phy_map_head, list) { 103 if (map->segment == segment) 104 goto end; 105 } 106 107 if (!alloc) { 108 raw_spin_unlock(&pci2phy_map_lock); 109 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); 110 raw_spin_lock(&pci2phy_map_lock); 111 112 if (!alloc) 113 return NULL; 114 115 goto lookup; 116 } 117 118 map = alloc; 119 alloc = NULL; 120 map->segment = segment; 121 for (i = 0; i < 256; i++) 122 map->pbus_to_dieid[i] = -1; 123 list_add_tail(&map->list, &pci2phy_map_head); 124 125 end: 126 kfree(alloc); 127 return map; 128 } 129 130 ssize_t uncore_event_show(struct device *dev, 131 struct device_attribute *attr, char *buf) 132 { 133 struct uncore_event_desc *event = 134 container_of(attr, struct uncore_event_desc, attr); 135 return sprintf(buf, "%s", event->config); 136 } 137 138 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) 139 { 140 unsigned int dieid = topology_logical_die_id(cpu); 141 142 /* 143 * The unsigned check also catches the '-1' return value for non 144 * existent mappings in the topology map. 145 */ 146 return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL; 147 } 148 149 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) 150 { 151 u64 count; 152 153 rdmsrl(event->hw.event_base, count); 154 155 return count; 156 } 157 158 void uncore_mmio_exit_box(struct intel_uncore_box *box) 159 { 160 if (box->io_addr) 161 iounmap(box->io_addr); 162 } 163 164 u64 uncore_mmio_read_counter(struct intel_uncore_box *box, 165 struct perf_event *event) 166 { 167 if (!box->io_addr) 168 return 0; 169 170 if (!uncore_mmio_is_valid_offset(box, event->hw.event_base)) 171 return 0; 172 173 return readq(box->io_addr + event->hw.event_base); 174 } 175 176 /* 177 * generic get constraint function for shared match/mask registers. 178 */ 179 struct event_constraint * 180 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) 181 { 182 struct intel_uncore_extra_reg *er; 183 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 184 struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; 185 unsigned long flags; 186 bool ok = false; 187 188 /* 189 * reg->alloc can be set due to existing state, so for fake box we 190 * need to ignore this, otherwise we might fail to allocate proper 191 * fake state for this extra reg constraint. 192 */ 193 if (reg1->idx == EXTRA_REG_NONE || 194 (!uncore_box_is_fake(box) && reg1->alloc)) 195 return NULL; 196 197 er = &box->shared_regs[reg1->idx]; 198 raw_spin_lock_irqsave(&er->lock, flags); 199 if (!atomic_read(&er->ref) || 200 (er->config1 == reg1->config && er->config2 == reg2->config)) { 201 atomic_inc(&er->ref); 202 er->config1 = reg1->config; 203 er->config2 = reg2->config; 204 ok = true; 205 } 206 raw_spin_unlock_irqrestore(&er->lock, flags); 207 208 if (ok) { 209 if (!uncore_box_is_fake(box)) 210 reg1->alloc = 1; 211 return NULL; 212 } 213 214 return &uncore_constraint_empty; 215 } 216 217 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) 218 { 219 struct intel_uncore_extra_reg *er; 220 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 221 222 /* 223 * Only put constraint if extra reg was actually allocated. Also 224 * takes care of event which do not use an extra shared reg. 225 * 226 * Also, if this is a fake box we shouldn't touch any event state 227 * (reg->alloc) and we don't care about leaving inconsistent box 228 * state either since it will be thrown out. 229 */ 230 if (uncore_box_is_fake(box) || !reg1->alloc) 231 return; 232 233 er = &box->shared_regs[reg1->idx]; 234 atomic_dec(&er->ref); 235 reg1->alloc = 0; 236 } 237 238 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) 239 { 240 struct intel_uncore_extra_reg *er; 241 unsigned long flags; 242 u64 config; 243 244 er = &box->shared_regs[idx]; 245 246 raw_spin_lock_irqsave(&er->lock, flags); 247 config = er->config; 248 raw_spin_unlock_irqrestore(&er->lock, flags); 249 250 return config; 251 } 252 253 static void uncore_assign_hw_event(struct intel_uncore_box *box, 254 struct perf_event *event, int idx) 255 { 256 struct hw_perf_event *hwc = &event->hw; 257 258 hwc->idx = idx; 259 hwc->last_tag = ++box->tags[idx]; 260 261 if (uncore_pmc_fixed(hwc->idx)) { 262 hwc->event_base = uncore_fixed_ctr(box); 263 hwc->config_base = uncore_fixed_ctl(box); 264 return; 265 } 266 267 if (intel_generic_uncore_assign_hw_event(event, box)) 268 return; 269 270 hwc->config_base = uncore_event_ctl(box, hwc->idx); 271 hwc->event_base = uncore_perf_ctr(box, hwc->idx); 272 } 273 274 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) 275 { 276 u64 prev_count, new_count, delta; 277 int shift; 278 279 if (uncore_pmc_freerunning(event->hw.idx)) 280 shift = 64 - uncore_freerunning_bits(box, event); 281 else if (uncore_pmc_fixed(event->hw.idx)) 282 shift = 64 - uncore_fixed_ctr_bits(box); 283 else 284 shift = 64 - uncore_perf_ctr_bits(box); 285 286 /* the hrtimer might modify the previous event value */ 287 again: 288 prev_count = local64_read(&event->hw.prev_count); 289 new_count = uncore_read_counter(box, event); 290 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) 291 goto again; 292 293 delta = (new_count << shift) - (prev_count << shift); 294 delta >>= shift; 295 296 local64_add(delta, &event->count); 297 } 298 299 /* 300 * The overflow interrupt is unavailable for SandyBridge-EP, is broken 301 * for SandyBridge. So we use hrtimer to periodically poll the counter 302 * to avoid overflow. 303 */ 304 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) 305 { 306 struct intel_uncore_box *box; 307 struct perf_event *event; 308 unsigned long flags; 309 int bit; 310 311 box = container_of(hrtimer, struct intel_uncore_box, hrtimer); 312 if (!box->n_active || box->cpu != smp_processor_id()) 313 return HRTIMER_NORESTART; 314 /* 315 * disable local interrupt to prevent uncore_pmu_event_start/stop 316 * to interrupt the update process 317 */ 318 local_irq_save(flags); 319 320 /* 321 * handle boxes with an active event list as opposed to active 322 * counters 323 */ 324 list_for_each_entry(event, &box->active_list, active_entry) { 325 uncore_perf_event_update(box, event); 326 } 327 328 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) 329 uncore_perf_event_update(box, box->events[bit]); 330 331 local_irq_restore(flags); 332 333 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); 334 return HRTIMER_RESTART; 335 } 336 337 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 338 { 339 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 340 HRTIMER_MODE_REL_PINNED); 341 } 342 343 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) 344 { 345 hrtimer_cancel(&box->hrtimer); 346 } 347 348 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) 349 { 350 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 351 box->hrtimer.function = uncore_pmu_hrtimer; 352 } 353 354 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, 355 int node) 356 { 357 int i, size, numshared = type->num_shared_regs ; 358 struct intel_uncore_box *box; 359 360 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); 361 362 box = kzalloc_node(size, GFP_KERNEL, node); 363 if (!box) 364 return NULL; 365 366 for (i = 0; i < numshared; i++) 367 raw_spin_lock_init(&box->shared_regs[i].lock); 368 369 uncore_pmu_init_hrtimer(box); 370 box->cpu = -1; 371 box->dieid = -1; 372 373 /* set default hrtimer timeout */ 374 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; 375 376 INIT_LIST_HEAD(&box->active_list); 377 378 return box; 379 } 380 381 /* 382 * Using uncore_pmu_event_init pmu event_init callback 383 * as a detection point for uncore events. 384 */ 385 static int uncore_pmu_event_init(struct perf_event *event); 386 387 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) 388 { 389 return &box->pmu->pmu == event->pmu; 390 } 391 392 static int 393 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, 394 bool dogrp) 395 { 396 struct perf_event *event; 397 int n, max_count; 398 399 max_count = box->pmu->type->num_counters; 400 if (box->pmu->type->fixed_ctl) 401 max_count++; 402 403 if (box->n_events >= max_count) 404 return -EINVAL; 405 406 n = box->n_events; 407 408 if (is_box_event(box, leader)) { 409 box->event_list[n] = leader; 410 n++; 411 } 412 413 if (!dogrp) 414 return n; 415 416 for_each_sibling_event(event, leader) { 417 if (!is_box_event(box, event) || 418 event->state <= PERF_EVENT_STATE_OFF) 419 continue; 420 421 if (n >= max_count) 422 return -EINVAL; 423 424 box->event_list[n] = event; 425 n++; 426 } 427 return n; 428 } 429 430 static struct event_constraint * 431 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) 432 { 433 struct intel_uncore_type *type = box->pmu->type; 434 struct event_constraint *c; 435 436 if (type->ops->get_constraint) { 437 c = type->ops->get_constraint(box, event); 438 if (c) 439 return c; 440 } 441 442 if (event->attr.config == UNCORE_FIXED_EVENT) 443 return &uncore_constraint_fixed; 444 445 if (type->constraints) { 446 for_each_event_constraint(c, type->constraints) { 447 if ((event->hw.config & c->cmask) == c->code) 448 return c; 449 } 450 } 451 452 return &type->unconstrainted; 453 } 454 455 static void uncore_put_event_constraint(struct intel_uncore_box *box, 456 struct perf_event *event) 457 { 458 if (box->pmu->type->ops->put_constraint) 459 box->pmu->type->ops->put_constraint(box, event); 460 } 461 462 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 463 { 464 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 465 struct event_constraint *c; 466 int i, wmin, wmax, ret = 0; 467 struct hw_perf_event *hwc; 468 469 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 470 471 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 472 c = uncore_get_event_constraint(box, box->event_list[i]); 473 box->event_constraint[i] = c; 474 wmin = min(wmin, c->weight); 475 wmax = max(wmax, c->weight); 476 } 477 478 /* fastpath, try to reuse previous register */ 479 for (i = 0; i < n; i++) { 480 hwc = &box->event_list[i]->hw; 481 c = box->event_constraint[i]; 482 483 /* never assigned */ 484 if (hwc->idx == -1) 485 break; 486 487 /* constraint still honored */ 488 if (!test_bit(hwc->idx, c->idxmsk)) 489 break; 490 491 /* not already used */ 492 if (test_bit(hwc->idx, used_mask)) 493 break; 494 495 __set_bit(hwc->idx, used_mask); 496 if (assign) 497 assign[i] = hwc->idx; 498 } 499 /* slow path */ 500 if (i != n) 501 ret = perf_assign_events(box->event_constraint, n, 502 wmin, wmax, n, assign); 503 504 if (!assign || ret) { 505 for (i = 0; i < n; i++) 506 uncore_put_event_constraint(box, box->event_list[i]); 507 } 508 return ret ? -EINVAL : 0; 509 } 510 511 void uncore_pmu_event_start(struct perf_event *event, int flags) 512 { 513 struct intel_uncore_box *box = uncore_event_to_box(event); 514 int idx = event->hw.idx; 515 516 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) 517 return; 518 519 /* 520 * Free running counter is read-only and always active. 521 * Use the current counter value as start point. 522 * There is no overflow interrupt for free running counter. 523 * Use hrtimer to periodically poll the counter to avoid overflow. 524 */ 525 if (uncore_pmc_freerunning(event->hw.idx)) { 526 list_add_tail(&event->active_entry, &box->active_list); 527 local64_set(&event->hw.prev_count, 528 uncore_read_counter(box, event)); 529 if (box->n_active++ == 0) 530 uncore_pmu_start_hrtimer(box); 531 return; 532 } 533 534 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) 535 return; 536 537 event->hw.state = 0; 538 box->events[idx] = event; 539 box->n_active++; 540 __set_bit(idx, box->active_mask); 541 542 local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); 543 uncore_enable_event(box, event); 544 545 if (box->n_active == 1) 546 uncore_pmu_start_hrtimer(box); 547 } 548 549 void uncore_pmu_event_stop(struct perf_event *event, int flags) 550 { 551 struct intel_uncore_box *box = uncore_event_to_box(event); 552 struct hw_perf_event *hwc = &event->hw; 553 554 /* Cannot disable free running counter which is read-only */ 555 if (uncore_pmc_freerunning(hwc->idx)) { 556 list_del(&event->active_entry); 557 if (--box->n_active == 0) 558 uncore_pmu_cancel_hrtimer(box); 559 uncore_perf_event_update(box, event); 560 return; 561 } 562 563 if (__test_and_clear_bit(hwc->idx, box->active_mask)) { 564 uncore_disable_event(box, event); 565 box->n_active--; 566 box->events[hwc->idx] = NULL; 567 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 568 hwc->state |= PERF_HES_STOPPED; 569 570 if (box->n_active == 0) 571 uncore_pmu_cancel_hrtimer(box); 572 } 573 574 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 575 /* 576 * Drain the remaining delta count out of a event 577 * that we are disabling: 578 */ 579 uncore_perf_event_update(box, event); 580 hwc->state |= PERF_HES_UPTODATE; 581 } 582 } 583 584 int uncore_pmu_event_add(struct perf_event *event, int flags) 585 { 586 struct intel_uncore_box *box = uncore_event_to_box(event); 587 struct hw_perf_event *hwc = &event->hw; 588 int assign[UNCORE_PMC_IDX_MAX]; 589 int i, n, ret; 590 591 if (!box) 592 return -ENODEV; 593 594 /* 595 * The free funning counter is assigned in event_init(). 596 * The free running counter event and free running counter 597 * are 1:1 mapped. It doesn't need to be tracked in event_list. 598 */ 599 if (uncore_pmc_freerunning(hwc->idx)) { 600 if (flags & PERF_EF_START) 601 uncore_pmu_event_start(event, 0); 602 return 0; 603 } 604 605 ret = n = uncore_collect_events(box, event, false); 606 if (ret < 0) 607 return ret; 608 609 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 610 if (!(flags & PERF_EF_START)) 611 hwc->state |= PERF_HES_ARCH; 612 613 ret = uncore_assign_events(box, assign, n); 614 if (ret) 615 return ret; 616 617 /* save events moving to new counters */ 618 for (i = 0; i < box->n_events; i++) { 619 event = box->event_list[i]; 620 hwc = &event->hw; 621 622 if (hwc->idx == assign[i] && 623 hwc->last_tag == box->tags[assign[i]]) 624 continue; 625 /* 626 * Ensure we don't accidentally enable a stopped 627 * counter simply because we rescheduled. 628 */ 629 if (hwc->state & PERF_HES_STOPPED) 630 hwc->state |= PERF_HES_ARCH; 631 632 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 633 } 634 635 /* reprogram moved events into new counters */ 636 for (i = 0; i < n; i++) { 637 event = box->event_list[i]; 638 hwc = &event->hw; 639 640 if (hwc->idx != assign[i] || 641 hwc->last_tag != box->tags[assign[i]]) 642 uncore_assign_hw_event(box, event, assign[i]); 643 else if (i < box->n_events) 644 continue; 645 646 if (hwc->state & PERF_HES_ARCH) 647 continue; 648 649 uncore_pmu_event_start(event, 0); 650 } 651 box->n_events = n; 652 653 return 0; 654 } 655 656 void uncore_pmu_event_del(struct perf_event *event, int flags) 657 { 658 struct intel_uncore_box *box = uncore_event_to_box(event); 659 int i; 660 661 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 662 663 /* 664 * The event for free running counter is not tracked by event_list. 665 * It doesn't need to force event->hw.idx = -1 to reassign the counter. 666 * Because the event and the free running counter are 1:1 mapped. 667 */ 668 if (uncore_pmc_freerunning(event->hw.idx)) 669 return; 670 671 for (i = 0; i < box->n_events; i++) { 672 if (event == box->event_list[i]) { 673 uncore_put_event_constraint(box, event); 674 675 for (++i; i < box->n_events; i++) 676 box->event_list[i - 1] = box->event_list[i]; 677 678 --box->n_events; 679 break; 680 } 681 } 682 683 event->hw.idx = -1; 684 event->hw.last_tag = ~0ULL; 685 } 686 687 void uncore_pmu_event_read(struct perf_event *event) 688 { 689 struct intel_uncore_box *box = uncore_event_to_box(event); 690 uncore_perf_event_update(box, event); 691 } 692 693 /* 694 * validation ensures the group can be loaded onto the 695 * PMU if it was the only group available. 696 */ 697 static int uncore_validate_group(struct intel_uncore_pmu *pmu, 698 struct perf_event *event) 699 { 700 struct perf_event *leader = event->group_leader; 701 struct intel_uncore_box *fake_box; 702 int ret = -EINVAL, n; 703 704 /* The free running counter is always active. */ 705 if (uncore_pmc_freerunning(event->hw.idx)) 706 return 0; 707 708 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); 709 if (!fake_box) 710 return -ENOMEM; 711 712 fake_box->pmu = pmu; 713 /* 714 * the event is not yet connected with its 715 * siblings therefore we must first collect 716 * existing siblings, then add the new event 717 * before we can simulate the scheduling 718 */ 719 n = uncore_collect_events(fake_box, leader, true); 720 if (n < 0) 721 goto out; 722 723 fake_box->n_events = n; 724 n = uncore_collect_events(fake_box, event, false); 725 if (n < 0) 726 goto out; 727 728 fake_box->n_events = n; 729 730 ret = uncore_assign_events(fake_box, NULL, n); 731 out: 732 kfree(fake_box); 733 return ret; 734 } 735 736 static int uncore_pmu_event_init(struct perf_event *event) 737 { 738 struct intel_uncore_pmu *pmu; 739 struct intel_uncore_box *box; 740 struct hw_perf_event *hwc = &event->hw; 741 int ret; 742 743 if (event->attr.type != event->pmu->type) 744 return -ENOENT; 745 746 pmu = uncore_event_to_pmu(event); 747 /* no device found for this pmu */ 748 if (!pmu->registered) 749 return -ENOENT; 750 751 /* Sampling not supported yet */ 752 if (hwc->sample_period) 753 return -EINVAL; 754 755 /* 756 * Place all uncore events for a particular physical package 757 * onto a single cpu 758 */ 759 if (event->cpu < 0) 760 return -EINVAL; 761 box = uncore_pmu_to_box(pmu, event->cpu); 762 if (!box || box->cpu < 0) 763 return -EINVAL; 764 event->cpu = box->cpu; 765 event->pmu_private = box; 766 767 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; 768 769 event->hw.idx = -1; 770 event->hw.last_tag = ~0ULL; 771 event->hw.extra_reg.idx = EXTRA_REG_NONE; 772 event->hw.branch_reg.idx = EXTRA_REG_NONE; 773 774 if (event->attr.config == UNCORE_FIXED_EVENT) { 775 /* no fixed counter */ 776 if (!pmu->type->fixed_ctl) 777 return -EINVAL; 778 /* 779 * if there is only one fixed counter, only the first pmu 780 * can access the fixed counter 781 */ 782 if (pmu->type->single_fixed && pmu->pmu_idx > 0) 783 return -EINVAL; 784 785 /* fixed counters have event field hardcoded to zero */ 786 hwc->config = 0ULL; 787 } else if (is_freerunning_event(event)) { 788 hwc->config = event->attr.config; 789 if (!check_valid_freerunning_event(box, event)) 790 return -EINVAL; 791 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; 792 /* 793 * The free running counter event and free running counter 794 * are always 1:1 mapped. 795 * The free running counter is always active. 796 * Assign the free running counter here. 797 */ 798 event->hw.event_base = uncore_freerunning_counter(box, event); 799 } else { 800 hwc->config = event->attr.config & 801 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); 802 if (pmu->type->ops->hw_config) { 803 ret = pmu->type->ops->hw_config(box, event); 804 if (ret) 805 return ret; 806 } 807 } 808 809 if (event->group_leader != event) 810 ret = uncore_validate_group(pmu, event); 811 else 812 ret = 0; 813 814 return ret; 815 } 816 817 static void uncore_pmu_enable(struct pmu *pmu) 818 { 819 struct intel_uncore_pmu *uncore_pmu; 820 struct intel_uncore_box *box; 821 822 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 823 824 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 825 if (!box) 826 return; 827 828 if (uncore_pmu->type->ops->enable_box) 829 uncore_pmu->type->ops->enable_box(box); 830 } 831 832 static void uncore_pmu_disable(struct pmu *pmu) 833 { 834 struct intel_uncore_pmu *uncore_pmu; 835 struct intel_uncore_box *box; 836 837 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 838 839 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 840 if (!box) 841 return; 842 843 if (uncore_pmu->type->ops->disable_box) 844 uncore_pmu->type->ops->disable_box(box); 845 } 846 847 static ssize_t uncore_get_attr_cpumask(struct device *dev, 848 struct device_attribute *attr, char *buf) 849 { 850 struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu); 851 852 return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask); 853 } 854 855 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); 856 857 static struct attribute *uncore_pmu_attrs[] = { 858 &dev_attr_cpumask.attr, 859 NULL, 860 }; 861 862 static const struct attribute_group uncore_pmu_attr_group = { 863 .attrs = uncore_pmu_attrs, 864 }; 865 866 static inline int uncore_get_box_id(struct intel_uncore_type *type, 867 struct intel_uncore_pmu *pmu) 868 { 869 if (type->boxes) 870 return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx); 871 872 return pmu->pmu_idx; 873 } 874 875 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) 876 { 877 struct intel_uncore_type *type = pmu->type; 878 879 if (type->num_boxes == 1) 880 sprintf(pmu_name, "uncore_type_%u", type->type_id); 881 else { 882 sprintf(pmu_name, "uncore_type_%u_%d", 883 type->type_id, uncore_get_box_id(type, pmu)); 884 } 885 } 886 887 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) 888 { 889 struct intel_uncore_type *type = pmu->type; 890 891 /* 892 * No uncore block name in discovery table. 893 * Use uncore_type_&typeid_&boxid as name. 894 */ 895 if (!type->name) { 896 uncore_get_alias_name(pmu->name, pmu); 897 return; 898 } 899 900 if (type->num_boxes == 1) { 901 if (strlen(type->name) > 0) 902 sprintf(pmu->name, "uncore_%s", type->name); 903 else 904 sprintf(pmu->name, "uncore"); 905 } else { 906 /* 907 * Use the box ID from the discovery table if applicable. 908 */ 909 sprintf(pmu->name, "uncore_%s_%d", type->name, 910 uncore_get_box_id(type, pmu)); 911 } 912 } 913 914 static int uncore_pmu_register(struct intel_uncore_pmu *pmu) 915 { 916 int ret; 917 918 if (!pmu->type->pmu) { 919 pmu->pmu = (struct pmu) { 920 .attr_groups = pmu->type->attr_groups, 921 .task_ctx_nr = perf_invalid_context, 922 .pmu_enable = uncore_pmu_enable, 923 .pmu_disable = uncore_pmu_disable, 924 .event_init = uncore_pmu_event_init, 925 .add = uncore_pmu_event_add, 926 .del = uncore_pmu_event_del, 927 .start = uncore_pmu_event_start, 928 .stop = uncore_pmu_event_stop, 929 .read = uncore_pmu_event_read, 930 .module = THIS_MODULE, 931 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 932 .attr_update = pmu->type->attr_update, 933 }; 934 } else { 935 pmu->pmu = *pmu->type->pmu; 936 pmu->pmu.attr_groups = pmu->type->attr_groups; 937 pmu->pmu.attr_update = pmu->type->attr_update; 938 } 939 940 uncore_get_pmu_name(pmu); 941 942 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); 943 if (!ret) 944 pmu->registered = true; 945 return ret; 946 } 947 948 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) 949 { 950 if (!pmu->registered) 951 return; 952 perf_pmu_unregister(&pmu->pmu); 953 pmu->registered = false; 954 } 955 956 static void uncore_free_boxes(struct intel_uncore_pmu *pmu) 957 { 958 int die; 959 960 for (die = 0; die < uncore_max_dies(); die++) 961 kfree(pmu->boxes[die]); 962 kfree(pmu->boxes); 963 } 964 965 static void uncore_type_exit(struct intel_uncore_type *type) 966 { 967 struct intel_uncore_pmu *pmu = type->pmus; 968 int i; 969 970 if (type->cleanup_mapping) 971 type->cleanup_mapping(type); 972 973 if (type->cleanup_extra_boxes) 974 type->cleanup_extra_boxes(type); 975 976 if (pmu) { 977 for (i = 0; i < type->num_boxes; i++, pmu++) { 978 uncore_pmu_unregister(pmu); 979 uncore_free_boxes(pmu); 980 } 981 kfree(type->pmus); 982 type->pmus = NULL; 983 } 984 985 kfree(type->events_group); 986 type->events_group = NULL; 987 } 988 989 static void uncore_types_exit(struct intel_uncore_type **types) 990 { 991 for (; *types; types++) 992 uncore_type_exit(*types); 993 } 994 995 static int __init uncore_type_init(struct intel_uncore_type *type) 996 { 997 struct intel_uncore_pmu *pmus; 998 size_t size; 999 int i, j; 1000 1001 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); 1002 if (!pmus) 1003 return -ENOMEM; 1004 1005 size = uncore_max_dies() * sizeof(struct intel_uncore_box *); 1006 1007 for (i = 0; i < type->num_boxes; i++) { 1008 pmus[i].pmu_idx = i; 1009 pmus[i].type = type; 1010 pmus[i].boxes = kzalloc(size, GFP_KERNEL); 1011 if (!pmus[i].boxes) 1012 goto err; 1013 } 1014 1015 type->pmus = pmus; 1016 type->unconstrainted = (struct event_constraint) 1017 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 1018 0, type->num_counters, 0, 0); 1019 1020 if (type->event_descs) { 1021 struct { 1022 struct attribute_group group; 1023 struct attribute *attrs[]; 1024 } *attr_group; 1025 for (i = 0; type->event_descs[i].attr.attr.name; i++); 1026 1027 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), 1028 GFP_KERNEL); 1029 if (!attr_group) 1030 goto err; 1031 1032 attr_group->group.name = "events"; 1033 attr_group->group.attrs = attr_group->attrs; 1034 1035 for (j = 0; j < i; j++) 1036 attr_group->attrs[j] = &type->event_descs[j].attr.attr; 1037 1038 type->events_group = &attr_group->group; 1039 } 1040 1041 type->pmu_group = &uncore_pmu_attr_group; 1042 1043 if (type->set_mapping) 1044 type->set_mapping(type); 1045 1046 return 0; 1047 1048 err: 1049 for (i = 0; i < type->num_boxes; i++) 1050 kfree(pmus[i].boxes); 1051 kfree(pmus); 1052 1053 return -ENOMEM; 1054 } 1055 1056 static int __init 1057 uncore_types_init(struct intel_uncore_type **types) 1058 { 1059 int ret; 1060 1061 for (; *types; types++) { 1062 ret = uncore_type_init(*types); 1063 if (ret) 1064 return ret; 1065 } 1066 return 0; 1067 } 1068 1069 /* 1070 * Get the die information of a PCI device. 1071 * @pdev: The PCI device. 1072 * @die: The die id which the device maps to. 1073 */ 1074 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) 1075 { 1076 *die = uncore_pcibus_to_dieid(pdev->bus); 1077 if (*die < 0) 1078 return -EINVAL; 1079 1080 return 0; 1081 } 1082 1083 static struct intel_uncore_pmu * 1084 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) 1085 { 1086 struct intel_uncore_type **types = uncore_pci_uncores; 1087 struct intel_uncore_discovery_unit *unit; 1088 struct intel_uncore_type *type; 1089 struct rb_node *node; 1090 1091 for (; *types; types++) { 1092 type = *types; 1093 1094 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1095 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1096 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) && 1097 pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) && 1098 pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr)) 1099 return &type->pmus[unit->pmu_idx]; 1100 } 1101 } 1102 1103 return NULL; 1104 } 1105 1106 /* 1107 * Find the PMU of a PCI device. 1108 * @pdev: The PCI device. 1109 * @ids: The ID table of the available PCI devices with a PMU. 1110 * If NULL, search the whole uncore_pci_uncores. 1111 */ 1112 static struct intel_uncore_pmu * 1113 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) 1114 { 1115 struct intel_uncore_pmu *pmu = NULL; 1116 struct intel_uncore_type *type; 1117 kernel_ulong_t data; 1118 unsigned int devfn; 1119 1120 if (!ids) 1121 return uncore_pci_find_dev_pmu_from_types(pdev); 1122 1123 while (ids && ids->vendor) { 1124 if ((ids->vendor == pdev->vendor) && 1125 (ids->device == pdev->device)) { 1126 data = ids->driver_data; 1127 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), 1128 UNCORE_PCI_DEV_FUNC(data)); 1129 if (devfn == pdev->devfn) { 1130 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; 1131 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; 1132 break; 1133 } 1134 } 1135 ids++; 1136 } 1137 return pmu; 1138 } 1139 1140 /* 1141 * Register the PMU for a PCI device 1142 * @pdev: The PCI device. 1143 * @type: The corresponding PMU type of the device. 1144 * @pmu: The corresponding PMU of the device. 1145 * @die: The die id which the device maps to. 1146 */ 1147 static int uncore_pci_pmu_register(struct pci_dev *pdev, 1148 struct intel_uncore_type *type, 1149 struct intel_uncore_pmu *pmu, 1150 int die) 1151 { 1152 struct intel_uncore_box *box; 1153 int ret; 1154 1155 if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) 1156 return -EINVAL; 1157 1158 box = uncore_alloc_box(type, NUMA_NO_NODE); 1159 if (!box) 1160 return -ENOMEM; 1161 1162 atomic_inc(&box->refcnt); 1163 box->dieid = die; 1164 box->pci_dev = pdev; 1165 box->pmu = pmu; 1166 uncore_box_init(box); 1167 1168 pmu->boxes[die] = box; 1169 if (atomic_inc_return(&pmu->activeboxes) > 1) 1170 return 0; 1171 1172 /* First active box registers the pmu */ 1173 ret = uncore_pmu_register(pmu); 1174 if (ret) { 1175 pmu->boxes[die] = NULL; 1176 uncore_box_exit(box); 1177 kfree(box); 1178 } 1179 return ret; 1180 } 1181 1182 /* 1183 * add a pci uncore device 1184 */ 1185 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1186 { 1187 struct intel_uncore_type *type; 1188 struct intel_uncore_pmu *pmu = NULL; 1189 int die, ret; 1190 1191 ret = uncore_pci_get_dev_die_info(pdev, &die); 1192 if (ret) 1193 return ret; 1194 1195 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { 1196 int idx = UNCORE_PCI_DEV_IDX(id->driver_data); 1197 1198 uncore_extra_pci_dev[die].dev[idx] = pdev; 1199 pci_set_drvdata(pdev, NULL); 1200 return 0; 1201 } 1202 1203 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 1204 1205 /* 1206 * Some platforms, e.g. Knights Landing, use a common PCI device ID 1207 * for multiple instances of an uncore PMU device type. We should check 1208 * PCI slot and func to indicate the uncore box. 1209 */ 1210 if (id->driver_data & ~0xffff) { 1211 struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); 1212 1213 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); 1214 if (pmu == NULL) 1215 return -ENODEV; 1216 } else { 1217 /* 1218 * for performance monitoring unit with multiple boxes, 1219 * each box has a different function id. 1220 */ 1221 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; 1222 } 1223 1224 ret = uncore_pci_pmu_register(pdev, type, pmu, die); 1225 1226 pci_set_drvdata(pdev, pmu->boxes[die]); 1227 1228 return ret; 1229 } 1230 1231 /* 1232 * Unregister the PMU of a PCI device 1233 * @pmu: The corresponding PMU is unregistered. 1234 * @die: The die id which the device maps to. 1235 */ 1236 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) 1237 { 1238 struct intel_uncore_box *box = pmu->boxes[die]; 1239 1240 pmu->boxes[die] = NULL; 1241 if (atomic_dec_return(&pmu->activeboxes) == 0) 1242 uncore_pmu_unregister(pmu); 1243 uncore_box_exit(box); 1244 kfree(box); 1245 } 1246 1247 static void uncore_pci_remove(struct pci_dev *pdev) 1248 { 1249 struct intel_uncore_box *box; 1250 struct intel_uncore_pmu *pmu; 1251 int i, die; 1252 1253 if (uncore_pci_get_dev_die_info(pdev, &die)) 1254 return; 1255 1256 box = pci_get_drvdata(pdev); 1257 if (!box) { 1258 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { 1259 if (uncore_extra_pci_dev[die].dev[i] == pdev) { 1260 uncore_extra_pci_dev[die].dev[i] = NULL; 1261 break; 1262 } 1263 } 1264 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); 1265 return; 1266 } 1267 1268 pmu = box->pmu; 1269 1270 pci_set_drvdata(pdev, NULL); 1271 1272 uncore_pci_pmu_unregister(pmu, die); 1273 } 1274 1275 static int uncore_bus_notify(struct notifier_block *nb, 1276 unsigned long action, void *data, 1277 const struct pci_device_id *ids) 1278 { 1279 struct device *dev = data; 1280 struct pci_dev *pdev = to_pci_dev(dev); 1281 struct intel_uncore_pmu *pmu; 1282 int die; 1283 1284 /* Unregister the PMU when the device is going to be deleted. */ 1285 if (action != BUS_NOTIFY_DEL_DEVICE) 1286 return NOTIFY_DONE; 1287 1288 pmu = uncore_pci_find_dev_pmu(pdev, ids); 1289 if (!pmu) 1290 return NOTIFY_DONE; 1291 1292 if (uncore_pci_get_dev_die_info(pdev, &die)) 1293 return NOTIFY_DONE; 1294 1295 uncore_pci_pmu_unregister(pmu, die); 1296 1297 return NOTIFY_OK; 1298 } 1299 1300 static int uncore_pci_sub_bus_notify(struct notifier_block *nb, 1301 unsigned long action, void *data) 1302 { 1303 return uncore_bus_notify(nb, action, data, 1304 uncore_pci_sub_driver->id_table); 1305 } 1306 1307 static struct notifier_block uncore_pci_sub_notifier = { 1308 .notifier_call = uncore_pci_sub_bus_notify, 1309 }; 1310 1311 static void uncore_pci_sub_driver_init(void) 1312 { 1313 const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; 1314 struct intel_uncore_type *type; 1315 struct intel_uncore_pmu *pmu; 1316 struct pci_dev *pci_sub_dev; 1317 bool notify = false; 1318 unsigned int devfn; 1319 int die; 1320 1321 while (ids && ids->vendor) { 1322 pci_sub_dev = NULL; 1323 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; 1324 /* 1325 * Search the available device, and register the 1326 * corresponding PMU. 1327 */ 1328 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 1329 ids->device, pci_sub_dev))) { 1330 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), 1331 UNCORE_PCI_DEV_FUNC(ids->driver_data)); 1332 if (devfn != pci_sub_dev->devfn) 1333 continue; 1334 1335 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; 1336 if (!pmu) 1337 continue; 1338 1339 if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) 1340 continue; 1341 1342 if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, 1343 die)) 1344 notify = true; 1345 } 1346 ids++; 1347 } 1348 1349 if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) 1350 notify = false; 1351 1352 if (!notify) 1353 uncore_pci_sub_driver = NULL; 1354 } 1355 1356 static int uncore_pci_bus_notify(struct notifier_block *nb, 1357 unsigned long action, void *data) 1358 { 1359 return uncore_bus_notify(nb, action, data, NULL); 1360 } 1361 1362 static struct notifier_block uncore_pci_notifier = { 1363 .notifier_call = uncore_pci_bus_notify, 1364 }; 1365 1366 1367 static void uncore_pci_pmus_register(void) 1368 { 1369 struct intel_uncore_type **types = uncore_pci_uncores; 1370 struct intel_uncore_discovery_unit *unit; 1371 struct intel_uncore_type *type; 1372 struct intel_uncore_pmu *pmu; 1373 struct rb_node *node; 1374 struct pci_dev *pdev; 1375 1376 for (; *types; types++) { 1377 type = *types; 1378 1379 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1380 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1381 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr), 1382 UNCORE_DISCOVERY_PCI_BUS(unit->addr), 1383 UNCORE_DISCOVERY_PCI_DEVFN(unit->addr)); 1384 1385 if (!pdev) 1386 continue; 1387 pmu = &type->pmus[unit->pmu_idx]; 1388 uncore_pci_pmu_register(pdev, type, pmu, unit->die); 1389 } 1390 } 1391 1392 bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); 1393 } 1394 1395 static int __init uncore_pci_init(void) 1396 { 1397 size_t size; 1398 int ret; 1399 1400 size = uncore_max_dies() * sizeof(struct pci_extra_dev); 1401 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); 1402 if (!uncore_extra_pci_dev) { 1403 ret = -ENOMEM; 1404 goto err; 1405 } 1406 1407 ret = uncore_types_init(uncore_pci_uncores); 1408 if (ret) 1409 goto errtype; 1410 1411 if (uncore_pci_driver) { 1412 uncore_pci_driver->probe = uncore_pci_probe; 1413 uncore_pci_driver->remove = uncore_pci_remove; 1414 1415 ret = pci_register_driver(uncore_pci_driver); 1416 if (ret) 1417 goto errtype; 1418 } else 1419 uncore_pci_pmus_register(); 1420 1421 if (uncore_pci_sub_driver) 1422 uncore_pci_sub_driver_init(); 1423 1424 pcidrv_registered = true; 1425 return 0; 1426 1427 errtype: 1428 uncore_types_exit(uncore_pci_uncores); 1429 kfree(uncore_extra_pci_dev); 1430 uncore_extra_pci_dev = NULL; 1431 uncore_free_pcibus_map(); 1432 err: 1433 uncore_pci_uncores = empty_uncore; 1434 return ret; 1435 } 1436 1437 static void uncore_pci_exit(void) 1438 { 1439 if (pcidrv_registered) { 1440 pcidrv_registered = false; 1441 if (uncore_pci_sub_driver) 1442 bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); 1443 if (uncore_pci_driver) 1444 pci_unregister_driver(uncore_pci_driver); 1445 else 1446 bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); 1447 uncore_types_exit(uncore_pci_uncores); 1448 kfree(uncore_extra_pci_dev); 1449 uncore_free_pcibus_map(); 1450 } 1451 } 1452 1453 static bool uncore_die_has_box(struct intel_uncore_type *type, 1454 int die, unsigned int pmu_idx) 1455 { 1456 if (!type->boxes) 1457 return true; 1458 1459 if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) 1460 return false; 1461 1462 return true; 1463 } 1464 1465 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, 1466 int new_cpu) 1467 { 1468 struct intel_uncore_pmu *pmu = type->pmus; 1469 struct intel_uncore_box *box; 1470 int i, die; 1471 1472 die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu); 1473 for (i = 0; i < type->num_boxes; i++, pmu++) { 1474 box = pmu->boxes[die]; 1475 if (!box) 1476 continue; 1477 1478 if (old_cpu < 0) { 1479 WARN_ON_ONCE(box->cpu != -1); 1480 if (uncore_die_has_box(type, die, pmu->pmu_idx)) { 1481 box->cpu = new_cpu; 1482 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1483 } 1484 continue; 1485 } 1486 1487 WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu); 1488 box->cpu = -1; 1489 cpumask_clear_cpu(old_cpu, &pmu->cpu_mask); 1490 if (new_cpu < 0) 1491 continue; 1492 1493 if (!uncore_die_has_box(type, die, pmu->pmu_idx)) 1494 continue; 1495 uncore_pmu_cancel_hrtimer(box); 1496 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); 1497 box->cpu = new_cpu; 1498 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1499 } 1500 } 1501 1502 static void uncore_change_context(struct intel_uncore_type **uncores, 1503 int old_cpu, int new_cpu) 1504 { 1505 for (; *uncores; uncores++) 1506 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1507 } 1508 1509 static void uncore_box_unref(struct intel_uncore_type **types, int id) 1510 { 1511 struct intel_uncore_type *type; 1512 struct intel_uncore_pmu *pmu; 1513 struct intel_uncore_box *box; 1514 int i; 1515 1516 for (; *types; types++) { 1517 type = *types; 1518 pmu = type->pmus; 1519 for (i = 0; i < type->num_boxes; i++, pmu++) { 1520 box = pmu->boxes[id]; 1521 if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0) 1522 uncore_box_exit(box); 1523 } 1524 } 1525 } 1526 1527 static int uncore_event_cpu_offline(unsigned int cpu) 1528 { 1529 int die, target; 1530 1531 /* Check if exiting cpu is used for collecting uncore events */ 1532 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) 1533 goto unref; 1534 /* Find a new cpu to collect uncore events */ 1535 target = cpumask_any_but(topology_die_cpumask(cpu), cpu); 1536 1537 /* Migrate uncore events to the new target */ 1538 if (target < nr_cpu_ids) 1539 cpumask_set_cpu(target, &uncore_cpu_mask); 1540 else 1541 target = -1; 1542 1543 uncore_change_context(uncore_msr_uncores, cpu, target); 1544 uncore_change_context(uncore_mmio_uncores, cpu, target); 1545 uncore_change_context(uncore_pci_uncores, cpu, target); 1546 1547 unref: 1548 /* Clear the references */ 1549 die = topology_logical_die_id(cpu); 1550 uncore_box_unref(uncore_msr_uncores, die); 1551 uncore_box_unref(uncore_mmio_uncores, die); 1552 return 0; 1553 } 1554 1555 static int allocate_boxes(struct intel_uncore_type **types, 1556 unsigned int die, unsigned int cpu) 1557 { 1558 struct intel_uncore_box *box, *tmp; 1559 struct intel_uncore_type *type; 1560 struct intel_uncore_pmu *pmu; 1561 LIST_HEAD(allocated); 1562 int i; 1563 1564 /* Try to allocate all required boxes */ 1565 for (; *types; types++) { 1566 type = *types; 1567 pmu = type->pmus; 1568 for (i = 0; i < type->num_boxes; i++, pmu++) { 1569 if (pmu->boxes[die]) 1570 continue; 1571 box = uncore_alloc_box(type, cpu_to_node(cpu)); 1572 if (!box) 1573 goto cleanup; 1574 box->pmu = pmu; 1575 box->dieid = die; 1576 list_add(&box->active_list, &allocated); 1577 } 1578 } 1579 /* Install them in the pmus */ 1580 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1581 list_del_init(&box->active_list); 1582 box->pmu->boxes[die] = box; 1583 } 1584 return 0; 1585 1586 cleanup: 1587 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1588 list_del_init(&box->active_list); 1589 kfree(box); 1590 } 1591 return -ENOMEM; 1592 } 1593 1594 static int uncore_box_ref(struct intel_uncore_type **types, 1595 int id, unsigned int cpu) 1596 { 1597 struct intel_uncore_type *type; 1598 struct intel_uncore_pmu *pmu; 1599 struct intel_uncore_box *box; 1600 int i, ret; 1601 1602 ret = allocate_boxes(types, id, cpu); 1603 if (ret) 1604 return ret; 1605 1606 for (; *types; types++) { 1607 type = *types; 1608 pmu = type->pmus; 1609 for (i = 0; i < type->num_boxes; i++, pmu++) { 1610 box = pmu->boxes[id]; 1611 if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1) 1612 uncore_box_init(box); 1613 } 1614 } 1615 return 0; 1616 } 1617 1618 static int uncore_event_cpu_online(unsigned int cpu) 1619 { 1620 int die, target, msr_ret, mmio_ret; 1621 1622 die = topology_logical_die_id(cpu); 1623 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); 1624 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); 1625 if (msr_ret && mmio_ret) 1626 return -ENOMEM; 1627 1628 /* 1629 * Check if there is an online cpu in the package 1630 * which collects uncore events already. 1631 */ 1632 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); 1633 if (target < nr_cpu_ids) 1634 return 0; 1635 1636 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1637 1638 if (!msr_ret) 1639 uncore_change_context(uncore_msr_uncores, -1, cpu); 1640 if (!mmio_ret) 1641 uncore_change_context(uncore_mmio_uncores, -1, cpu); 1642 uncore_change_context(uncore_pci_uncores, -1, cpu); 1643 return 0; 1644 } 1645 1646 static int __init type_pmu_register(struct intel_uncore_type *type) 1647 { 1648 int i, ret; 1649 1650 for (i = 0; i < type->num_boxes; i++) { 1651 ret = uncore_pmu_register(&type->pmus[i]); 1652 if (ret) 1653 return ret; 1654 } 1655 return 0; 1656 } 1657 1658 static int __init uncore_msr_pmus_register(void) 1659 { 1660 struct intel_uncore_type **types = uncore_msr_uncores; 1661 int ret; 1662 1663 for (; *types; types++) { 1664 ret = type_pmu_register(*types); 1665 if (ret) 1666 return ret; 1667 } 1668 return 0; 1669 } 1670 1671 static int __init uncore_cpu_init(void) 1672 { 1673 int ret; 1674 1675 ret = uncore_types_init(uncore_msr_uncores); 1676 if (ret) 1677 goto err; 1678 1679 ret = uncore_msr_pmus_register(); 1680 if (ret) 1681 goto err; 1682 return 0; 1683 err: 1684 uncore_types_exit(uncore_msr_uncores); 1685 uncore_msr_uncores = empty_uncore; 1686 return ret; 1687 } 1688 1689 static int __init uncore_mmio_init(void) 1690 { 1691 struct intel_uncore_type **types = uncore_mmio_uncores; 1692 int ret; 1693 1694 ret = uncore_types_init(types); 1695 if (ret) 1696 goto err; 1697 1698 for (; *types; types++) { 1699 ret = type_pmu_register(*types); 1700 if (ret) 1701 goto err; 1702 } 1703 return 0; 1704 err: 1705 uncore_types_exit(uncore_mmio_uncores); 1706 uncore_mmio_uncores = empty_uncore; 1707 return ret; 1708 } 1709 1710 struct intel_uncore_init_fun { 1711 void (*cpu_init)(void); 1712 int (*pci_init)(void); 1713 void (*mmio_init)(void); 1714 /* Discovery table is required */ 1715 bool use_discovery; 1716 /* The units in the discovery table should be ignored. */ 1717 int *uncore_units_ignore; 1718 }; 1719 1720 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { 1721 .cpu_init = nhm_uncore_cpu_init, 1722 }; 1723 1724 static const struct intel_uncore_init_fun snb_uncore_init __initconst = { 1725 .cpu_init = snb_uncore_cpu_init, 1726 .pci_init = snb_uncore_pci_init, 1727 }; 1728 1729 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { 1730 .cpu_init = snb_uncore_cpu_init, 1731 .pci_init = ivb_uncore_pci_init, 1732 }; 1733 1734 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { 1735 .cpu_init = snb_uncore_cpu_init, 1736 .pci_init = hsw_uncore_pci_init, 1737 }; 1738 1739 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { 1740 .cpu_init = snb_uncore_cpu_init, 1741 .pci_init = bdw_uncore_pci_init, 1742 }; 1743 1744 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { 1745 .cpu_init = snbep_uncore_cpu_init, 1746 .pci_init = snbep_uncore_pci_init, 1747 }; 1748 1749 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { 1750 .cpu_init = nhmex_uncore_cpu_init, 1751 }; 1752 1753 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { 1754 .cpu_init = ivbep_uncore_cpu_init, 1755 .pci_init = ivbep_uncore_pci_init, 1756 }; 1757 1758 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { 1759 .cpu_init = hswep_uncore_cpu_init, 1760 .pci_init = hswep_uncore_pci_init, 1761 }; 1762 1763 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { 1764 .cpu_init = bdx_uncore_cpu_init, 1765 .pci_init = bdx_uncore_pci_init, 1766 }; 1767 1768 static const struct intel_uncore_init_fun knl_uncore_init __initconst = { 1769 .cpu_init = knl_uncore_cpu_init, 1770 .pci_init = knl_uncore_pci_init, 1771 }; 1772 1773 static const struct intel_uncore_init_fun skl_uncore_init __initconst = { 1774 .cpu_init = skl_uncore_cpu_init, 1775 .pci_init = skl_uncore_pci_init, 1776 }; 1777 1778 static const struct intel_uncore_init_fun skx_uncore_init __initconst = { 1779 .cpu_init = skx_uncore_cpu_init, 1780 .pci_init = skx_uncore_pci_init, 1781 }; 1782 1783 static const struct intel_uncore_init_fun icl_uncore_init __initconst = { 1784 .cpu_init = icl_uncore_cpu_init, 1785 .pci_init = skl_uncore_pci_init, 1786 }; 1787 1788 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1789 .cpu_init = tgl_uncore_cpu_init, 1790 .mmio_init = tgl_uncore_mmio_init, 1791 }; 1792 1793 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1794 .cpu_init = tgl_uncore_cpu_init, 1795 .mmio_init = tgl_l_uncore_mmio_init, 1796 }; 1797 1798 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { 1799 .cpu_init = tgl_uncore_cpu_init, 1800 .pci_init = skl_uncore_pci_init, 1801 }; 1802 1803 static const struct intel_uncore_init_fun adl_uncore_init __initconst = { 1804 .cpu_init = adl_uncore_cpu_init, 1805 .mmio_init = adl_uncore_mmio_init, 1806 }; 1807 1808 static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { 1809 .cpu_init = mtl_uncore_cpu_init, 1810 .mmio_init = adl_uncore_mmio_init, 1811 }; 1812 1813 static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { 1814 .cpu_init = lnl_uncore_cpu_init, 1815 .mmio_init = lnl_uncore_mmio_init, 1816 }; 1817 1818 static const struct intel_uncore_init_fun icx_uncore_init __initconst = { 1819 .cpu_init = icx_uncore_cpu_init, 1820 .pci_init = icx_uncore_pci_init, 1821 .mmio_init = icx_uncore_mmio_init, 1822 }; 1823 1824 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1825 .cpu_init = snr_uncore_cpu_init, 1826 .pci_init = snr_uncore_pci_init, 1827 .mmio_init = snr_uncore_mmio_init, 1828 }; 1829 1830 static const struct intel_uncore_init_fun spr_uncore_init __initconst = { 1831 .cpu_init = spr_uncore_cpu_init, 1832 .pci_init = spr_uncore_pci_init, 1833 .mmio_init = spr_uncore_mmio_init, 1834 .use_discovery = true, 1835 .uncore_units_ignore = spr_uncore_units_ignore, 1836 }; 1837 1838 static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1839 .cpu_init = gnr_uncore_cpu_init, 1840 .pci_init = gnr_uncore_pci_init, 1841 .mmio_init = gnr_uncore_mmio_init, 1842 .use_discovery = true, 1843 .uncore_units_ignore = gnr_uncore_units_ignore, 1844 }; 1845 1846 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1847 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1848 .pci_init = intel_uncore_generic_uncore_pci_init, 1849 .mmio_init = intel_uncore_generic_uncore_mmio_init, 1850 }; 1851 1852 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1853 X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), 1854 X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), 1855 X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), 1856 X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), 1857 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), 1858 X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), 1859 X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), 1860 X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), 1861 X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), 1862 X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), 1863 X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), 1864 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), 1865 X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), 1866 X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init), 1867 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), 1868 X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), 1869 X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), 1870 X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), 1871 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), 1872 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), 1873 X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), 1874 X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init), 1875 X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), 1876 X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), 1877 X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), 1878 X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), 1879 X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), 1880 X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), 1881 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), 1882 X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), 1883 X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), 1884 X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), 1885 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), 1886 X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), 1887 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), 1888 X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), 1889 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), 1890 X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), 1891 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), 1892 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), 1893 X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), 1894 X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), 1895 X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), 1896 X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), 1897 X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), 1898 X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), 1899 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), 1900 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), 1901 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), 1902 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), 1903 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), 1904 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), 1905 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), 1906 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), 1907 X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), 1908 {}, 1909 }; 1910 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1911 1912 static int __init intel_uncore_init(void) 1913 { 1914 const struct x86_cpu_id *id; 1915 struct intel_uncore_init_fun *uncore_init; 1916 int pret = 0, cret = 0, mret = 0, ret; 1917 1918 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1919 return -ENODEV; 1920 1921 __uncore_max_dies = 1922 topology_max_packages() * topology_max_dies_per_package(); 1923 1924 id = x86_match_cpu(intel_uncore_match); 1925 if (!id) { 1926 if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) 1927 uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; 1928 else 1929 return -ENODEV; 1930 } else { 1931 uncore_init = (struct intel_uncore_init_fun *)id->driver_data; 1932 if (uncore_no_discover && uncore_init->use_discovery) 1933 return -ENODEV; 1934 if (uncore_init->use_discovery && 1935 !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) 1936 return -ENODEV; 1937 } 1938 1939 if (uncore_init->pci_init) { 1940 pret = uncore_init->pci_init(); 1941 if (!pret) 1942 pret = uncore_pci_init(); 1943 } 1944 1945 if (uncore_init->cpu_init) { 1946 uncore_init->cpu_init(); 1947 cret = uncore_cpu_init(); 1948 } 1949 1950 if (uncore_init->mmio_init) { 1951 uncore_init->mmio_init(); 1952 mret = uncore_mmio_init(); 1953 } 1954 1955 if (cret && pret && mret) { 1956 ret = -ENODEV; 1957 goto free_discovery; 1958 } 1959 1960 /* Install hotplug callbacks to setup the targets for each package */ 1961 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, 1962 "perf/x86/intel/uncore:online", 1963 uncore_event_cpu_online, 1964 uncore_event_cpu_offline); 1965 if (ret) 1966 goto err; 1967 return 0; 1968 1969 err: 1970 uncore_types_exit(uncore_msr_uncores); 1971 uncore_types_exit(uncore_mmio_uncores); 1972 uncore_pci_exit(); 1973 free_discovery: 1974 intel_uncore_clear_discovery_tables(); 1975 return ret; 1976 } 1977 module_init(intel_uncore_init); 1978 1979 static void __exit intel_uncore_exit(void) 1980 { 1981 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1982 uncore_types_exit(uncore_msr_uncores); 1983 uncore_types_exit(uncore_mmio_uncores); 1984 uncore_pci_exit(); 1985 intel_uncore_clear_discovery_tables(); 1986 } 1987 module_exit(intel_uncore_exit); 1988