1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/module.h> 3 4 #include <asm/cpu_device_id.h> 5 #include <asm/intel-family.h> 6 #include "uncore.h" 7 #include "uncore_discovery.h" 8 9 static bool uncore_no_discover; 10 module_param(uncore_no_discover, bool, 0); 11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " 12 "(default: enable the discovery mechanism)."); 13 struct intel_uncore_type *empty_uncore[] = { NULL, }; 14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore; 15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore; 16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; 17 18 static bool pcidrv_registered; 19 struct pci_driver *uncore_pci_driver; 20 /* The PCI driver for the device which the uncore doesn't own. */ 21 struct pci_driver *uncore_pci_sub_driver; 22 /* pci bus to socket mapping */ 23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock); 24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); 25 struct pci_extra_dev *uncore_extra_pci_dev; 26 int __uncore_max_dies; 27 28 /* mask of cpus that collect uncore events */ 29 static cpumask_t uncore_cpu_mask; 30 31 /* constraint for the fixed counter */ 32 static struct event_constraint uncore_constraint_fixed = 33 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); 34 struct event_constraint uncore_constraint_empty = 35 EVENT_CONSTRAINT(0, 0, 0); 36 37 MODULE_DESCRIPTION("Support for Intel uncore performance events"); 38 MODULE_LICENSE("GPL"); 39 40 int uncore_pcibus_to_dieid(struct pci_bus *bus) 41 { 42 struct pci2phy_map *map; 43 int die_id = -1; 44 45 raw_spin_lock(&pci2phy_map_lock); 46 list_for_each_entry(map, &pci2phy_map_head, list) { 47 if (map->segment == pci_domain_nr(bus)) { 48 die_id = map->pbus_to_dieid[bus->number]; 49 break; 50 } 51 } 52 raw_spin_unlock(&pci2phy_map_lock); 53 54 return die_id; 55 } 56 57 int uncore_die_to_segment(int die) 58 { 59 struct pci_bus *bus = NULL; 60 61 /* Find 
first pci bus which attributes to specified die. */ 62 while ((bus = pci_find_next_bus(bus)) && 63 (die != uncore_pcibus_to_dieid(bus))) 64 ; 65 66 return bus ? pci_domain_nr(bus) : -EINVAL; 67 } 68 69 int uncore_device_to_die(struct pci_dev *dev) 70 { 71 int node = pcibus_to_node(dev->bus); 72 int cpu; 73 74 for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) { 75 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 77 if (c->initialized && cpu_to_node(cpu) == node) 78 return c->topo.logical_die_id; 79 } 80 81 return -1; 82 } 83 84 static void uncore_free_pcibus_map(void) 85 { 86 struct pci2phy_map *map, *tmp; 87 88 list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { 89 list_del(&map->list); 90 kfree(map); 91 } 92 } 93 94 struct pci2phy_map *__find_pci2phy_map(int segment) 95 { 96 struct pci2phy_map *map, *alloc = NULL; 97 int i; 98 99 lockdep_assert_held(&pci2phy_map_lock); 100 101 lookup: 102 list_for_each_entry(map, &pci2phy_map_head, list) { 103 if (map->segment == segment) 104 goto end; 105 } 106 107 if (!alloc) { 108 raw_spin_unlock(&pci2phy_map_lock); 109 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); 110 raw_spin_lock(&pci2phy_map_lock); 111 112 if (!alloc) 113 return NULL; 114 115 goto lookup; 116 } 117 118 map = alloc; 119 alloc = NULL; 120 map->segment = segment; 121 for (i = 0; i < 256; i++) 122 map->pbus_to_dieid[i] = -1; 123 list_add_tail(&map->list, &pci2phy_map_head); 124 125 end: 126 kfree(alloc); 127 return map; 128 } 129 130 ssize_t uncore_event_show(struct device *dev, 131 struct device_attribute *attr, char *buf) 132 { 133 struct uncore_event_desc *event = 134 container_of(attr, struct uncore_event_desc, attr); 135 return sprintf(buf, "%s", event->config); 136 } 137 138 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) 139 { 140 unsigned int dieid = topology_logical_die_id(cpu); 141 142 /* 143 * The unsigned check also catches the '-1' return value for non 144 * existent mappings in the topology map. 
145 */ 146 return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL; 147 } 148 149 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) 150 { 151 u64 count; 152 153 rdmsrl(event->hw.event_base, count); 154 155 return count; 156 } 157 158 void uncore_mmio_exit_box(struct intel_uncore_box *box) 159 { 160 if (box->io_addr) 161 iounmap(box->io_addr); 162 } 163 164 u64 uncore_mmio_read_counter(struct intel_uncore_box *box, 165 struct perf_event *event) 166 { 167 if (!box->io_addr) 168 return 0; 169 170 if (!uncore_mmio_is_valid_offset(box, event->hw.event_base)) 171 return 0; 172 173 return readq(box->io_addr + event->hw.event_base); 174 } 175 176 /* 177 * generic get constraint function for shared match/mask registers. 178 */ 179 struct event_constraint * 180 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) 181 { 182 struct intel_uncore_extra_reg *er; 183 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 184 struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; 185 unsigned long flags; 186 bool ok = false; 187 188 /* 189 * reg->alloc can be set due to existing state, so for fake box we 190 * need to ignore this, otherwise we might fail to allocate proper 191 * fake state for this extra reg constraint. 
192 */ 193 if (reg1->idx == EXTRA_REG_NONE || 194 (!uncore_box_is_fake(box) && reg1->alloc)) 195 return NULL; 196 197 er = &box->shared_regs[reg1->idx]; 198 raw_spin_lock_irqsave(&er->lock, flags); 199 if (!atomic_read(&er->ref) || 200 (er->config1 == reg1->config && er->config2 == reg2->config)) { 201 atomic_inc(&er->ref); 202 er->config1 = reg1->config; 203 er->config2 = reg2->config; 204 ok = true; 205 } 206 raw_spin_unlock_irqrestore(&er->lock, flags); 207 208 if (ok) { 209 if (!uncore_box_is_fake(box)) 210 reg1->alloc = 1; 211 return NULL; 212 } 213 214 return &uncore_constraint_empty; 215 } 216 217 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) 218 { 219 struct intel_uncore_extra_reg *er; 220 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 221 222 /* 223 * Only put constraint if extra reg was actually allocated. Also 224 * takes care of event which do not use an extra shared reg. 225 * 226 * Also, if this is a fake box we shouldn't touch any event state 227 * (reg->alloc) and we don't care about leaving inconsistent box 228 * state either since it will be thrown out. 
229 */ 230 if (uncore_box_is_fake(box) || !reg1->alloc) 231 return; 232 233 er = &box->shared_regs[reg1->idx]; 234 atomic_dec(&er->ref); 235 reg1->alloc = 0; 236 } 237 238 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) 239 { 240 struct intel_uncore_extra_reg *er; 241 unsigned long flags; 242 u64 config; 243 244 er = &box->shared_regs[idx]; 245 246 raw_spin_lock_irqsave(&er->lock, flags); 247 config = er->config; 248 raw_spin_unlock_irqrestore(&er->lock, flags); 249 250 return config; 251 } 252 253 static void uncore_assign_hw_event(struct intel_uncore_box *box, 254 struct perf_event *event, int idx) 255 { 256 struct hw_perf_event *hwc = &event->hw; 257 258 hwc->idx = idx; 259 hwc->last_tag = ++box->tags[idx]; 260 261 if (uncore_pmc_fixed(hwc->idx)) { 262 hwc->event_base = uncore_fixed_ctr(box); 263 hwc->config_base = uncore_fixed_ctl(box); 264 return; 265 } 266 267 if (intel_generic_uncore_assign_hw_event(event, box)) 268 return; 269 270 hwc->config_base = uncore_event_ctl(box, hwc->idx); 271 hwc->event_base = uncore_perf_ctr(box, hwc->idx); 272 } 273 274 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) 275 { 276 u64 prev_count, new_count, delta; 277 int shift; 278 279 if (uncore_pmc_freerunning(event->hw.idx)) 280 shift = 64 - uncore_freerunning_bits(box, event); 281 else if (uncore_pmc_fixed(event->hw.idx)) 282 shift = 64 - uncore_fixed_ctr_bits(box); 283 else 284 shift = 64 - uncore_perf_ctr_bits(box); 285 286 /* the hrtimer might modify the previous event value */ 287 again: 288 prev_count = local64_read(&event->hw.prev_count); 289 new_count = uncore_read_counter(box, event); 290 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) 291 goto again; 292 293 delta = (new_count << shift) - (prev_count << shift); 294 delta >>= shift; 295 296 local64_add(delta, &event->count); 297 } 298 299 /* 300 * The overflow interrupt is unavailable for SandyBridge-EP, is broken 301 * for SandyBridge. 
So we use hrtimer to periodically poll the counter
 * to avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	/* Do not re-arm when nothing is active or we run on a CPU other than box->cpu. */
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * disable local interrupt to prevent uncore_pmu_event_start/stop
	 * to interrupt the update process
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

/* Arm the box's polling timer, pinned, to fire after box->hrtimer_duration ns. */
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED);
}

/* Cancel the box's polling timer. */
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

/* Set up the box's timer with uncore_pmu_hrtimer() as its callback. */
static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}

/*
 * Allocate a zeroed box for @type on NUMA node @node, with room for
 * type->num_shared_regs shared registers after the box itself.
 * The box starts with cpu and dieid set to -1 and the default polling
 * interval. Returns NULL on allocation failure.
 */
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
						 int node)
{
	int i, size, numshared = type->num_shared_regs ;
	struct intel_uncore_box *box;

	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < numshared; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	box->cpu = -1;
	box->dieid = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

/* True when @event belongs to the PMU that owns @box. */
static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
{
	return &box->pmu->pmu == event->pmu;
}

/*
 * Append @leader (and, when @dogrp is set, its siblings that are not in
 * a state <= PERF_EVENT_STATE_OFF) to box->event_list, keeping only events
 * of this box's PMU.
 *
 * box->n_events itself is not modified; the new total is returned and the
 * caller decides whether to commit it. Returns -EINVAL when the box has no
 * free slot (num_counters, plus one if the type has a fixed control).
 */
static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
		      bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_box_event(box, leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_box_event(box, event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

/*
 * Pick the scheduling constraint for @event, in priority order:
 * the type's get_constraint() result if it returns one, the fixed-counter
 * constraint for UNCORE_FIXED_EVENT, the first matching entry of
 * type->constraints, and finally the type's unconstrained default.
 */
static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void
uncore_put_event_constraint(struct intel_uncore_box *box,
			    struct perf_event *event)
{
	/* Forward to the type's put_constraint() callback when it has one. */
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}

/*
 * Try to assign a counter to each of the first @n events in
 * box->event_list. Counter indices are written to @assign when it is
 * non-NULL; a NULL @assign makes this a dry run.
 *
 * The constraints gathered here are released again on failure and on dry
 * runs. Returns 0 on success, -EINVAL when perf_assign_events() fails.
 */
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	/* Collect each event's constraint and the min/max constraint weight. */
	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

/*
 * pmu::start callback: begin counting @event on its assigned counter and
 * start the box's polling timer when this is the first active event.
 */
void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	/*
	 * Free running counter is read-only and always active.
	 * Use the current counter value as start point.
	 * There is no overflow interrupt for free running counter.
	 * Use hrtimer to periodically poll the counter to avoid overflow.
	 */
	if (uncore_pmc_freerunning(event->hw.idx)) {
		list_add_tail(&event->active_entry, &box->active_list);
		local64_set(&event->hw.prev_count,
			    uncore_read_counter(box, event));
		if (box->n_active++ == 0)
			uncore_pmu_start_hrtimer(box);
		return;
	}

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	/* Take the current counter value as the baseline before enabling. */
	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1)
		uncore_pmu_start_hrtimer(box);
}

/*
 * pmu::stop callback: stop counting @event, cancel the polling timer when
 * no event remains active, and fold in the final count when requested
 * via PERF_EF_UPDATE.
 */
void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	/* Cannot disable free running counter which is read-only */
	if (uncore_pmc_freerunning(hwc->idx)) {
		list_del(&event->active_entry);
		if (--box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
		uncore_perf_event_update(box, event);
		return;
	}

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

/*
 * pmu::add callback: add @event to its box, reschedule all of the box's
 * events onto counters and (re)start the ones that should be running.
 *
 * Returns 0 on success, -ENODEV when the event has no box, or the error
 * from collecting/assigning events.
 */
int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	/*
	 * The free running counter is assigned in event_init().
	 * The free running counter event and free running counter
	 * are 1:1 mapped. It doesn't need to be tracked in event_list.
	 */
	if (uncore_pmc_freerunning(hwc->idx)) {
		if (flags & PERF_EF_START)
			uncore_pmu_event_start(event, 0);
		return 0;
	}

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		/* Same counter and tag: the event stays where it is. */
		if (hwc->idx == assign[i] &&
			hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
			hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		/* PERF_HES_ARCH marks events that must not be started here. */
		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

/*
 * pmu::del callback: stop @event, remove it from the box's event list,
 * release its constraint and reset its counter assignment.
 */
void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	/*
	 * The event for free running counter is not tracked by event_list.
	 * It doesn't need to force event->hw.idx = -1 to reassign the counter.
	 * Because the event and the free running counter are 1:1 mapped.
	 */
	if (uncore_pmc_freerunning(event->hw.idx))
		return;

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			/* Close the gap by shifting the later entries down. */
			for (++i; i < box->n_events; i++)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

/* pmu::read callback: fold the current hardware count into the event. */
void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	/* The free running counter is always active. */
	if (uncore_pmc_freerunning(event->hw.idx))
		return 0;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	/* NULL assign[]: dry run, only the return value matters. */
	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

/*
 * pmu::event_init callback: validate @event for this uncore PMU, redirect
 * it to the CPU that owns the box of the requested CPU's die, and set up
 * its hardware config.
 *
 * Returns 0 on success, -ENOENT when the event is not for this PMU or the
 * PMU is not registered, -EINVAL for unsupported requests, or the error of
 * the type's hw_config() / group validation.
 */
static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (!pmu->registered)
		return -ENOENT;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;
	event->pmu_private = box;

	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else if (is_freerunning_event(event)) {
		hwc->config = event->attr.config;
		if (!check_valid_freerunning_event(box, event))
			return -EINVAL;
		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
		/*
		 * The free running counter event and free running counter
		 * are always 1:1 mapped.
		 * The free running counter is always active.
		 * Assign the free running counter here.
		 */
		event->hw.event_base = uncore_freerunning_counter(box, event);
	} else {
		/* Keep only the config bits allowed by the type's event masks. */
		hwc->config = event->attr.config &
			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	/* Only non-leader events need the group to be validated. */
	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

/*
 * pmu::pmu_enable callback: call the type's enable_box() on the box of
 * the current CPU's die, if both exist.
 */
static void uncore_pmu_enable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->enable_box)
		uncore_pmu->type->ops->enable_box(box);
}

/*
 * pmu::pmu_disable callback: call the type's disable_box() on the box of
 * the current CPU's die, if both exist.
 */
static void uncore_pmu_disable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->disable_box)
		uncore_pmu->type->ops->disable_box(box);
}

/* sysfs show callback for the "cpumask" attribute: prints pmu->cpu_mask. */
static ssize_t uncore_get_attr_cpumask(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

/*
 * Return the box id to use in the PMU name: the id looked up through
 * intel_uncore_find_discovery_unit_id() when the type has a boxes tree,
 * otherwise the PMU index.
 */
static inline int uncore_get_box_id(struct intel_uncore_type *type,
				    struct intel_uncore_pmu *pmu)
{
	if (type->boxes)
		return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx);

	return pmu->pmu_idx;
}

/*
 * Write the alias name of @pmu into @pmu_name:
 * "uncore_type_<type_id>" for a single-box type, otherwise
 * "uncore_type_<type_id>_<box_id>".
 * NOTE(review): the write is unbounded; the caller must supply a buffer
 * large enough for the name.
 */
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	if (type->num_boxes == 1)
		sprintf(pmu_name, "uncore_type_%u", type->type_id);
	else {
		sprintf(pmu_name, "uncore_type_%u_%d",
			type->type_id, uncore_get_box_id(type, pmu));
	}
}

/* Fill in pmu->name from the type's name, box count and box id. */
static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	/*
	 * No uncore block name in discovery table.
	 * Use uncore_type_&typeid_&boxid as name.
	 */
	if (!type->name) {
		uncore_get_alias_name(pmu->name, pmu);
		return;
	}

	if (type->num_boxes == 1) {
		if (strlen(type->name) > 0)
			sprintf(pmu->name, "uncore_%s", type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		/*
		 * Use the box ID from the discovery table if applicable.
		 */
		sprintf(pmu->name, "uncore_%s_%d", type->name,
			uncore_get_box_id(type, pmu));
	}
}

/*
 * Initialise pmu->pmu (from the generic uncore callbacks, or from the
 * type's own struct pmu template when it provides one), name it and
 * register it with perf. Sets pmu->registered on success.
 * Returns the result of perf_pmu_register().
 */
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.pmu_enable	= uncore_pmu_enable,
			.pmu_disable	= uncore_pmu_disable,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
			.module		= THIS_MODULE,
			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
			.attr_update	= pmu->type->attr_update,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
		pmu->pmu.attr_update = pmu->type->attr_update;
	}

	uncore_get_pmu_name(pmu);

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	if (!ret)
		pmu->registered = true;
	return ret;
}

/* Unregister the PMU from perf if it is currently registered. */
static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
{
	if (!pmu->registered)
		return;
	perf_pmu_unregister(&pmu->pmu);
	pmu->registered = false;
}

/* Free every per-die box of @pmu and the box pointer array itself. */
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
	int die;

	for (die = 0; die < uncore_max_dies(); die++)
		kfree(pmu->boxes[die]);
	kfree(pmu->boxes);
}

/*
 * Tear down one uncore type: run its cleanup callbacks, unregister and
 * free all of its PMUs, and free its events attribute group.
 */
static void uncore_type_exit(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	int i;

	if (type->cleanup_mapping)
		type->cleanup_mapping(type);

	if (type->cleanup_extra_boxes)
		type->cleanup_extra_boxes(type);

	if (pmu) {
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			uncore_pmu_unregister(pmu);
			uncore_free_boxes(pmu);
		}
		kfree(type->pmus);
		type->pmus = NULL;
	}

	kfree(type->events_group);
	type->events_group = NULL;
}

static
void uncore_types_exit(struct intel_uncore_type **types) 989 { 990 for (; *types; types++) 991 uncore_type_exit(*types); 992 } 993 994 static int __init uncore_type_init(struct intel_uncore_type *type) 995 { 996 struct intel_uncore_pmu *pmus; 997 size_t size; 998 int i, j; 999 1000 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); 1001 if (!pmus) 1002 return -ENOMEM; 1003 1004 size = uncore_max_dies() * sizeof(struct intel_uncore_box *); 1005 1006 for (i = 0; i < type->num_boxes; i++) { 1007 pmus[i].pmu_idx = i; 1008 pmus[i].type = type; 1009 pmus[i].boxes = kzalloc(size, GFP_KERNEL); 1010 if (!pmus[i].boxes) 1011 goto err; 1012 } 1013 1014 type->pmus = pmus; 1015 type->unconstrainted = (struct event_constraint) 1016 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 1017 0, type->num_counters, 0, 0); 1018 1019 if (type->event_descs) { 1020 struct { 1021 struct attribute_group group; 1022 struct attribute *attrs[]; 1023 } *attr_group; 1024 for (i = 0; type->event_descs[i].attr.attr.name; i++); 1025 1026 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), 1027 GFP_KERNEL); 1028 if (!attr_group) 1029 goto err; 1030 1031 attr_group->group.name = "events"; 1032 attr_group->group.attrs = attr_group->attrs; 1033 1034 for (j = 0; j < i; j++) 1035 attr_group->attrs[j] = &type->event_descs[j].attr.attr; 1036 1037 type->events_group = &attr_group->group; 1038 } 1039 1040 type->pmu_group = &uncore_pmu_attr_group; 1041 1042 if (type->set_mapping) 1043 type->set_mapping(type); 1044 1045 return 0; 1046 1047 err: 1048 for (i = 0; i < type->num_boxes; i++) 1049 kfree(pmus[i].boxes); 1050 kfree(pmus); 1051 1052 return -ENOMEM; 1053 } 1054 1055 static int __init 1056 uncore_types_init(struct intel_uncore_type **types) 1057 { 1058 int ret; 1059 1060 for (; *types; types++) { 1061 ret = uncore_type_init(*types); 1062 if (ret) 1063 return ret; 1064 } 1065 return 0; 1066 } 1067 1068 /* 1069 * Get the die information of a PCI device. 1070 * @pdev: The PCI device. 
1071 * @die: The die id which the device maps to. 1072 */ 1073 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) 1074 { 1075 *die = uncore_pcibus_to_dieid(pdev->bus); 1076 if (*die < 0) 1077 return -EINVAL; 1078 1079 return 0; 1080 } 1081 1082 static struct intel_uncore_pmu * 1083 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) 1084 { 1085 struct intel_uncore_type **types = uncore_pci_uncores; 1086 struct intel_uncore_discovery_unit *unit; 1087 struct intel_uncore_type *type; 1088 struct rb_node *node; 1089 1090 for (; *types; types++) { 1091 type = *types; 1092 1093 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1094 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1095 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) && 1096 pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) && 1097 pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr)) 1098 return &type->pmus[unit->pmu_idx]; 1099 } 1100 } 1101 1102 return NULL; 1103 } 1104 1105 /* 1106 * Find the PMU of a PCI device. 1107 * @pdev: The PCI device. 1108 * @ids: The ID table of the available PCI devices with a PMU. 1109 * If NULL, search the whole uncore_pci_uncores. 
1110 */ 1111 static struct intel_uncore_pmu * 1112 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) 1113 { 1114 struct intel_uncore_pmu *pmu = NULL; 1115 struct intel_uncore_type *type; 1116 kernel_ulong_t data; 1117 unsigned int devfn; 1118 1119 if (!ids) 1120 return uncore_pci_find_dev_pmu_from_types(pdev); 1121 1122 while (ids && ids->vendor) { 1123 if ((ids->vendor == pdev->vendor) && 1124 (ids->device == pdev->device)) { 1125 data = ids->driver_data; 1126 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), 1127 UNCORE_PCI_DEV_FUNC(data)); 1128 if (devfn == pdev->devfn) { 1129 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; 1130 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; 1131 break; 1132 } 1133 } 1134 ids++; 1135 } 1136 return pmu; 1137 } 1138 1139 /* 1140 * Register the PMU for a PCI device 1141 * @pdev: The PCI device. 1142 * @type: The corresponding PMU type of the device. 1143 * @pmu: The corresponding PMU of the device. 1144 * @die: The die id which the device maps to. 
1145 */ 1146 static int uncore_pci_pmu_register(struct pci_dev *pdev, 1147 struct intel_uncore_type *type, 1148 struct intel_uncore_pmu *pmu, 1149 int die) 1150 { 1151 struct intel_uncore_box *box; 1152 int ret; 1153 1154 if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) 1155 return -EINVAL; 1156 1157 box = uncore_alloc_box(type, NUMA_NO_NODE); 1158 if (!box) 1159 return -ENOMEM; 1160 1161 atomic_inc(&box->refcnt); 1162 box->dieid = die; 1163 box->pci_dev = pdev; 1164 box->pmu = pmu; 1165 uncore_box_init(box); 1166 1167 pmu->boxes[die] = box; 1168 if (atomic_inc_return(&pmu->activeboxes) > 1) 1169 return 0; 1170 1171 /* First active box registers the pmu */ 1172 ret = uncore_pmu_register(pmu); 1173 if (ret) { 1174 pmu->boxes[die] = NULL; 1175 uncore_box_exit(box); 1176 kfree(box); 1177 } 1178 return ret; 1179 } 1180 1181 /* 1182 * add a pci uncore device 1183 */ 1184 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1185 { 1186 struct intel_uncore_type *type; 1187 struct intel_uncore_pmu *pmu = NULL; 1188 int die, ret; 1189 1190 ret = uncore_pci_get_dev_die_info(pdev, &die); 1191 if (ret) 1192 return ret; 1193 1194 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { 1195 int idx = UNCORE_PCI_DEV_IDX(id->driver_data); 1196 1197 uncore_extra_pci_dev[die].dev[idx] = pdev; 1198 pci_set_drvdata(pdev, NULL); 1199 return 0; 1200 } 1201 1202 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 1203 1204 /* 1205 * Some platforms, e.g. Knights Landing, use a common PCI device ID 1206 * for multiple instances of an uncore PMU device type. We should check 1207 * PCI slot and func to indicate the uncore box. 
1208 */ 1209 if (id->driver_data & ~0xffff) { 1210 struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); 1211 1212 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); 1213 if (pmu == NULL) 1214 return -ENODEV; 1215 } else { 1216 /* 1217 * for performance monitoring unit with multiple boxes, 1218 * each box has a different function id. 1219 */ 1220 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; 1221 } 1222 1223 ret = uncore_pci_pmu_register(pdev, type, pmu, die); 1224 1225 pci_set_drvdata(pdev, pmu->boxes[die]); 1226 1227 return ret; 1228 } 1229 1230 /* 1231 * Unregister the PMU of a PCI device 1232 * @pmu: The corresponding PMU is unregistered. 1233 * @die: The die id which the device maps to. 1234 */ 1235 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) 1236 { 1237 struct intel_uncore_box *box = pmu->boxes[die]; 1238 1239 pmu->boxes[die] = NULL; 1240 if (atomic_dec_return(&pmu->activeboxes) == 0) 1241 uncore_pmu_unregister(pmu); 1242 uncore_box_exit(box); 1243 kfree(box); 1244 } 1245 1246 static void uncore_pci_remove(struct pci_dev *pdev) 1247 { 1248 struct intel_uncore_box *box; 1249 struct intel_uncore_pmu *pmu; 1250 int i, die; 1251 1252 if (uncore_pci_get_dev_die_info(pdev, &die)) 1253 return; 1254 1255 box = pci_get_drvdata(pdev); 1256 if (!box) { 1257 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { 1258 if (uncore_extra_pci_dev[die].dev[i] == pdev) { 1259 uncore_extra_pci_dev[die].dev[i] = NULL; 1260 break; 1261 } 1262 } 1263 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); 1264 return; 1265 } 1266 1267 pmu = box->pmu; 1268 1269 pci_set_drvdata(pdev, NULL); 1270 1271 uncore_pci_pmu_unregister(pmu, die); 1272 } 1273 1274 static int uncore_bus_notify(struct notifier_block *nb, 1275 unsigned long action, void *data, 1276 const struct pci_device_id *ids) 1277 { 1278 struct device *dev = data; 1279 struct pci_dev *pdev = to_pci_dev(dev); 1280 struct intel_uncore_pmu *pmu; 1281 int die; 1282 1283 /* Unregister the PMU 
when the device is going to be deleted. */ 1284 if (action != BUS_NOTIFY_DEL_DEVICE) 1285 return NOTIFY_DONE; 1286 1287 pmu = uncore_pci_find_dev_pmu(pdev, ids); 1288 if (!pmu) 1289 return NOTIFY_DONE; 1290 1291 if (uncore_pci_get_dev_die_info(pdev, &die)) 1292 return NOTIFY_DONE; 1293 1294 uncore_pci_pmu_unregister(pmu, die); 1295 1296 return NOTIFY_OK; 1297 } 1298 1299 static int uncore_pci_sub_bus_notify(struct notifier_block *nb, 1300 unsigned long action, void *data) 1301 { 1302 return uncore_bus_notify(nb, action, data, 1303 uncore_pci_sub_driver->id_table); 1304 } 1305 1306 static struct notifier_block uncore_pci_sub_notifier = { 1307 .notifier_call = uncore_pci_sub_bus_notify, 1308 }; 1309 1310 static void uncore_pci_sub_driver_init(void) 1311 { 1312 const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; 1313 struct intel_uncore_type *type; 1314 struct intel_uncore_pmu *pmu; 1315 struct pci_dev *pci_sub_dev; 1316 bool notify = false; 1317 unsigned int devfn; 1318 int die; 1319 1320 while (ids && ids->vendor) { 1321 pci_sub_dev = NULL; 1322 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; 1323 /* 1324 * Search the available device, and register the 1325 * corresponding PMU. 
1326 */ 1327 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 1328 ids->device, pci_sub_dev))) { 1329 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), 1330 UNCORE_PCI_DEV_FUNC(ids->driver_data)); 1331 if (devfn != pci_sub_dev->devfn) 1332 continue; 1333 1334 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; 1335 if (!pmu) 1336 continue; 1337 1338 if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) 1339 continue; 1340 1341 if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, 1342 die)) 1343 notify = true; 1344 } 1345 ids++; 1346 } 1347 1348 if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) 1349 notify = false; 1350 1351 if (!notify) 1352 uncore_pci_sub_driver = NULL; 1353 } 1354 1355 static int uncore_pci_bus_notify(struct notifier_block *nb, 1356 unsigned long action, void *data) 1357 { 1358 return uncore_bus_notify(nb, action, data, NULL); 1359 } 1360 1361 static struct notifier_block uncore_pci_notifier = { 1362 .notifier_call = uncore_pci_bus_notify, 1363 }; 1364 1365 1366 static void uncore_pci_pmus_register(void) 1367 { 1368 struct intel_uncore_type **types = uncore_pci_uncores; 1369 struct intel_uncore_discovery_unit *unit; 1370 struct intel_uncore_type *type; 1371 struct intel_uncore_pmu *pmu; 1372 struct rb_node *node; 1373 struct pci_dev *pdev; 1374 1375 for (; *types; types++) { 1376 type = *types; 1377 1378 for (node = rb_first(type->boxes); node; node = rb_next(node)) { 1379 unit = rb_entry(node, struct intel_uncore_discovery_unit, node); 1380 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr), 1381 UNCORE_DISCOVERY_PCI_BUS(unit->addr), 1382 UNCORE_DISCOVERY_PCI_DEVFN(unit->addr)); 1383 1384 if (!pdev) 1385 continue; 1386 pmu = &type->pmus[unit->pmu_idx]; 1387 uncore_pci_pmu_register(pdev, type, pmu, unit->die); 1388 } 1389 } 1390 1391 bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); 1392 } 1393 1394 static int __init uncore_pci_init(void) 1395 { 1396 size_t size; 1397 
int ret; 1398 1399 size = uncore_max_dies() * sizeof(struct pci_extra_dev); 1400 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); 1401 if (!uncore_extra_pci_dev) { 1402 ret = -ENOMEM; 1403 goto err; 1404 } 1405 1406 ret = uncore_types_init(uncore_pci_uncores); 1407 if (ret) 1408 goto errtype; 1409 1410 if (uncore_pci_driver) { 1411 uncore_pci_driver->probe = uncore_pci_probe; 1412 uncore_pci_driver->remove = uncore_pci_remove; 1413 1414 ret = pci_register_driver(uncore_pci_driver); 1415 if (ret) 1416 goto errtype; 1417 } else 1418 uncore_pci_pmus_register(); 1419 1420 if (uncore_pci_sub_driver) 1421 uncore_pci_sub_driver_init(); 1422 1423 pcidrv_registered = true; 1424 return 0; 1425 1426 errtype: 1427 uncore_types_exit(uncore_pci_uncores); 1428 kfree(uncore_extra_pci_dev); 1429 uncore_extra_pci_dev = NULL; 1430 uncore_free_pcibus_map(); 1431 err: 1432 uncore_pci_uncores = empty_uncore; 1433 return ret; 1434 } 1435 1436 static void uncore_pci_exit(void) 1437 { 1438 if (pcidrv_registered) { 1439 pcidrv_registered = false; 1440 if (uncore_pci_sub_driver) 1441 bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); 1442 if (uncore_pci_driver) 1443 pci_unregister_driver(uncore_pci_driver); 1444 else 1445 bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); 1446 uncore_types_exit(uncore_pci_uncores); 1447 kfree(uncore_extra_pci_dev); 1448 uncore_free_pcibus_map(); 1449 } 1450 } 1451 1452 static bool uncore_die_has_box(struct intel_uncore_type *type, 1453 int die, unsigned int pmu_idx) 1454 { 1455 if (!type->boxes) 1456 return true; 1457 1458 if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) 1459 return false; 1460 1461 return true; 1462 } 1463 1464 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, 1465 int new_cpu) 1466 { 1467 struct intel_uncore_pmu *pmu = type->pmus; 1468 struct intel_uncore_box *box; 1469 int i, die; 1470 1471 die = topology_logical_die_id(old_cpu < 0 ? 
new_cpu : old_cpu); 1472 for (i = 0; i < type->num_boxes; i++, pmu++) { 1473 box = pmu->boxes[die]; 1474 if (!box) 1475 continue; 1476 1477 if (old_cpu < 0) { 1478 WARN_ON_ONCE(box->cpu != -1); 1479 if (uncore_die_has_box(type, die, pmu->pmu_idx)) { 1480 box->cpu = new_cpu; 1481 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1482 } 1483 continue; 1484 } 1485 1486 WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu); 1487 box->cpu = -1; 1488 cpumask_clear_cpu(old_cpu, &pmu->cpu_mask); 1489 if (new_cpu < 0) 1490 continue; 1491 1492 if (!uncore_die_has_box(type, die, pmu->pmu_idx)) 1493 continue; 1494 uncore_pmu_cancel_hrtimer(box); 1495 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); 1496 box->cpu = new_cpu; 1497 cpumask_set_cpu(new_cpu, &pmu->cpu_mask); 1498 } 1499 } 1500 1501 static void uncore_change_context(struct intel_uncore_type **uncores, 1502 int old_cpu, int new_cpu) 1503 { 1504 for (; *uncores; uncores++) 1505 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1506 } 1507 1508 static void uncore_box_unref(struct intel_uncore_type **types, int id) 1509 { 1510 struct intel_uncore_type *type; 1511 struct intel_uncore_pmu *pmu; 1512 struct intel_uncore_box *box; 1513 int i; 1514 1515 for (; *types; types++) { 1516 type = *types; 1517 pmu = type->pmus; 1518 for (i = 0; i < type->num_boxes; i++, pmu++) { 1519 box = pmu->boxes[id]; 1520 if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0) 1521 uncore_box_exit(box); 1522 } 1523 } 1524 } 1525 1526 static int uncore_event_cpu_offline(unsigned int cpu) 1527 { 1528 int die, target; 1529 1530 /* Check if exiting cpu is used for collecting uncore events */ 1531 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) 1532 goto unref; 1533 /* Find a new cpu to collect uncore events */ 1534 target = cpumask_any_but(topology_die_cpumask(cpu), cpu); 1535 1536 /* Migrate uncore events to the new target */ 1537 if (target < nr_cpu_ids) 1538 cpumask_set_cpu(target, &uncore_cpu_mask); 1539 else 1540 target = -1; 
1541 1542 uncore_change_context(uncore_msr_uncores, cpu, target); 1543 uncore_change_context(uncore_mmio_uncores, cpu, target); 1544 uncore_change_context(uncore_pci_uncores, cpu, target); 1545 1546 unref: 1547 /* Clear the references */ 1548 die = topology_logical_die_id(cpu); 1549 uncore_box_unref(uncore_msr_uncores, die); 1550 uncore_box_unref(uncore_mmio_uncores, die); 1551 return 0; 1552 } 1553 1554 static int allocate_boxes(struct intel_uncore_type **types, 1555 unsigned int die, unsigned int cpu) 1556 { 1557 struct intel_uncore_box *box, *tmp; 1558 struct intel_uncore_type *type; 1559 struct intel_uncore_pmu *pmu; 1560 LIST_HEAD(allocated); 1561 int i; 1562 1563 /* Try to allocate all required boxes */ 1564 for (; *types; types++) { 1565 type = *types; 1566 pmu = type->pmus; 1567 for (i = 0; i < type->num_boxes; i++, pmu++) { 1568 if (pmu->boxes[die]) 1569 continue; 1570 box = uncore_alloc_box(type, cpu_to_node(cpu)); 1571 if (!box) 1572 goto cleanup; 1573 box->pmu = pmu; 1574 box->dieid = die; 1575 list_add(&box->active_list, &allocated); 1576 } 1577 } 1578 /* Install them in the pmus */ 1579 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1580 list_del_init(&box->active_list); 1581 box->pmu->boxes[die] = box; 1582 } 1583 return 0; 1584 1585 cleanup: 1586 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1587 list_del_init(&box->active_list); 1588 kfree(box); 1589 } 1590 return -ENOMEM; 1591 } 1592 1593 static int uncore_box_ref(struct intel_uncore_type **types, 1594 int id, unsigned int cpu) 1595 { 1596 struct intel_uncore_type *type; 1597 struct intel_uncore_pmu *pmu; 1598 struct intel_uncore_box *box; 1599 int i, ret; 1600 1601 ret = allocate_boxes(types, id, cpu); 1602 if (ret) 1603 return ret; 1604 1605 for (; *types; types++) { 1606 type = *types; 1607 pmu = type->pmus; 1608 for (i = 0; i < type->num_boxes; i++, pmu++) { 1609 box = pmu->boxes[id]; 1610 if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1) 1611 
uncore_box_init(box); 1612 } 1613 } 1614 return 0; 1615 } 1616 1617 static int uncore_event_cpu_online(unsigned int cpu) 1618 { 1619 int die, target, msr_ret, mmio_ret; 1620 1621 die = topology_logical_die_id(cpu); 1622 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); 1623 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); 1624 if (msr_ret && mmio_ret) 1625 return -ENOMEM; 1626 1627 /* 1628 * Check if there is an online cpu in the package 1629 * which collects uncore events already. 1630 */ 1631 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); 1632 if (target < nr_cpu_ids) 1633 return 0; 1634 1635 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1636 1637 if (!msr_ret) 1638 uncore_change_context(uncore_msr_uncores, -1, cpu); 1639 if (!mmio_ret) 1640 uncore_change_context(uncore_mmio_uncores, -1, cpu); 1641 uncore_change_context(uncore_pci_uncores, -1, cpu); 1642 return 0; 1643 } 1644 1645 static int __init type_pmu_register(struct intel_uncore_type *type) 1646 { 1647 int i, ret; 1648 1649 for (i = 0; i < type->num_boxes; i++) { 1650 ret = uncore_pmu_register(&type->pmus[i]); 1651 if (ret) 1652 return ret; 1653 } 1654 return 0; 1655 } 1656 1657 static int __init uncore_msr_pmus_register(void) 1658 { 1659 struct intel_uncore_type **types = uncore_msr_uncores; 1660 int ret; 1661 1662 for (; *types; types++) { 1663 ret = type_pmu_register(*types); 1664 if (ret) 1665 return ret; 1666 } 1667 return 0; 1668 } 1669 1670 static int __init uncore_cpu_init(void) 1671 { 1672 int ret; 1673 1674 ret = uncore_types_init(uncore_msr_uncores); 1675 if (ret) 1676 goto err; 1677 1678 ret = uncore_msr_pmus_register(); 1679 if (ret) 1680 goto err; 1681 return 0; 1682 err: 1683 uncore_types_exit(uncore_msr_uncores); 1684 uncore_msr_uncores = empty_uncore; 1685 return ret; 1686 } 1687 1688 static int __init uncore_mmio_init(void) 1689 { 1690 struct intel_uncore_type **types = uncore_mmio_uncores; 1691 int ret; 1692 1693 ret = uncore_types_init(types); 1694 if 
(ret) 1695 goto err; 1696 1697 for (; *types; types++) { 1698 ret = type_pmu_register(*types); 1699 if (ret) 1700 goto err; 1701 } 1702 return 0; 1703 err: 1704 uncore_types_exit(uncore_mmio_uncores); 1705 uncore_mmio_uncores = empty_uncore; 1706 return ret; 1707 } 1708 1709 struct intel_uncore_init_fun { 1710 void (*cpu_init)(void); 1711 int (*pci_init)(void); 1712 void (*mmio_init)(void); 1713 /* Discovery table is required */ 1714 bool use_discovery; 1715 /* The units in the discovery table should be ignored. */ 1716 int *uncore_units_ignore; 1717 }; 1718 1719 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { 1720 .cpu_init = nhm_uncore_cpu_init, 1721 }; 1722 1723 static const struct intel_uncore_init_fun snb_uncore_init __initconst = { 1724 .cpu_init = snb_uncore_cpu_init, 1725 .pci_init = snb_uncore_pci_init, 1726 }; 1727 1728 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { 1729 .cpu_init = snb_uncore_cpu_init, 1730 .pci_init = ivb_uncore_pci_init, 1731 }; 1732 1733 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { 1734 .cpu_init = snb_uncore_cpu_init, 1735 .pci_init = hsw_uncore_pci_init, 1736 }; 1737 1738 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { 1739 .cpu_init = snb_uncore_cpu_init, 1740 .pci_init = bdw_uncore_pci_init, 1741 }; 1742 1743 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { 1744 .cpu_init = snbep_uncore_cpu_init, 1745 .pci_init = snbep_uncore_pci_init, 1746 }; 1747 1748 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { 1749 .cpu_init = nhmex_uncore_cpu_init, 1750 }; 1751 1752 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { 1753 .cpu_init = ivbep_uncore_cpu_init, 1754 .pci_init = ivbep_uncore_pci_init, 1755 }; 1756 1757 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { 1758 .cpu_init = hswep_uncore_cpu_init, 1759 .pci_init = 
hswep_uncore_pci_init, 1760 }; 1761 1762 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { 1763 .cpu_init = bdx_uncore_cpu_init, 1764 .pci_init = bdx_uncore_pci_init, 1765 }; 1766 1767 static const struct intel_uncore_init_fun knl_uncore_init __initconst = { 1768 .cpu_init = knl_uncore_cpu_init, 1769 .pci_init = knl_uncore_pci_init, 1770 }; 1771 1772 static const struct intel_uncore_init_fun skl_uncore_init __initconst = { 1773 .cpu_init = skl_uncore_cpu_init, 1774 .pci_init = skl_uncore_pci_init, 1775 }; 1776 1777 static const struct intel_uncore_init_fun skx_uncore_init __initconst = { 1778 .cpu_init = skx_uncore_cpu_init, 1779 .pci_init = skx_uncore_pci_init, 1780 }; 1781 1782 static const struct intel_uncore_init_fun icl_uncore_init __initconst = { 1783 .cpu_init = icl_uncore_cpu_init, 1784 .pci_init = skl_uncore_pci_init, 1785 }; 1786 1787 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1788 .cpu_init = tgl_uncore_cpu_init, 1789 .mmio_init = tgl_uncore_mmio_init, 1790 }; 1791 1792 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1793 .cpu_init = tgl_uncore_cpu_init, 1794 .mmio_init = tgl_l_uncore_mmio_init, 1795 }; 1796 1797 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { 1798 .cpu_init = tgl_uncore_cpu_init, 1799 .pci_init = skl_uncore_pci_init, 1800 }; 1801 1802 static const struct intel_uncore_init_fun adl_uncore_init __initconst = { 1803 .cpu_init = adl_uncore_cpu_init, 1804 .mmio_init = adl_uncore_mmio_init, 1805 }; 1806 1807 static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { 1808 .cpu_init = mtl_uncore_cpu_init, 1809 .mmio_init = adl_uncore_mmio_init, 1810 }; 1811 1812 static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { 1813 .cpu_init = lnl_uncore_cpu_init, 1814 .mmio_init = lnl_uncore_mmio_init, 1815 }; 1816 1817 static const struct intel_uncore_init_fun icx_uncore_init __initconst = { 1818 .cpu_init = 
icx_uncore_cpu_init, 1819 .pci_init = icx_uncore_pci_init, 1820 .mmio_init = icx_uncore_mmio_init, 1821 }; 1822 1823 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1824 .cpu_init = snr_uncore_cpu_init, 1825 .pci_init = snr_uncore_pci_init, 1826 .mmio_init = snr_uncore_mmio_init, 1827 }; 1828 1829 static const struct intel_uncore_init_fun spr_uncore_init __initconst = { 1830 .cpu_init = spr_uncore_cpu_init, 1831 .pci_init = spr_uncore_pci_init, 1832 .mmio_init = spr_uncore_mmio_init, 1833 .use_discovery = true, 1834 .uncore_units_ignore = spr_uncore_units_ignore, 1835 }; 1836 1837 static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1838 .cpu_init = gnr_uncore_cpu_init, 1839 .pci_init = gnr_uncore_pci_init, 1840 .mmio_init = gnr_uncore_mmio_init, 1841 .use_discovery = true, 1842 .uncore_units_ignore = gnr_uncore_units_ignore, 1843 }; 1844 1845 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1846 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1847 .pci_init = intel_uncore_generic_uncore_pci_init, 1848 .mmio_init = intel_uncore_generic_uncore_mmio_init, 1849 }; 1850 1851 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1852 X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), 1853 X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), 1854 X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), 1855 X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), 1856 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), 1857 X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), 1858 X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), 1859 X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), 1860 X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), 1861 X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), 1862 X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), 1863 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), 1864 X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), 1865 X86_MATCH_VFM(INTEL_WESTMERE_EX, 
&nhmex_uncore_init), 1866 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), 1867 X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), 1868 X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), 1869 X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), 1870 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), 1871 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), 1872 X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), 1873 X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init), 1874 X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), 1875 X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), 1876 X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), 1877 X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), 1878 X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), 1879 X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), 1880 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), 1881 X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), 1882 X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), 1883 X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), 1884 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), 1885 X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), 1886 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), 1887 X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), 1888 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), 1889 X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), 1890 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), 1891 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), 1892 X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), 1893 X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), 1894 X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), 1895 X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), 1896 X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), 1897 X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), 1898 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), 1899 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), 1900 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 
&gnr_uncore_init), 1901 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), 1902 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), 1903 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), 1904 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), 1905 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), 1906 X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), 1907 {}, 1908 }; 1909 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1910 1911 static int __init intel_uncore_init(void) 1912 { 1913 const struct x86_cpu_id *id; 1914 struct intel_uncore_init_fun *uncore_init; 1915 int pret = 0, cret = 0, mret = 0, ret; 1916 1917 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1918 return -ENODEV; 1919 1920 __uncore_max_dies = 1921 topology_max_packages() * topology_max_dies_per_package(); 1922 1923 id = x86_match_cpu(intel_uncore_match); 1924 if (!id) { 1925 if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) 1926 uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; 1927 else 1928 return -ENODEV; 1929 } else { 1930 uncore_init = (struct intel_uncore_init_fun *)id->driver_data; 1931 if (uncore_no_discover && uncore_init->use_discovery) 1932 return -ENODEV; 1933 if (uncore_init->use_discovery && 1934 !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) 1935 return -ENODEV; 1936 } 1937 1938 if (uncore_init->pci_init) { 1939 pret = uncore_init->pci_init(); 1940 if (!pret) 1941 pret = uncore_pci_init(); 1942 } 1943 1944 if (uncore_init->cpu_init) { 1945 uncore_init->cpu_init(); 1946 cret = uncore_cpu_init(); 1947 } 1948 1949 if (uncore_init->mmio_init) { 1950 uncore_init->mmio_init(); 1951 mret = uncore_mmio_init(); 1952 } 1953 1954 if (cret && pret && mret) { 1955 ret = -ENODEV; 1956 goto free_discovery; 1957 } 1958 1959 /* Install hotplug callbacks to setup the targets for each package */ 1960 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, 1961 "perf/x86/intel/uncore:online", 1962 uncore_event_cpu_online, 
1963 uncore_event_cpu_offline); 1964 if (ret) 1965 goto err; 1966 return 0; 1967 1968 err: 1969 uncore_types_exit(uncore_msr_uncores); 1970 uncore_types_exit(uncore_mmio_uncores); 1971 uncore_pci_exit(); 1972 free_discovery: 1973 intel_uncore_clear_discovery_tables(); 1974 return ret; 1975 } 1976 module_init(intel_uncore_init); 1977 1978 static void __exit intel_uncore_exit(void) 1979 { 1980 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1981 uncore_types_exit(uncore_msr_uncores); 1982 uncore_types_exit(uncore_mmio_uncores); 1983 uncore_pci_exit(); 1984 intel_uncore_clear_discovery_tables(); 1985 } 1986 module_exit(intel_uncore_exit); 1987