1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/module.h> 3 4 #include <asm/cpu_device_id.h> 5 #include <asm/intel-family.h> 6 #include "uncore.h" 7 #include "uncore_discovery.h" 8 9 static bool uncore_no_discover; 10 module_param(uncore_no_discover, bool, 0); 11 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " 12 "(default: enable the discovery mechanism)."); 13 struct intel_uncore_type *empty_uncore[] = { NULL, }; 14 struct intel_uncore_type **uncore_msr_uncores = empty_uncore; 15 struct intel_uncore_type **uncore_pci_uncores = empty_uncore; 16 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; 17 18 static bool pcidrv_registered; 19 struct pci_driver *uncore_pci_driver; 20 /* The PCI driver for the device which the uncore doesn't own. */ 21 struct pci_driver *uncore_pci_sub_driver; 22 /* pci bus to socket mapping */ 23 DEFINE_RAW_SPINLOCK(pci2phy_map_lock); 24 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); 25 struct pci_extra_dev *uncore_extra_pci_dev; 26 int __uncore_max_dies; 27 28 /* mask of cpus that collect uncore events */ 29 static cpumask_t uncore_cpu_mask; 30 31 /* constraint for the fixed counter */ 32 static struct event_constraint uncore_constraint_fixed = 33 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); 34 struct event_constraint uncore_constraint_empty = 35 EVENT_CONSTRAINT(0, 0, 0); 36 37 MODULE_DESCRIPTION("Support for Intel uncore performance events"); 38 MODULE_LICENSE("GPL"); 39 40 int uncore_pcibus_to_dieid(struct pci_bus *bus) 41 { 42 struct pci2phy_map *map; 43 int die_id = -1; 44 45 raw_spin_lock(&pci2phy_map_lock); 46 list_for_each_entry(map, &pci2phy_map_head, list) { 47 if (map->segment == pci_domain_nr(bus)) { 48 die_id = map->pbus_to_dieid[bus->number]; 49 break; 50 } 51 } 52 raw_spin_unlock(&pci2phy_map_lock); 53 54 return die_id; 55 } 56 57 int uncore_die_to_segment(int die) 58 { 59 struct pci_bus *bus = NULL; 60 61 /* Find first pci bus which attributes to specified die. */ 62 while ((bus = pci_find_next_bus(bus)) && 63 (die != uncore_pcibus_to_dieid(bus))) 64 ; 65 66 return bus ? pci_domain_nr(bus) : -EINVAL; 67 } 68 69 int uncore_device_to_die(struct pci_dev *dev) 70 { 71 int node = pcibus_to_node(dev->bus); 72 int cpu; 73 74 for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) { 75 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 77 if (c->initialized && cpu_to_node(cpu) == node) 78 return c->topo.logical_die_id; 79 } 80 81 return -1; 82 } 83 84 static void uncore_free_pcibus_map(void) 85 { 86 struct pci2phy_map *map, *tmp; 87 88 list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { 89 list_del(&map->list); 90 kfree(map); 91 } 92 } 93 94 struct pci2phy_map *__find_pci2phy_map(int segment) 95 { 96 struct pci2phy_map *map, *alloc = NULL; 97 int i; 98 99 lockdep_assert_held(&pci2phy_map_lock); 100 101 lookup: 102 list_for_each_entry(map, &pci2phy_map_head, list) { 103 if (map->segment == segment) 104 goto end; 105 } 106 107 if (!alloc) { 108 raw_spin_unlock(&pci2phy_map_lock); 109 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); 110 raw_spin_lock(&pci2phy_map_lock); 111 112 if (!alloc) 113 return NULL; 114 115 goto lookup; 116 } 117 118 map = alloc; 119 alloc = NULL; 120 map->segment = segment; 121 for (i = 0; i < 256; i++) 122 map->pbus_to_dieid[i] = -1; 123 list_add_tail(&map->list, &pci2phy_map_head); 124 125 end: 126 kfree(alloc); 127 return map; 128 } 129 130 ssize_t uncore_event_show(struct device *dev, 131 struct device_attribute *attr, char *buf) 132 { 133 struct uncore_event_desc *event = 134 container_of(attr, struct uncore_event_desc, attr); 135 return sprintf(buf, "%s", event->config); 136 } 137 138 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) 139 { 140 unsigned int dieid = topology_logical_die_id(cpu); 141 142 /* 143 * The unsigned check also catches the '-1' return value for non 144 * existent mappings in the topology map. 145 */ 146 return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL; 147 } 148 149 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) 150 { 151 u64 count; 152 153 rdmsrl(event->hw.event_base, count); 154 155 return count; 156 } 157 158 void uncore_mmio_exit_box(struct intel_uncore_box *box) 159 { 160 if (box->io_addr) 161 iounmap(box->io_addr); 162 } 163 164 u64 uncore_mmio_read_counter(struct intel_uncore_box *box, 165 struct perf_event *event) 166 { 167 if (!box->io_addr) 168 return 0; 169 170 if (!uncore_mmio_is_valid_offset(box, event->hw.event_base)) 171 return 0; 172 173 return readq(box->io_addr + event->hw.event_base); 174 } 175 176 /* 177 * generic get constraint function for shared match/mask registers. 178 */ 179 struct event_constraint * 180 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) 181 { 182 struct intel_uncore_extra_reg *er; 183 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 184 struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; 185 unsigned long flags; 186 bool ok = false; 187 188 /* 189 * reg->alloc can be set due to existing state, so for fake box we 190 * need to ignore this, otherwise we might fail to allocate proper 191 * fake state for this extra reg constraint. 192 */ 193 if (reg1->idx == EXTRA_REG_NONE || 194 (!uncore_box_is_fake(box) && reg1->alloc)) 195 return NULL; 196 197 er = &box->shared_regs[reg1->idx]; 198 raw_spin_lock_irqsave(&er->lock, flags); 199 if (!atomic_read(&er->ref) || 200 (er->config1 == reg1->config && er->config2 == reg2->config)) { 201 atomic_inc(&er->ref); 202 er->config1 = reg1->config; 203 er->config2 = reg2->config; 204 ok = true; 205 } 206 raw_spin_unlock_irqrestore(&er->lock, flags); 207 208 if (ok) { 209 if (!uncore_box_is_fake(box)) 210 reg1->alloc = 1; 211 return NULL; 212 } 213 214 return &uncore_constraint_empty; 215 } 216 217 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) 218 { 219 struct intel_uncore_extra_reg *er; 220 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; 221 222 /* 223 * Only put constraint if extra reg was actually allocated. Also 224 * takes care of event which do not use an extra shared reg. 225 * 226 * Also, if this is a fake box we shouldn't touch any event state 227 * (reg->alloc) and we don't care about leaving inconsistent box 228 * state either since it will be thrown out. 229 */ 230 if (uncore_box_is_fake(box) || !reg1->alloc) 231 return; 232 233 er = &box->shared_regs[reg1->idx]; 234 atomic_dec(&er->ref); 235 reg1->alloc = 0; 236 } 237 238 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) 239 { 240 struct intel_uncore_extra_reg *er; 241 unsigned long flags; 242 u64 config; 243 244 er = &box->shared_regs[idx]; 245 246 raw_spin_lock_irqsave(&er->lock, flags); 247 config = er->config; 248 raw_spin_unlock_irqrestore(&er->lock, flags); 249 250 return config; 251 } 252 253 static void uncore_assign_hw_event(struct intel_uncore_box *box, 254 struct perf_event *event, int idx) 255 { 256 struct hw_perf_event *hwc = &event->hw; 257 258 hwc->idx = idx; 259 hwc->last_tag = ++box->tags[idx]; 260 261 if (uncore_pmc_fixed(hwc->idx)) { 262 hwc->event_base = uncore_fixed_ctr(box); 263 hwc->config_base = uncore_fixed_ctl(box); 264 return; 265 } 266 267 hwc->config_base = uncore_event_ctl(box, hwc->idx); 268 hwc->event_base = uncore_perf_ctr(box, hwc->idx); 269 } 270 271 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) 272 { 273 u64 prev_count, new_count, delta; 274 int shift; 275 276 if (uncore_pmc_freerunning(event->hw.idx)) 277 shift = 64 - uncore_freerunning_bits(box, event); 278 else if (uncore_pmc_fixed(event->hw.idx)) 279 shift = 64 - uncore_fixed_ctr_bits(box); 280 else 281 shift = 64 - uncore_perf_ctr_bits(box); 282 283 /* the hrtimer might modify the previous event value */ 284 again: 285 prev_count = local64_read(&event->hw.prev_count); 286 new_count = uncore_read_counter(box, event); 287 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) 288 goto again; 289 290 delta = (new_count << shift) - (prev_count << shift); 291 delta >>= shift; 292 293 local64_add(delta, &event->count); 294 } 295 296 /* 297 * The overflow interrupt is unavailable for SandyBridge-EP, is broken 298 * for SandyBridge. So we use hrtimer to periodically poll the counter 299 * to avoid overflow. 300 */ 301 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) 302 { 303 struct intel_uncore_box *box; 304 struct perf_event *event; 305 unsigned long flags; 306 int bit; 307 308 box = container_of(hrtimer, struct intel_uncore_box, hrtimer); 309 if (!box->n_active || box->cpu != smp_processor_id()) 310 return HRTIMER_NORESTART; 311 /* 312 * disable local interrupt to prevent uncore_pmu_event_start/stop 313 * to interrupt the update process 314 */ 315 local_irq_save(flags); 316 317 /* 318 * handle boxes with an active event list as opposed to active 319 * counters 320 */ 321 list_for_each_entry(event, &box->active_list, active_entry) { 322 uncore_perf_event_update(box, event); 323 } 324 325 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) 326 uncore_perf_event_update(box, box->events[bit]); 327 328 local_irq_restore(flags); 329 330 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); 331 return HRTIMER_RESTART; 332 } 333 334 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 335 { 336 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 337 HRTIMER_MODE_REL_PINNED); 338 } 339 340 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) 341 { 342 hrtimer_cancel(&box->hrtimer); 343 } 344 345 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) 346 { 347 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 348 box->hrtimer.function = uncore_pmu_hrtimer; 349 } 350 351 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, 352 int node) 353 { 354 int i, size, numshared = type->num_shared_regs ; 355 struct intel_uncore_box *box; 356 357 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); 358 359 box = kzalloc_node(size, GFP_KERNEL, node); 360 if (!box) 361 return NULL; 362 363 for (i = 0; i < numshared; i++) 364 raw_spin_lock_init(&box->shared_regs[i].lock); 365 366 uncore_pmu_init_hrtimer(box); 367 box->cpu = -1; 368 box->dieid = -1; 369 370 /* set default hrtimer timeout */ 371 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; 372 373 INIT_LIST_HEAD(&box->active_list); 374 375 return box; 376 } 377 378 /* 379 * Using uncore_pmu_event_init pmu event_init callback 380 * as a detection point for uncore events. 381 */ 382 static int uncore_pmu_event_init(struct perf_event *event); 383 384 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) 385 { 386 return &box->pmu->pmu == event->pmu; 387 } 388 389 static int 390 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, 391 bool dogrp) 392 { 393 struct perf_event *event; 394 int n, max_count; 395 396 max_count = box->pmu->type->num_counters; 397 if (box->pmu->type->fixed_ctl) 398 max_count++; 399 400 if (box->n_events >= max_count) 401 return -EINVAL; 402 403 n = box->n_events; 404 405 if (is_box_event(box, leader)) { 406 box->event_list[n] = leader; 407 n++; 408 } 409 410 if (!dogrp) 411 return n; 412 413 for_each_sibling_event(event, leader) { 414 if (!is_box_event(box, event) || 415 event->state <= PERF_EVENT_STATE_OFF) 416 continue; 417 418 if (n >= max_count) 419 return -EINVAL; 420 421 box->event_list[n] = event; 422 n++; 423 } 424 return n; 425 } 426 427 static struct event_constraint * 428 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) 429 { 430 struct intel_uncore_type *type = box->pmu->type; 431 struct event_constraint *c; 432 433 if (type->ops->get_constraint) { 434 c = type->ops->get_constraint(box, event); 435 if (c) 436 return c; 437 } 438 439 if (event->attr.config == UNCORE_FIXED_EVENT) 440 return &uncore_constraint_fixed; 441 442 if (type->constraints) { 443 for_each_event_constraint(c, type->constraints) { 444 if ((event->hw.config & c->cmask) == c->code) 445 return c; 446 } 447 } 448 449 return &type->unconstrainted; 450 } 451 452 static void uncore_put_event_constraint(struct intel_uncore_box *box, 453 struct perf_event *event) 454 { 455 if (box->pmu->type->ops->put_constraint) 456 box->pmu->type->ops->put_constraint(box, event); 457 } 458 459 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 460 { 461 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 462 struct event_constraint *c; 463 int i, wmin, wmax, ret = 0; 464 struct hw_perf_event *hwc; 465 466 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 467 468 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 469 c = uncore_get_event_constraint(box, box->event_list[i]); 470 box->event_constraint[i] = c; 471 wmin = min(wmin, c->weight); 472 wmax = max(wmax, c->weight); 473 } 474 475 /* fastpath, try to reuse previous register */ 476 for (i = 0; i < n; i++) { 477 hwc = &box->event_list[i]->hw; 478 c = box->event_constraint[i]; 479 480 /* never assigned */ 481 if (hwc->idx == -1) 482 break; 483 484 /* constraint still honored */ 485 if (!test_bit(hwc->idx, c->idxmsk)) 486 break; 487 488 /* not already used */ 489 if (test_bit(hwc->idx, used_mask)) 490 break; 491 492 __set_bit(hwc->idx, used_mask); 493 if (assign) 494 assign[i] = hwc->idx; 495 } 496 /* slow path */ 497 if (i != n) 498 ret = perf_assign_events(box->event_constraint, n, 499 wmin, wmax, n, assign); 500 501 if (!assign || ret) { 502 for (i = 0; i < n; i++) 503 uncore_put_event_constraint(box, box->event_list[i]); 504 } 505 return ret ? -EINVAL : 0; 506 } 507 508 void uncore_pmu_event_start(struct perf_event *event, int flags) 509 { 510 struct intel_uncore_box *box = uncore_event_to_box(event); 511 int idx = event->hw.idx; 512 513 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) 514 return; 515 516 /* 517 * Free running counter is read-only and always active. 518 * Use the current counter value as start point. 519 * There is no overflow interrupt for free running counter. 520 * Use hrtimer to periodically poll the counter to avoid overflow. 521 */ 522 if (uncore_pmc_freerunning(event->hw.idx)) { 523 list_add_tail(&event->active_entry, &box->active_list); 524 local64_set(&event->hw.prev_count, 525 uncore_read_counter(box, event)); 526 if (box->n_active++ == 0) 527 uncore_pmu_start_hrtimer(box); 528 return; 529 } 530 531 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) 532 return; 533 534 event->hw.state = 0; 535 box->events[idx] = event; 536 box->n_active++; 537 __set_bit(idx, box->active_mask); 538 539 local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); 540 uncore_enable_event(box, event); 541 542 if (box->n_active == 1) 543 uncore_pmu_start_hrtimer(box); 544 } 545 546 void uncore_pmu_event_stop(struct perf_event *event, int flags) 547 { 548 struct intel_uncore_box *box = uncore_event_to_box(event); 549 struct hw_perf_event *hwc = &event->hw; 550 551 /* Cannot disable free running counter which is read-only */ 552 if (uncore_pmc_freerunning(hwc->idx)) { 553 list_del(&event->active_entry); 554 if (--box->n_active == 0) 555 uncore_pmu_cancel_hrtimer(box); 556 uncore_perf_event_update(box, event); 557 return; 558 } 559 560 if (__test_and_clear_bit(hwc->idx, box->active_mask)) { 561 uncore_disable_event(box, event); 562 box->n_active--; 563 box->events[hwc->idx] = NULL; 564 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 565 hwc->state |= PERF_HES_STOPPED; 566 567 if (box->n_active == 0) 568 uncore_pmu_cancel_hrtimer(box); 569 } 570 571 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 572 /* 573 * Drain the remaining delta count out of a event 574 * that we are disabling: 575 */ 576 uncore_perf_event_update(box, event); 577 hwc->state |= PERF_HES_UPTODATE; 578 } 579 } 580 581 int uncore_pmu_event_add(struct perf_event *event, int flags) 582 { 583 struct intel_uncore_box *box = uncore_event_to_box(event); 584 struct hw_perf_event *hwc = &event->hw; 585 int assign[UNCORE_PMC_IDX_MAX]; 586 int i, n, ret; 587 588 if (!box) 589 return -ENODEV; 590 591 /* 592 * The free funning counter is assigned in event_init(). 593 * The free running counter event and free running counter 594 * are 1:1 mapped. It doesn't need to be tracked in event_list. 595 */ 596 if (uncore_pmc_freerunning(hwc->idx)) { 597 if (flags & PERF_EF_START) 598 uncore_pmu_event_start(event, 0); 599 return 0; 600 } 601 602 ret = n = uncore_collect_events(box, event, false); 603 if (ret < 0) 604 return ret; 605 606 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 607 if (!(flags & PERF_EF_START)) 608 hwc->state |= PERF_HES_ARCH; 609 610 ret = uncore_assign_events(box, assign, n); 611 if (ret) 612 return ret; 613 614 /* save events moving to new counters */ 615 for (i = 0; i < box->n_events; i++) { 616 event = box->event_list[i]; 617 hwc = &event->hw; 618 619 if (hwc->idx == assign[i] && 620 hwc->last_tag == box->tags[assign[i]]) 621 continue; 622 /* 623 * Ensure we don't accidentally enable a stopped 624 * counter simply because we rescheduled. 625 */ 626 if (hwc->state & PERF_HES_STOPPED) 627 hwc->state |= PERF_HES_ARCH; 628 629 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 630 } 631 632 /* reprogram moved events into new counters */ 633 for (i = 0; i < n; i++) { 634 event = box->event_list[i]; 635 hwc = &event->hw; 636 637 if (hwc->idx != assign[i] || 638 hwc->last_tag != box->tags[assign[i]]) 639 uncore_assign_hw_event(box, event, assign[i]); 640 else if (i < box->n_events) 641 continue; 642 643 if (hwc->state & PERF_HES_ARCH) 644 continue; 645 646 uncore_pmu_event_start(event, 0); 647 } 648 box->n_events = n; 649 650 return 0; 651 } 652 653 void uncore_pmu_event_del(struct perf_event *event, int flags) 654 { 655 struct intel_uncore_box *box = uncore_event_to_box(event); 656 int i; 657 658 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 659 660 /* 661 * The event for free running counter is not tracked by event_list. 662 * It doesn't need to force event->hw.idx = -1 to reassign the counter. 663 * Because the event and the free running counter are 1:1 mapped. 664 */ 665 if (uncore_pmc_freerunning(event->hw.idx)) 666 return; 667 668 for (i = 0; i < box->n_events; i++) { 669 if (event == box->event_list[i]) { 670 uncore_put_event_constraint(box, event); 671 672 for (++i; i < box->n_events; i++) 673 box->event_list[i - 1] = box->event_list[i]; 674 675 --box->n_events; 676 break; 677 } 678 } 679 680 event->hw.idx = -1; 681 event->hw.last_tag = ~0ULL; 682 } 683 684 void uncore_pmu_event_read(struct perf_event *event) 685 { 686 struct intel_uncore_box *box = uncore_event_to_box(event); 687 uncore_perf_event_update(box, event); 688 } 689 690 /* 691 * validation ensures the group can be loaded onto the 692 * PMU if it was the only group available. 693 */ 694 static int uncore_validate_group(struct intel_uncore_pmu *pmu, 695 struct perf_event *event) 696 { 697 struct perf_event *leader = event->group_leader; 698 struct intel_uncore_box *fake_box; 699 int ret = -EINVAL, n; 700 701 /* The free running counter is always active. */ 702 if (uncore_pmc_freerunning(event->hw.idx)) 703 return 0; 704 705 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); 706 if (!fake_box) 707 return -ENOMEM; 708 709 fake_box->pmu = pmu; 710 /* 711 * the event is not yet connected with its 712 * siblings therefore we must first collect 713 * existing siblings, then add the new event 714 * before we can simulate the scheduling 715 */ 716 n = uncore_collect_events(fake_box, leader, true); 717 if (n < 0) 718 goto out; 719 720 fake_box->n_events = n; 721 n = uncore_collect_events(fake_box, event, false); 722 if (n < 0) 723 goto out; 724 725 fake_box->n_events = n; 726 727 ret = uncore_assign_events(fake_box, NULL, n); 728 out: 729 kfree(fake_box); 730 return ret; 731 } 732 733 static int uncore_pmu_event_init(struct perf_event *event) 734 { 735 struct intel_uncore_pmu *pmu; 736 struct intel_uncore_box *box; 737 struct hw_perf_event *hwc = &event->hw; 738 int ret; 739 740 if (event->attr.type != event->pmu->type) 741 return -ENOENT; 742 743 pmu = uncore_event_to_pmu(event); 744 /* no device found for this pmu */ 745 if (pmu->func_id < 0) 746 return -ENOENT; 747 748 /* Sampling not supported yet */ 749 if (hwc->sample_period) 750 return -EINVAL; 751 752 /* 753 * Place all uncore events for a particular physical package 754 * onto a single cpu 755 */ 756 if (event->cpu < 0) 757 return -EINVAL; 758 box = uncore_pmu_to_box(pmu, event->cpu); 759 if (!box || box->cpu < 0) 760 return -EINVAL; 761 event->cpu = box->cpu; 762 event->pmu_private = box; 763 764 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; 765 766 event->hw.idx = -1; 767 event->hw.last_tag = ~0ULL; 768 event->hw.extra_reg.idx = EXTRA_REG_NONE; 769 event->hw.branch_reg.idx = EXTRA_REG_NONE; 770 771 if (event->attr.config == UNCORE_FIXED_EVENT) { 772 /* no fixed counter */ 773 if (!pmu->type->fixed_ctl) 774 return -EINVAL; 775 /* 776 * if there is only one fixed counter, only the first pmu 777 * can access the fixed counter 778 */ 779 if (pmu->type->single_fixed && pmu->pmu_idx > 0) 780 return -EINVAL; 781 782 /* fixed counters have event field hardcoded to zero */ 783 hwc->config = 0ULL; 784 } else if (is_freerunning_event(event)) { 785 hwc->config = event->attr.config; 786 if (!check_valid_freerunning_event(box, event)) 787 return -EINVAL; 788 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; 789 /* 790 * The free running counter event and free running counter 791 * are always 1:1 mapped. 792 * The free running counter is always active. 793 * Assign the free running counter here. 794 */ 795 event->hw.event_base = uncore_freerunning_counter(box, event); 796 } else { 797 hwc->config = event->attr.config & 798 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); 799 if (pmu->type->ops->hw_config) { 800 ret = pmu->type->ops->hw_config(box, event); 801 if (ret) 802 return ret; 803 } 804 } 805 806 if (event->group_leader != event) 807 ret = uncore_validate_group(pmu, event); 808 else 809 ret = 0; 810 811 return ret; 812 } 813 814 static void uncore_pmu_enable(struct pmu *pmu) 815 { 816 struct intel_uncore_pmu *uncore_pmu; 817 struct intel_uncore_box *box; 818 819 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 820 821 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 822 if (!box) 823 return; 824 825 if (uncore_pmu->type->ops->enable_box) 826 uncore_pmu->type->ops->enable_box(box); 827 } 828 829 static void uncore_pmu_disable(struct pmu *pmu) 830 { 831 struct intel_uncore_pmu *uncore_pmu; 832 struct intel_uncore_box *box; 833 834 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 835 836 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 837 if (!box) 838 return; 839 840 if (uncore_pmu->type->ops->disable_box) 841 uncore_pmu->type->ops->disable_box(box); 842 } 843 844 static ssize_t uncore_get_attr_cpumask(struct device *dev, 845 struct device_attribute *attr, char *buf) 846 { 847 return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); 848 } 849 850 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); 851 852 static struct attribute *uncore_pmu_attrs[] = { 853 &dev_attr_cpumask.attr, 854 NULL, 855 }; 856 857 static const struct attribute_group uncore_pmu_attr_group = { 858 .attrs = uncore_pmu_attrs, 859 }; 860 861 static inline int uncore_get_box_id(struct intel_uncore_type *type, 862 struct intel_uncore_pmu *pmu) 863 { 864 return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; 865 } 866 867 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) 868 { 869 struct intel_uncore_type *type = pmu->type; 870 871 if (type->num_boxes == 1) 872 sprintf(pmu_name, "uncore_type_%u", type->type_id); 873 else { 874 sprintf(pmu_name, "uncore_type_%u_%d", 875 type->type_id, uncore_get_box_id(type, pmu)); 876 } 877 } 878 879 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) 880 { 881 struct intel_uncore_type *type = pmu->type; 882 883 /* 884 * No uncore block name in discovery table. 885 * Use uncore_type_&typeid_&boxid as name. 886 */ 887 if (!type->name) { 888 uncore_get_alias_name(pmu->name, pmu); 889 return; 890 } 891 892 if (type->num_boxes == 1) { 893 if (strlen(type->name) > 0) 894 sprintf(pmu->name, "uncore_%s", type->name); 895 else 896 sprintf(pmu->name, "uncore"); 897 } else { 898 /* 899 * Use the box ID from the discovery table if applicable. 900 */ 901 sprintf(pmu->name, "uncore_%s_%d", type->name, 902 uncore_get_box_id(type, pmu)); 903 } 904 } 905 906 static int uncore_pmu_register(struct intel_uncore_pmu *pmu) 907 { 908 int ret; 909 910 if (!pmu->type->pmu) { 911 pmu->pmu = (struct pmu) { 912 .attr_groups = pmu->type->attr_groups, 913 .task_ctx_nr = perf_invalid_context, 914 .pmu_enable = uncore_pmu_enable, 915 .pmu_disable = uncore_pmu_disable, 916 .event_init = uncore_pmu_event_init, 917 .add = uncore_pmu_event_add, 918 .del = uncore_pmu_event_del, 919 .start = uncore_pmu_event_start, 920 .stop = uncore_pmu_event_stop, 921 .read = uncore_pmu_event_read, 922 .module = THIS_MODULE, 923 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 924 .attr_update = pmu->type->attr_update, 925 }; 926 } else { 927 pmu->pmu = *pmu->type->pmu; 928 pmu->pmu.attr_groups = pmu->type->attr_groups; 929 pmu->pmu.attr_update = pmu->type->attr_update; 930 } 931 932 uncore_get_pmu_name(pmu); 933 934 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); 935 if (!ret) 936 pmu->registered = true; 937 return ret; 938 } 939 940 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) 941 { 942 if (!pmu->registered) 943 return; 944 perf_pmu_unregister(&pmu->pmu); 945 pmu->registered = false; 946 } 947 948 static void uncore_free_boxes(struct intel_uncore_pmu *pmu) 949 { 950 int die; 951 952 for (die = 0; die < uncore_max_dies(); die++) 953 kfree(pmu->boxes[die]); 954 kfree(pmu->boxes); 955 } 956 957 static void uncore_type_exit(struct intel_uncore_type *type) 958 { 959 struct intel_uncore_pmu *pmu = type->pmus; 960 int i; 961 962 if (type->cleanup_mapping) 963 type->cleanup_mapping(type); 964 965 if (pmu) { 966 for (i = 0; i < type->num_boxes; i++, pmu++) { 967 uncore_pmu_unregister(pmu); 968 uncore_free_boxes(pmu); 969 } 970 kfree(type->pmus); 971 type->pmus = NULL; 972 } 973 if (type->box_ids) { 974 kfree(type->box_ids); 975 type->box_ids = NULL; 976 } 977 kfree(type->events_group); 978 type->events_group = NULL; 979 } 980 981 static void uncore_types_exit(struct intel_uncore_type **types) 982 { 983 for (; *types; types++) 984 uncore_type_exit(*types); 985 } 986 987 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) 988 { 989 struct intel_uncore_pmu *pmus; 990 size_t size; 991 int i, j; 992 993 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); 994 if (!pmus) 995 return -ENOMEM; 996 997 size = uncore_max_dies() * sizeof(struct intel_uncore_box *); 998 999 for (i = 0; i < type->num_boxes; i++) { 1000 pmus[i].func_id = setid ? i : -1; 1001 pmus[i].pmu_idx = i; 1002 pmus[i].type = type; 1003 pmus[i].boxes = kzalloc(size, GFP_KERNEL); 1004 if (!pmus[i].boxes) 1005 goto err; 1006 } 1007 1008 type->pmus = pmus; 1009 type->unconstrainted = (struct event_constraint) 1010 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 1011 0, type->num_counters, 0, 0); 1012 1013 if (type->event_descs) { 1014 struct { 1015 struct attribute_group group; 1016 struct attribute *attrs[]; 1017 } *attr_group; 1018 for (i = 0; type->event_descs[i].attr.attr.name; i++); 1019 1020 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), 1021 GFP_KERNEL); 1022 if (!attr_group) 1023 goto err; 1024 1025 attr_group->group.name = "events"; 1026 attr_group->group.attrs = attr_group->attrs; 1027 1028 for (j = 0; j < i; j++) 1029 attr_group->attrs[j] = &type->event_descs[j].attr.attr; 1030 1031 type->events_group = &attr_group->group; 1032 } 1033 1034 type->pmu_group = &uncore_pmu_attr_group; 1035 1036 if (type->set_mapping) 1037 type->set_mapping(type); 1038 1039 return 0; 1040 1041 err: 1042 for (i = 0; i < type->num_boxes; i++) 1043 kfree(pmus[i].boxes); 1044 kfree(pmus); 1045 1046 return -ENOMEM; 1047 } 1048 1049 static int __init 1050 uncore_types_init(struct intel_uncore_type **types, bool setid) 1051 { 1052 int ret; 1053 1054 for (; *types; types++) { 1055 ret = uncore_type_init(*types, setid); 1056 if (ret) 1057 return ret; 1058 } 1059 return 0; 1060 } 1061 1062 /* 1063 * Get the die information of a PCI device. 1064 * @pdev: The PCI device. 1065 * @die: The die id which the device maps to. 1066 */ 1067 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) 1068 { 1069 *die = uncore_pcibus_to_dieid(pdev->bus); 1070 if (*die < 0) 1071 return -EINVAL; 1072 1073 return 0; 1074 } 1075 1076 static struct intel_uncore_pmu * 1077 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) 1078 { 1079 struct intel_uncore_type **types = uncore_pci_uncores; 1080 struct intel_uncore_type *type; 1081 u64 box_ctl; 1082 int i, die; 1083 1084 for (; *types; types++) { 1085 type = *types; 1086 for (die = 0; die < __uncore_max_dies; die++) { 1087 for (i = 0; i < type->num_boxes; i++) { 1088 if (!type->box_ctls[die]) 1089 continue; 1090 box_ctl = type->box_ctls[die] + type->pci_offsets[i]; 1091 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && 1092 pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && 1093 pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) 1094 return &type->pmus[i]; 1095 } 1096 } 1097 } 1098 1099 return NULL; 1100 } 1101 1102 /* 1103 * Find the PMU of a PCI device. 1104 * @pdev: The PCI device. 1105 * @ids: The ID table of the available PCI devices with a PMU. 1106 * If NULL, search the whole uncore_pci_uncores. 1107 */ 1108 static struct intel_uncore_pmu * 1109 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) 1110 { 1111 struct intel_uncore_pmu *pmu = NULL; 1112 struct intel_uncore_type *type; 1113 kernel_ulong_t data; 1114 unsigned int devfn; 1115 1116 if (!ids) 1117 return uncore_pci_find_dev_pmu_from_types(pdev); 1118 1119 while (ids && ids->vendor) { 1120 if ((ids->vendor == pdev->vendor) && 1121 (ids->device == pdev->device)) { 1122 data = ids->driver_data; 1123 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), 1124 UNCORE_PCI_DEV_FUNC(data)); 1125 if (devfn == pdev->devfn) { 1126 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; 1127 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; 1128 break; 1129 } 1130 } 1131 ids++; 1132 } 1133 return pmu; 1134 } 1135 1136 /* 1137 * Register the PMU for a PCI device 1138 * @pdev: The PCI device. 1139 * @type: The corresponding PMU type of the device. 1140 * @pmu: The corresponding PMU of the device. 1141 * @die: The die id which the device maps to. 1142 */ 1143 static int uncore_pci_pmu_register(struct pci_dev *pdev, 1144 struct intel_uncore_type *type, 1145 struct intel_uncore_pmu *pmu, 1146 int die) 1147 { 1148 struct intel_uncore_box *box; 1149 int ret; 1150 1151 if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) 1152 return -EINVAL; 1153 1154 box = uncore_alloc_box(type, NUMA_NO_NODE); 1155 if (!box) 1156 return -ENOMEM; 1157 1158 if (pmu->func_id < 0) 1159 pmu->func_id = pdev->devfn; 1160 else 1161 WARN_ON_ONCE(pmu->func_id != pdev->devfn); 1162 1163 atomic_inc(&box->refcnt); 1164 box->dieid = die; 1165 box->pci_dev = pdev; 1166 box->pmu = pmu; 1167 uncore_box_init(box); 1168 1169 pmu->boxes[die] = box; 1170 if (atomic_inc_return(&pmu->activeboxes) > 1) 1171 return 0; 1172 1173 /* First active box registers the pmu */ 1174 ret = uncore_pmu_register(pmu); 1175 if (ret) { 1176 pmu->boxes[die] = NULL; 1177 uncore_box_exit(box); 1178 kfree(box); 1179 } 1180 return ret; 1181 } 1182 1183 /* 1184 * add a pci uncore device 1185 */ 1186 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1187 { 1188 struct intel_uncore_type *type; 1189 struct intel_uncore_pmu *pmu = NULL; 1190 int die, ret; 1191 1192 ret = uncore_pci_get_dev_die_info(pdev, &die); 1193 if (ret) 1194 return ret; 1195 1196 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { 1197 int idx = UNCORE_PCI_DEV_IDX(id->driver_data); 1198 1199 uncore_extra_pci_dev[die].dev[idx] = pdev; 1200 pci_set_drvdata(pdev, NULL); 1201 return 0; 1202 } 1203 1204 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 1205 1206 /* 1207 * Some platforms, e.g. Knights Landing, use a common PCI device ID 1208 * for multiple instances of an uncore PMU device type. We should check 1209 * PCI slot and func to indicate the uncore box. 1210 */ 1211 if (id->driver_data & ~0xffff) { 1212 struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); 1213 1214 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); 1215 if (pmu == NULL) 1216 return -ENODEV; 1217 } else { 1218 /* 1219 * for performance monitoring unit with multiple boxes, 1220 * each box has a different function id. 1221 */ 1222 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; 1223 } 1224 1225 ret = uncore_pci_pmu_register(pdev, type, pmu, die); 1226 1227 pci_set_drvdata(pdev, pmu->boxes[die]); 1228 1229 return ret; 1230 } 1231 1232 /* 1233 * Unregister the PMU of a PCI device 1234 * @pmu: The corresponding PMU is unregistered. 1235 * @die: The die id which the device maps to. 1236 */ 1237 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) 1238 { 1239 struct intel_uncore_box *box = pmu->boxes[die]; 1240 1241 pmu->boxes[die] = NULL; 1242 if (atomic_dec_return(&pmu->activeboxes) == 0) 1243 uncore_pmu_unregister(pmu); 1244 uncore_box_exit(box); 1245 kfree(box); 1246 } 1247 1248 static void uncore_pci_remove(struct pci_dev *pdev) 1249 { 1250 struct intel_uncore_box *box; 1251 struct intel_uncore_pmu *pmu; 1252 int i, die; 1253 1254 if (uncore_pci_get_dev_die_info(pdev, &die)) 1255 return; 1256 1257 box = pci_get_drvdata(pdev); 1258 if (!box) { 1259 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { 1260 if (uncore_extra_pci_dev[die].dev[i] == pdev) { 1261 uncore_extra_pci_dev[die].dev[i] = NULL; 1262 break; 1263 } 1264 } 1265 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); 1266 return; 1267 } 1268 1269 pmu = box->pmu; 1270 1271 pci_set_drvdata(pdev, NULL); 1272 1273 uncore_pci_pmu_unregister(pmu, die); 1274 } 1275 1276 static int uncore_bus_notify(struct notifier_block *nb, 1277 unsigned long action, void *data, 1278 const struct pci_device_id *ids) 1279 { 1280 struct device *dev = data; 1281 struct pci_dev *pdev = to_pci_dev(dev); 1282 struct intel_uncore_pmu *pmu; 1283 int die; 1284 1285 /* Unregister the PMU when the device is going to be deleted. */ 1286 if (action != BUS_NOTIFY_DEL_DEVICE) 1287 return NOTIFY_DONE; 1288 1289 pmu = uncore_pci_find_dev_pmu(pdev, ids); 1290 if (!pmu) 1291 return NOTIFY_DONE; 1292 1293 if (uncore_pci_get_dev_die_info(pdev, &die)) 1294 return NOTIFY_DONE; 1295 1296 uncore_pci_pmu_unregister(pmu, die); 1297 1298 return NOTIFY_OK; 1299 } 1300 1301 static int uncore_pci_sub_bus_notify(struct notifier_block *nb, 1302 unsigned long action, void *data) 1303 { 1304 return uncore_bus_notify(nb, action, data, 1305 uncore_pci_sub_driver->id_table); 1306 } 1307 1308 static struct notifier_block uncore_pci_sub_notifier = { 1309 .notifier_call = uncore_pci_sub_bus_notify, 1310 }; 1311 1312 static void uncore_pci_sub_driver_init(void) 1313 { 1314 const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; 1315 struct intel_uncore_type *type; 1316 struct intel_uncore_pmu *pmu; 1317 struct pci_dev *pci_sub_dev; 1318 bool notify = false; 1319 unsigned int devfn; 1320 int die; 1321 1322 while (ids && ids->vendor) { 1323 pci_sub_dev = NULL; 1324 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; 1325 /* 1326 * Search the available device, and register the 1327 * corresponding PMU. 1328 */ 1329 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 1330 ids->device, pci_sub_dev))) { 1331 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), 1332 UNCORE_PCI_DEV_FUNC(ids->driver_data)); 1333 if (devfn != pci_sub_dev->devfn) 1334 continue; 1335 1336 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; 1337 if (!pmu) 1338 continue; 1339 1340 if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) 1341 continue; 1342 1343 if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, 1344 die)) 1345 notify = true; 1346 } 1347 ids++; 1348 } 1349 1350 if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) 1351 notify = false; 1352 1353 if (!notify) 1354 uncore_pci_sub_driver = NULL; 1355 } 1356 1357 static int uncore_pci_bus_notify(struct notifier_block *nb, 1358 unsigned long action, void *data) 1359 { 1360 return uncore_bus_notify(nb, action, data, NULL); 1361 } 1362 1363 static struct notifier_block uncore_pci_notifier = { 1364 .notifier_call = uncore_pci_bus_notify, 1365 }; 1366 1367 1368 static void uncore_pci_pmus_register(void) 1369 { 1370 struct intel_uncore_type **types = uncore_pci_uncores; 1371 struct intel_uncore_type *type; 1372 struct intel_uncore_pmu *pmu; 1373 struct pci_dev *pdev; 1374 u64 box_ctl; 1375 int i, die; 1376 1377 for (; *types; types++) { 1378 type = *types; 1379 for (die = 0; die < __uncore_max_dies; die++) { 1380 for (i = 0; i < type->num_boxes; i++) { 1381 if (!type->box_ctls[die]) 1382 continue; 1383 box_ctl = type->box_ctls[die] + type->pci_offsets[i]; 1384 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), 1385 UNCORE_DISCOVERY_PCI_BUS(box_ctl), 1386 UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); 1387 if (!pdev) 1388 continue; 1389 pmu = &type->pmus[i]; 1390 1391 uncore_pci_pmu_register(pdev, type, pmu, die); 1392 } 1393 } 1394 } 1395 1396 bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); 1397 } 1398 1399 static int __init uncore_pci_init(void) 1400 { 1401 size_t size; 1402 int ret; 1403 1404 size = uncore_max_dies() * sizeof(struct pci_extra_dev); 1405 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); 1406 if (!uncore_extra_pci_dev) { 1407 ret = -ENOMEM; 1408 goto err; 1409 } 1410 1411 ret = uncore_types_init(uncore_pci_uncores, false); 1412 if (ret) 1413 goto errtype; 1414 1415 if (uncore_pci_driver) { 1416 uncore_pci_driver->probe = uncore_pci_probe; 1417 uncore_pci_driver->remove = uncore_pci_remove; 1418 1419 ret = pci_register_driver(uncore_pci_driver); 1420 if (ret) 1421 goto errtype; 1422 } else 1423 uncore_pci_pmus_register(); 1424 1425 if (uncore_pci_sub_driver) 1426 uncore_pci_sub_driver_init(); 1427 1428 pcidrv_registered = true; 1429 return 0; 1430 1431 errtype: 1432 uncore_types_exit(uncore_pci_uncores); 1433 kfree(uncore_extra_pci_dev); 1434 uncore_extra_pci_dev = NULL; 1435 uncore_free_pcibus_map(); 1436 err: 1437 uncore_pci_uncores = empty_uncore; 1438 return ret; 1439 } 1440 1441 static void uncore_pci_exit(void) 1442 { 1443 if (pcidrv_registered) { 1444 pcidrv_registered = false; 1445 if (uncore_pci_sub_driver) 1446 bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); 1447 if (uncore_pci_driver) 1448 pci_unregister_driver(uncore_pci_driver); 1449 else 1450 bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); 1451 uncore_types_exit(uncore_pci_uncores); 1452 kfree(uncore_extra_pci_dev); 1453 uncore_free_pcibus_map(); 1454 } 1455 } 1456 1457 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, 1458 int new_cpu) 1459 { 1460 struct intel_uncore_pmu *pmu = type->pmus; 1461 struct intel_uncore_box *box; 1462 int i, die; 1463 1464 die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu); 1465 for (i = 0; i < type->num_boxes; i++, pmu++) { 1466 box = pmu->boxes[die]; 1467 if (!box) 1468 continue; 1469 1470 if (old_cpu < 0) { 1471 WARN_ON_ONCE(box->cpu != -1); 1472 box->cpu = new_cpu; 1473 continue; 1474 } 1475 1476 WARN_ON_ONCE(box->cpu != old_cpu); 1477 box->cpu = -1; 1478 if (new_cpu < 0) 1479 continue; 1480 1481 uncore_pmu_cancel_hrtimer(box); 1482 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); 1483 box->cpu = new_cpu; 1484 } 1485 } 1486 1487 static void uncore_change_context(struct intel_uncore_type **uncores, 1488 int old_cpu, int new_cpu) 1489 { 1490 for (; *uncores; uncores++) 1491 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1492 } 1493 1494 static void uncore_box_unref(struct intel_uncore_type **types, int id) 1495 { 1496 struct intel_uncore_type *type; 1497 struct intel_uncore_pmu *pmu; 1498 struct intel_uncore_box *box; 1499 int i; 1500 1501 for (; *types; types++) { 1502 type = *types; 1503 pmu = type->pmus; 1504 for (i = 0; i < type->num_boxes; i++, pmu++) { 1505 box = pmu->boxes[id]; 1506 if (box && atomic_dec_return(&box->refcnt) == 0) 1507 uncore_box_exit(box); 1508 } 1509 } 1510 } 1511 1512 static int uncore_event_cpu_offline(unsigned int cpu) 1513 { 1514 int die, target; 1515 1516 /* Check if exiting cpu is used for collecting uncore events */ 1517 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) 1518 goto unref; 1519 /* Find a new cpu to collect uncore events */ 1520 target = cpumask_any_but(topology_die_cpumask(cpu), cpu); 1521 1522 /* Migrate uncore events to the new target */ 1523 if (target < nr_cpu_ids) 1524 cpumask_set_cpu(target, &uncore_cpu_mask); 1525 else 1526 target = -1; 1527 1528 uncore_change_context(uncore_msr_uncores, cpu, target); 1529 uncore_change_context(uncore_mmio_uncores, cpu, target); 1530 uncore_change_context(uncore_pci_uncores, cpu, target); 1531 1532 unref: 1533 /* Clear the references */ 1534 die = topology_logical_die_id(cpu); 1535 uncore_box_unref(uncore_msr_uncores, die); 1536 uncore_box_unref(uncore_mmio_uncores, die); 1537 return 0; 1538 } 1539 1540 static int allocate_boxes(struct intel_uncore_type **types, 1541 unsigned int die, unsigned int cpu) 1542 { 1543 struct intel_uncore_box *box, *tmp; 1544 struct intel_uncore_type *type; 1545 struct intel_uncore_pmu *pmu; 1546 LIST_HEAD(allocated); 1547 int i; 1548 1549 /* Try to allocate all required boxes */ 1550 for (; *types; types++) { 1551 type = *types; 1552 pmu = type->pmus; 1553 for (i = 0; i < type->num_boxes; i++, pmu++) { 1554 if (pmu->boxes[die]) 1555 continue; 1556 box = uncore_alloc_box(type, cpu_to_node(cpu)); 1557 if (!box) 1558 goto cleanup; 1559 box->pmu = pmu; 1560 box->dieid = die; 1561 list_add(&box->active_list, &allocated); 1562 } 1563 } 1564 /* Install them in the pmus */ 1565 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1566 list_del_init(&box->active_list); 1567 box->pmu->boxes[die] = box; 1568 } 1569 return 0; 1570 1571 cleanup: 1572 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1573 list_del_init(&box->active_list); 1574 kfree(box); 1575 } 1576 return -ENOMEM; 1577 } 1578 1579 static int uncore_box_ref(struct intel_uncore_type **types, 1580 int id, unsigned int cpu) 1581 { 1582 struct intel_uncore_type *type; 1583 struct intel_uncore_pmu *pmu; 1584 struct intel_uncore_box *box; 1585 int i, ret; 1586 1587 ret = allocate_boxes(types, id, cpu); 1588 if (ret) 1589 return ret; 1590 1591 for (; *types; types++) { 1592 type = *types; 1593 pmu = type->pmus; 1594 for (i = 0; i < type->num_boxes; i++, pmu++) { 1595 box = pmu->boxes[id]; 1596 if (box && atomic_inc_return(&box->refcnt) == 1) 1597 uncore_box_init(box); 1598 } 1599 } 1600 return 0; 1601 } 1602 1603 static int uncore_event_cpu_online(unsigned int cpu) 1604 { 1605 int die, target, msr_ret, mmio_ret; 1606 1607 die = topology_logical_die_id(cpu); 1608 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); 1609 mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); 1610 if (msr_ret && mmio_ret) 1611 return -ENOMEM; 1612 1613 /* 1614 * Check if there is an online cpu in the package 1615 * which collects uncore events already. 1616 */ 1617 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); 1618 if (target < nr_cpu_ids) 1619 return 0; 1620 1621 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1622 1623 if (!msr_ret) 1624 uncore_change_context(uncore_msr_uncores, -1, cpu); 1625 if (!mmio_ret) 1626 uncore_change_context(uncore_mmio_uncores, -1, cpu); 1627 uncore_change_context(uncore_pci_uncores, -1, cpu); 1628 return 0; 1629 } 1630 1631 static int __init type_pmu_register(struct intel_uncore_type *type) 1632 { 1633 int i, ret; 1634 1635 for (i = 0; i < type->num_boxes; i++) { 1636 ret = uncore_pmu_register(&type->pmus[i]); 1637 if (ret) 1638 return ret; 1639 } 1640 return 0; 1641 } 1642 1643 static int __init uncore_msr_pmus_register(void) 1644 { 1645 struct intel_uncore_type **types = uncore_msr_uncores; 1646 int ret; 1647 1648 for (; *types; types++) { 1649 ret = type_pmu_register(*types); 1650 if (ret) 1651 return ret; 1652 } 1653 return 0; 1654 } 1655 1656 static int __init uncore_cpu_init(void) 1657 { 1658 int ret; 1659 1660 ret = uncore_types_init(uncore_msr_uncores, true); 1661 if (ret) 1662 goto err; 1663 1664 ret = uncore_msr_pmus_register(); 1665 if (ret) 1666 goto err; 1667 return 0; 1668 err: 1669 uncore_types_exit(uncore_msr_uncores); 1670 uncore_msr_uncores = empty_uncore; 1671 return ret; 1672 } 1673 1674 static int __init uncore_mmio_init(void) 1675 { 1676 struct intel_uncore_type **types = uncore_mmio_uncores; 1677 int ret; 1678 1679 ret = uncore_types_init(types, true); 1680 if (ret) 1681 goto err; 1682 1683 for (; *types; types++) { 1684 ret = type_pmu_register(*types); 1685 if (ret) 1686 goto err; 1687 } 1688 return 0; 1689 err: 1690 uncore_types_exit(uncore_mmio_uncores); 1691 uncore_mmio_uncores = empty_uncore; 1692 return ret; 1693 } 1694 1695 struct intel_uncore_init_fun { 1696 void (*cpu_init)(void); 1697 int (*pci_init)(void); 1698 void (*mmio_init)(void); 1699 /* Discovery table is required */ 1700 bool use_discovery; 1701 /* The units in the discovery table should be ignored. */ 1702 int *uncore_units_ignore; 1703 }; 1704 1705 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { 1706 .cpu_init = nhm_uncore_cpu_init, 1707 }; 1708 1709 static const struct intel_uncore_init_fun snb_uncore_init __initconst = { 1710 .cpu_init = snb_uncore_cpu_init, 1711 .pci_init = snb_uncore_pci_init, 1712 }; 1713 1714 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { 1715 .cpu_init = snb_uncore_cpu_init, 1716 .pci_init = ivb_uncore_pci_init, 1717 }; 1718 1719 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { 1720 .cpu_init = snb_uncore_cpu_init, 1721 .pci_init = hsw_uncore_pci_init, 1722 }; 1723 1724 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { 1725 .cpu_init = snb_uncore_cpu_init, 1726 .pci_init = bdw_uncore_pci_init, 1727 }; 1728 1729 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { 1730 .cpu_init = snbep_uncore_cpu_init, 1731 .pci_init = snbep_uncore_pci_init, 1732 }; 1733 1734 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { 1735 .cpu_init = nhmex_uncore_cpu_init, 1736 }; 1737 1738 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { 1739 .cpu_init = ivbep_uncore_cpu_init, 1740 .pci_init = ivbep_uncore_pci_init, 1741 }; 1742 1743 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { 1744 .cpu_init = hswep_uncore_cpu_init, 1745 .pci_init = hswep_uncore_pci_init, 1746 }; 1747 1748 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { 1749 .cpu_init = bdx_uncore_cpu_init, 1750 .pci_init = bdx_uncore_pci_init, 1751 }; 1752 1753 static const struct intel_uncore_init_fun knl_uncore_init __initconst = { 1754 .cpu_init = knl_uncore_cpu_init, 1755 .pci_init = knl_uncore_pci_init, 1756 }; 1757 1758 static const struct intel_uncore_init_fun skl_uncore_init __initconst = { 1759 .cpu_init = skl_uncore_cpu_init, 1760 .pci_init = skl_uncore_pci_init, 1761 }; 1762 1763 static const struct intel_uncore_init_fun skx_uncore_init __initconst = { 1764 .cpu_init = skx_uncore_cpu_init, 1765 .pci_init = skx_uncore_pci_init, 1766 }; 1767 1768 static const struct intel_uncore_init_fun icl_uncore_init __initconst = { 1769 .cpu_init = icl_uncore_cpu_init, 1770 .pci_init = skl_uncore_pci_init, 1771 }; 1772 1773 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1774 .cpu_init = tgl_uncore_cpu_init, 1775 .mmio_init = tgl_uncore_mmio_init, 1776 }; 1777 1778 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1779 .cpu_init = tgl_uncore_cpu_init, 1780 .mmio_init = tgl_l_uncore_mmio_init, 1781 }; 1782 1783 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { 1784 .cpu_init = tgl_uncore_cpu_init, 1785 .pci_init = skl_uncore_pci_init, 1786 }; 1787 1788 static const struct intel_uncore_init_fun adl_uncore_init __initconst = { 1789 .cpu_init = adl_uncore_cpu_init, 1790 .mmio_init = adl_uncore_mmio_init, 1791 }; 1792 1793 static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { 1794 .cpu_init = mtl_uncore_cpu_init, 1795 .mmio_init = adl_uncore_mmio_init, 1796 }; 1797 1798 static const struct intel_uncore_init_fun icx_uncore_init __initconst = { 1799 .cpu_init = icx_uncore_cpu_init, 1800 .pci_init = icx_uncore_pci_init, 1801 .mmio_init = icx_uncore_mmio_init, 1802 }; 1803 1804 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1805 .cpu_init = snr_uncore_cpu_init, 1806 .pci_init = snr_uncore_pci_init, 1807 .mmio_init = snr_uncore_mmio_init, 1808 }; 1809 1810 static const struct intel_uncore_init_fun spr_uncore_init __initconst = { 1811 .cpu_init = spr_uncore_cpu_init, 1812 .pci_init = spr_uncore_pci_init, 1813 .mmio_init = spr_uncore_mmio_init, 1814 .use_discovery = true, 1815 .uncore_units_ignore = spr_uncore_units_ignore, 1816 }; 1817 1818 static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1819 .cpu_init = gnr_uncore_cpu_init, 1820 .pci_init = gnr_uncore_pci_init, 1821 .mmio_init = gnr_uncore_mmio_init, 1822 .use_discovery = true, 1823 .uncore_units_ignore = gnr_uncore_units_ignore, 1824 }; 1825 1826 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1827 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1828 .pci_init = intel_uncore_generic_uncore_pci_init, 1829 .mmio_init = intel_uncore_generic_uncore_mmio_init, 1830 }; 1831 1832 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1833 X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), 1834 X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), 1835 X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), 1836 X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), 1837 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), 1838 X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), 1839 X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), 1840 X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), 1841 X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), 1842 X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), 1843 X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), 1844 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), 1845 X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), 1846 X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init), 1847 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), 1848 X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), 1849 X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), 1850 X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), 1851 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), 1852 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), 1853 X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), 1854 X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init), 1855 X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), 1856 X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), 1857 X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), 1858 X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), 1859 X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), 1860 X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), 1861 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), 1862 X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), 1863 X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), 1864 X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), 1865 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), 1866 X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), 1867 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), 1868 X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), 1869 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), 1870 X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), 1871 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), 1872 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), 1873 X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), 1874 X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), 1875 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), 1876 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), 1877 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), 1878 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), 1879 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), 1880 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), 1881 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), 1882 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), 1883 {}, 1884 }; 1885 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1886 1887 static int __init intel_uncore_init(void) 1888 { 1889 const struct x86_cpu_id *id; 1890 struct intel_uncore_init_fun *uncore_init; 1891 int pret = 0, cret = 0, mret = 0, ret; 1892 1893 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1894 return -ENODEV; 1895 1896 __uncore_max_dies = 1897 topology_max_packages() * topology_max_dies_per_package(); 1898 1899 id = x86_match_cpu(intel_uncore_match); 1900 if (!id) { 1901 if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) 1902 uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; 1903 else 1904 return -ENODEV; 1905 } else { 1906 uncore_init = (struct intel_uncore_init_fun *)id->driver_data; 1907 if (uncore_no_discover && uncore_init->use_discovery) 1908 return -ENODEV; 1909 if (uncore_init->use_discovery && 1910 !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) 1911 return -ENODEV; 1912 } 1913 1914 if (uncore_init->pci_init) { 1915 pret = uncore_init->pci_init(); 1916 if (!pret) 1917 pret = uncore_pci_init(); 1918 } 1919 1920 if (uncore_init->cpu_init) { 1921 uncore_init->cpu_init(); 1922 cret = uncore_cpu_init(); 1923 } 1924 1925 if (uncore_init->mmio_init) { 1926 uncore_init->mmio_init(); 1927 mret = uncore_mmio_init(); 1928 } 1929 1930 if (cret && pret && mret) { 1931 ret = -ENODEV; 1932 goto free_discovery; 1933 } 1934 1935 /* Install hotplug callbacks to setup the targets for each package */ 1936 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, 1937 "perf/x86/intel/uncore:online", 1938 uncore_event_cpu_online, 1939 uncore_event_cpu_offline); 1940 if (ret) 1941 goto err; 1942 return 0; 1943 1944 err: 1945 uncore_types_exit(uncore_msr_uncores); 1946 uncore_types_exit(uncore_mmio_uncores); 1947 uncore_pci_exit(); 1948 free_discovery: 1949 intel_uncore_clear_discovery_tables(); 1950 return ret; 1951 } 1952 module_init(intel_uncore_init); 1953 1954 static void __exit intel_uncore_exit(void) 1955 { 1956 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1957 uncore_types_exit(uncore_msr_uncores); 1958 uncore_types_exit(uncore_mmio_uncores); 1959 uncore_pci_exit(); 1960 intel_uncore_clear_discovery_tables(); 1961 } 1962 module_exit(intel_uncore_exit); 1963