// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "uncore.h"
#include "uncore_discovery.h"

static bool uncore_no_discover;
module_param(uncore_no_discover, bool, 0);
MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
				     "(default: enable the discovery mechanism).");
/* Default (empty, NULL-terminated) type list used by the three tables below. */
struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* The PCI driver for the device which the uncore doesn't own. */
struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
int __uncore_max_dies;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

MODULE_LICENSE("GPL");

/*
 * Look up the die id recorded for a PCI bus.
 *
 * Searches the pci2phy map list (under pci2phy_map_lock) for the map whose
 * segment equals the bus's PCI domain and returns the entry stored for the
 * bus number. Returns -1 when no map matches the domain.
 */
int uncore_pcibus_to_dieid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int die_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			die_id = map->pbus_to_dieid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return die_id;
}

/*
 * Return the PCI domain number of the first bus, in pci_find_next_bus()
 * order, that maps to @die, or -EINVAL when no bus maps to it.
 */
int uncore_die_to_segment(int die)
{
	struct pci_bus *bus = NULL;

	/* Find first pci bus which attributes to specified die. */
	while ((bus = pci_find_next_bus(bus)) &&
	       (die != uncore_pcibus_to_dieid(bus)))
		;

	return bus ? pci_domain_nr(bus) : -EINVAL;
}

/*
 * Return the logical die id of the first initialized CPU in the bus's CPU
 * mask whose NUMA node equals the node of the device's bus, or -1 when no
 * such CPU exists.
 */
int uncore_device_to_die(struct pci_dev *dev)
{
	int node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && cpu_to_node(cpu) == node)
			return c->topo.logical_die_id;
	}

	return -1;
}

/*
 * Unlink and free every entry on pci2phy_map_head.
 * Note that this function does not take pci2phy_map_lock itself.
 */
static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;

	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
		list_del(&map->list);
		kfree(map);
	}
}

/*
 * Find the pci2phy map for @segment, creating it if it does not exist.
 *
 * The caller must hold pci2phy_map_lock (asserted below). The lock is
 * dropped around the allocation, so the list is searched a second time
 * afterwards; if a matching map showed up in the meantime the fresh
 * allocation is freed and the existing map is returned. A newly created
 * map has all 256 pbus_to_dieid slots set to -1.
 *
 * Returns the map, or NULL if the allocation failed.
 */
struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		/* Allocate without holding the raw spinlock, then retry. */
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	/* Still not present after the second search: insert the new map. */
	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_dieid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	/* alloc is non-NULL here only when the allocation turned out unused. */
	kfree(alloc);
	return map;
}

/*
 * Device attribute show callback: prints the config string of the
 * uncore_event_desc that embeds @attr into @buf.
 */
ssize_t uncore_event_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

/*
 * Return the box of @pmu that belongs to the logical die of @cpu, or NULL
 * when the die id is out of range.
 */
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	unsigned int dieid = topology_logical_die_id(cpu);

	/*
	 * The unsigned check also catches the '-1' return value for non
	 * existent mappings in the topology map.
	 */
	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
}

/* Read the counter MSR whose address is stored in event->hw.event_base. */
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/* Unmap the box's MMIO window if one is mapped. */
void uncore_mmio_exit_box(struct intel_uncore_box *box)
{
	if (box->io_addr)
		iounmap(box->io_addr);
}

/*
 * Read a 64-bit counter at offset event->hw.event_base of the box's MMIO
 * window. Returns 0 when the box has no mapping or the offset is rejected
 * by uncore_mmio_is_valid_offset().
 */
u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
			     struct perf_event *event)
{
	if (!box->io_addr)
		return 0;

	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
		return 0;

	return readq(box->io_addr + event->hw.event_base);
}

/*
 * generic get constraint function for shared match/mask registers.
 *
 * Returns NULL when the event needs no shared register, already holds one,
 * or successfully took a reference on the shared register (which is granted
 * when the register is unreferenced or already programmed with the same
 * config1/config2 values). Returns &uncore_constraint_empty when the shared
 * register is in use with a different configuration.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		/* Event state is only touched for real boxes. */
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

/*
 * Drop the shared-register reference taken by uncore_get_constraint() and
 * clear the event's alloc flag.
 */
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put constraint if extra reg was actually allocated. Also
	 * takes care of event which do not use an extra shared reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

/*
 * Return the config field of shared register @idx of @box, read while
 * holding that register's lock.
 */
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

/*
 * Bind @event to counter slot @idx of @box: record the index, bump and
 * store the slot's tag, and fill in the control/counter register bases
 * (the fixed-counter registers when @idx is the fixed counter).
 */
static void uncore_assign_hw_event(struct intel_uncore_box *box,
				   struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (uncore_pmc_fixed(hwc->idx)) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
}

/*
 * Read the hardware counter of @event and add the change since the last
 * read to event->count.
 */
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	/* shift = number of unimplemented high bits for this counter kind */
	if (uncore_pmc_freerunning(event->hw.idx))
		shift = 64 - uncore_freerunning_bits(box, event);
	else if (uncore_pmc_fixed(event->hw.idx))
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	/*
	 * Shifting both samples up by the unused high bits before the
	 * subtraction and back down afterwards confines the difference to
	 * the counter's width.
	 */
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
 * for SandyBridge. So we use hrtimer to periodically poll the counter
 * to avoid overflow.
299 */ 300 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) 301 { 302 struct intel_uncore_box *box; 303 struct perf_event *event; 304 unsigned long flags; 305 int bit; 306 307 box = container_of(hrtimer, struct intel_uncore_box, hrtimer); 308 if (!box->n_active || box->cpu != smp_processor_id()) 309 return HRTIMER_NORESTART; 310 /* 311 * disable local interrupt to prevent uncore_pmu_event_start/stop 312 * to interrupt the update process 313 */ 314 local_irq_save(flags); 315 316 /* 317 * handle boxes with an active event list as opposed to active 318 * counters 319 */ 320 list_for_each_entry(event, &box->active_list, active_entry) { 321 uncore_perf_event_update(box, event); 322 } 323 324 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) 325 uncore_perf_event_update(box, box->events[bit]); 326 327 local_irq_restore(flags); 328 329 hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); 330 return HRTIMER_RESTART; 331 } 332 333 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 334 { 335 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 336 HRTIMER_MODE_REL_PINNED); 337 } 338 339 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) 340 { 341 hrtimer_cancel(&box->hrtimer); 342 } 343 344 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) 345 { 346 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 347 box->hrtimer.function = uncore_pmu_hrtimer; 348 } 349 350 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, 351 int node) 352 { 353 int i, size, numshared = type->num_shared_regs ; 354 struct intel_uncore_box *box; 355 356 size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); 357 358 box = kzalloc_node(size, GFP_KERNEL, node); 359 if (!box) 360 return NULL; 361 362 for (i = 0; i < numshared; i++) 363 raw_spin_lock_init(&box->shared_regs[i].lock); 364 365 uncore_pmu_init_hrtimer(box); 366 box->cpu = -1; 367 box->dieid = -1; 
368 369 /* set default hrtimer timeout */ 370 box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; 371 372 INIT_LIST_HEAD(&box->active_list); 373 374 return box; 375 } 376 377 /* 378 * Using uncore_pmu_event_init pmu event_init callback 379 * as a detection point for uncore events. 380 */ 381 static int uncore_pmu_event_init(struct perf_event *event); 382 383 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) 384 { 385 return &box->pmu->pmu == event->pmu; 386 } 387 388 static int 389 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, 390 bool dogrp) 391 { 392 struct perf_event *event; 393 int n, max_count; 394 395 max_count = box->pmu->type->num_counters; 396 if (box->pmu->type->fixed_ctl) 397 max_count++; 398 399 if (box->n_events >= max_count) 400 return -EINVAL; 401 402 n = box->n_events; 403 404 if (is_box_event(box, leader)) { 405 box->event_list[n] = leader; 406 n++; 407 } 408 409 if (!dogrp) 410 return n; 411 412 for_each_sibling_event(event, leader) { 413 if (!is_box_event(box, event) || 414 event->state <= PERF_EVENT_STATE_OFF) 415 continue; 416 417 if (n >= max_count) 418 return -EINVAL; 419 420 box->event_list[n] = event; 421 n++; 422 } 423 return n; 424 } 425 426 static struct event_constraint * 427 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) 428 { 429 struct intel_uncore_type *type = box->pmu->type; 430 struct event_constraint *c; 431 432 if (type->ops->get_constraint) { 433 c = type->ops->get_constraint(box, event); 434 if (c) 435 return c; 436 } 437 438 if (event->attr.config == UNCORE_FIXED_EVENT) 439 return &uncore_constraint_fixed; 440 441 if (type->constraints) { 442 for_each_event_constraint(c, type->constraints) { 443 if ((event->hw.config & c->cmask) == c->code) 444 return c; 445 } 446 } 447 448 return &type->unconstrainted; 449 } 450 451 static void uncore_put_event_constraint(struct intel_uncore_box *box, 452 struct perf_event *event) 453 { 454 
if (box->pmu->type->ops->put_constraint) 455 box->pmu->type->ops->put_constraint(box, event); 456 } 457 458 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 459 { 460 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 461 struct event_constraint *c; 462 int i, wmin, wmax, ret = 0; 463 struct hw_perf_event *hwc; 464 465 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 466 467 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 468 c = uncore_get_event_constraint(box, box->event_list[i]); 469 box->event_constraint[i] = c; 470 wmin = min(wmin, c->weight); 471 wmax = max(wmax, c->weight); 472 } 473 474 /* fastpath, try to reuse previous register */ 475 for (i = 0; i < n; i++) { 476 hwc = &box->event_list[i]->hw; 477 c = box->event_constraint[i]; 478 479 /* never assigned */ 480 if (hwc->idx == -1) 481 break; 482 483 /* constraint still honored */ 484 if (!test_bit(hwc->idx, c->idxmsk)) 485 break; 486 487 /* not already used */ 488 if (test_bit(hwc->idx, used_mask)) 489 break; 490 491 __set_bit(hwc->idx, used_mask); 492 if (assign) 493 assign[i] = hwc->idx; 494 } 495 /* slow path */ 496 if (i != n) 497 ret = perf_assign_events(box->event_constraint, n, 498 wmin, wmax, n, assign); 499 500 if (!assign || ret) { 501 for (i = 0; i < n; i++) 502 uncore_put_event_constraint(box, box->event_list[i]); 503 } 504 return ret ? -EINVAL : 0; 505 } 506 507 void uncore_pmu_event_start(struct perf_event *event, int flags) 508 { 509 struct intel_uncore_box *box = uncore_event_to_box(event); 510 int idx = event->hw.idx; 511 512 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) 513 return; 514 515 /* 516 * Free running counter is read-only and always active. 517 * Use the current counter value as start point. 518 * There is no overflow interrupt for free running counter. 519 * Use hrtimer to periodically poll the counter to avoid overflow. 
520 */ 521 if (uncore_pmc_freerunning(event->hw.idx)) { 522 list_add_tail(&event->active_entry, &box->active_list); 523 local64_set(&event->hw.prev_count, 524 uncore_read_counter(box, event)); 525 if (box->n_active++ == 0) 526 uncore_pmu_start_hrtimer(box); 527 return; 528 } 529 530 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) 531 return; 532 533 event->hw.state = 0; 534 box->events[idx] = event; 535 box->n_active++; 536 __set_bit(idx, box->active_mask); 537 538 local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); 539 uncore_enable_event(box, event); 540 541 if (box->n_active == 1) 542 uncore_pmu_start_hrtimer(box); 543 } 544 545 void uncore_pmu_event_stop(struct perf_event *event, int flags) 546 { 547 struct intel_uncore_box *box = uncore_event_to_box(event); 548 struct hw_perf_event *hwc = &event->hw; 549 550 /* Cannot disable free running counter which is read-only */ 551 if (uncore_pmc_freerunning(hwc->idx)) { 552 list_del(&event->active_entry); 553 if (--box->n_active == 0) 554 uncore_pmu_cancel_hrtimer(box); 555 uncore_perf_event_update(box, event); 556 return; 557 } 558 559 if (__test_and_clear_bit(hwc->idx, box->active_mask)) { 560 uncore_disable_event(box, event); 561 box->n_active--; 562 box->events[hwc->idx] = NULL; 563 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 564 hwc->state |= PERF_HES_STOPPED; 565 566 if (box->n_active == 0) 567 uncore_pmu_cancel_hrtimer(box); 568 } 569 570 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 571 /* 572 * Drain the remaining delta count out of a event 573 * that we are disabling: 574 */ 575 uncore_perf_event_update(box, event); 576 hwc->state |= PERF_HES_UPTODATE; 577 } 578 } 579 580 int uncore_pmu_event_add(struct perf_event *event, int flags) 581 { 582 struct intel_uncore_box *box = uncore_event_to_box(event); 583 struct hw_perf_event *hwc = &event->hw; 584 int assign[UNCORE_PMC_IDX_MAX]; 585 int i, n, ret; 586 587 if (!box) 588 return -ENODEV; 589 590 /* 591 * The free 
funning counter is assigned in event_init(). 592 * The free running counter event and free running counter 593 * are 1:1 mapped. It doesn't need to be tracked in event_list. 594 */ 595 if (uncore_pmc_freerunning(hwc->idx)) { 596 if (flags & PERF_EF_START) 597 uncore_pmu_event_start(event, 0); 598 return 0; 599 } 600 601 ret = n = uncore_collect_events(box, event, false); 602 if (ret < 0) 603 return ret; 604 605 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 606 if (!(flags & PERF_EF_START)) 607 hwc->state |= PERF_HES_ARCH; 608 609 ret = uncore_assign_events(box, assign, n); 610 if (ret) 611 return ret; 612 613 /* save events moving to new counters */ 614 for (i = 0; i < box->n_events; i++) { 615 event = box->event_list[i]; 616 hwc = &event->hw; 617 618 if (hwc->idx == assign[i] && 619 hwc->last_tag == box->tags[assign[i]]) 620 continue; 621 /* 622 * Ensure we don't accidentally enable a stopped 623 * counter simply because we rescheduled. 624 */ 625 if (hwc->state & PERF_HES_STOPPED) 626 hwc->state |= PERF_HES_ARCH; 627 628 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 629 } 630 631 /* reprogram moved events into new counters */ 632 for (i = 0; i < n; i++) { 633 event = box->event_list[i]; 634 hwc = &event->hw; 635 636 if (hwc->idx != assign[i] || 637 hwc->last_tag != box->tags[assign[i]]) 638 uncore_assign_hw_event(box, event, assign[i]); 639 else if (i < box->n_events) 640 continue; 641 642 if (hwc->state & PERF_HES_ARCH) 643 continue; 644 645 uncore_pmu_event_start(event, 0); 646 } 647 box->n_events = n; 648 649 return 0; 650 } 651 652 void uncore_pmu_event_del(struct perf_event *event, int flags) 653 { 654 struct intel_uncore_box *box = uncore_event_to_box(event); 655 int i; 656 657 uncore_pmu_event_stop(event, PERF_EF_UPDATE); 658 659 /* 660 * The event for free running counter is not tracked by event_list. 661 * It doesn't need to force event->hw.idx = -1 to reassign the counter. 662 * Because the event and the free running counter are 1:1 mapped. 
663 */ 664 if (uncore_pmc_freerunning(event->hw.idx)) 665 return; 666 667 for (i = 0; i < box->n_events; i++) { 668 if (event == box->event_list[i]) { 669 uncore_put_event_constraint(box, event); 670 671 for (++i; i < box->n_events; i++) 672 box->event_list[i - 1] = box->event_list[i]; 673 674 --box->n_events; 675 break; 676 } 677 } 678 679 event->hw.idx = -1; 680 event->hw.last_tag = ~0ULL; 681 } 682 683 void uncore_pmu_event_read(struct perf_event *event) 684 { 685 struct intel_uncore_box *box = uncore_event_to_box(event); 686 uncore_perf_event_update(box, event); 687 } 688 689 /* 690 * validation ensures the group can be loaded onto the 691 * PMU if it was the only group available. 692 */ 693 static int uncore_validate_group(struct intel_uncore_pmu *pmu, 694 struct perf_event *event) 695 { 696 struct perf_event *leader = event->group_leader; 697 struct intel_uncore_box *fake_box; 698 int ret = -EINVAL, n; 699 700 /* The free running counter is always active. */ 701 if (uncore_pmc_freerunning(event->hw.idx)) 702 return 0; 703 704 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); 705 if (!fake_box) 706 return -ENOMEM; 707 708 fake_box->pmu = pmu; 709 /* 710 * the event is not yet connected with its 711 * siblings therefore we must first collect 712 * existing siblings, then add the new event 713 * before we can simulate the scheduling 714 */ 715 n = uncore_collect_events(fake_box, leader, true); 716 if (n < 0) 717 goto out; 718 719 fake_box->n_events = n; 720 n = uncore_collect_events(fake_box, event, false); 721 if (n < 0) 722 goto out; 723 724 fake_box->n_events = n; 725 726 ret = uncore_assign_events(fake_box, NULL, n); 727 out: 728 kfree(fake_box); 729 return ret; 730 } 731 732 static int uncore_pmu_event_init(struct perf_event *event) 733 { 734 struct intel_uncore_pmu *pmu; 735 struct intel_uncore_box *box; 736 struct hw_perf_event *hwc = &event->hw; 737 int ret; 738 739 if (event->attr.type != event->pmu->type) 740 return -ENOENT; 741 742 pmu = 
uncore_event_to_pmu(event); 743 /* no device found for this pmu */ 744 if (pmu->func_id < 0) 745 return -ENOENT; 746 747 /* Sampling not supported yet */ 748 if (hwc->sample_period) 749 return -EINVAL; 750 751 /* 752 * Place all uncore events for a particular physical package 753 * onto a single cpu 754 */ 755 if (event->cpu < 0) 756 return -EINVAL; 757 box = uncore_pmu_to_box(pmu, event->cpu); 758 if (!box || box->cpu < 0) 759 return -EINVAL; 760 event->cpu = box->cpu; 761 event->pmu_private = box; 762 763 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; 764 765 event->hw.idx = -1; 766 event->hw.last_tag = ~0ULL; 767 event->hw.extra_reg.idx = EXTRA_REG_NONE; 768 event->hw.branch_reg.idx = EXTRA_REG_NONE; 769 770 if (event->attr.config == UNCORE_FIXED_EVENT) { 771 /* no fixed counter */ 772 if (!pmu->type->fixed_ctl) 773 return -EINVAL; 774 /* 775 * if there is only one fixed counter, only the first pmu 776 * can access the fixed counter 777 */ 778 if (pmu->type->single_fixed && pmu->pmu_idx > 0) 779 return -EINVAL; 780 781 /* fixed counters have event field hardcoded to zero */ 782 hwc->config = 0ULL; 783 } else if (is_freerunning_event(event)) { 784 hwc->config = event->attr.config; 785 if (!check_valid_freerunning_event(box, event)) 786 return -EINVAL; 787 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; 788 /* 789 * The free running counter event and free running counter 790 * are always 1:1 mapped. 791 * The free running counter is always active. 792 * Assign the free running counter here. 
793 */ 794 event->hw.event_base = uncore_freerunning_counter(box, event); 795 } else { 796 hwc->config = event->attr.config & 797 (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); 798 if (pmu->type->ops->hw_config) { 799 ret = pmu->type->ops->hw_config(box, event); 800 if (ret) 801 return ret; 802 } 803 } 804 805 if (event->group_leader != event) 806 ret = uncore_validate_group(pmu, event); 807 else 808 ret = 0; 809 810 return ret; 811 } 812 813 static void uncore_pmu_enable(struct pmu *pmu) 814 { 815 struct intel_uncore_pmu *uncore_pmu; 816 struct intel_uncore_box *box; 817 818 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 819 820 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 821 if (!box) 822 return; 823 824 if (uncore_pmu->type->ops->enable_box) 825 uncore_pmu->type->ops->enable_box(box); 826 } 827 828 static void uncore_pmu_disable(struct pmu *pmu) 829 { 830 struct intel_uncore_pmu *uncore_pmu; 831 struct intel_uncore_box *box; 832 833 uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); 834 835 box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); 836 if (!box) 837 return; 838 839 if (uncore_pmu->type->ops->disable_box) 840 uncore_pmu->type->ops->disable_box(box); 841 } 842 843 static ssize_t uncore_get_attr_cpumask(struct device *dev, 844 struct device_attribute *attr, char *buf) 845 { 846 return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); 847 } 848 849 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); 850 851 static struct attribute *uncore_pmu_attrs[] = { 852 &dev_attr_cpumask.attr, 853 NULL, 854 }; 855 856 static const struct attribute_group uncore_pmu_attr_group = { 857 .attrs = uncore_pmu_attrs, 858 }; 859 860 static inline int uncore_get_box_id(struct intel_uncore_type *type, 861 struct intel_uncore_pmu *pmu) 862 { 863 return type->box_ids ? 
type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; 864 } 865 866 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) 867 { 868 struct intel_uncore_type *type = pmu->type; 869 870 if (type->num_boxes == 1) 871 sprintf(pmu_name, "uncore_type_%u", type->type_id); 872 else { 873 sprintf(pmu_name, "uncore_type_%u_%d", 874 type->type_id, uncore_get_box_id(type, pmu)); 875 } 876 } 877 878 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) 879 { 880 struct intel_uncore_type *type = pmu->type; 881 882 /* 883 * No uncore block name in discovery table. 884 * Use uncore_type_&typeid_&boxid as name. 885 */ 886 if (!type->name) { 887 uncore_get_alias_name(pmu->name, pmu); 888 return; 889 } 890 891 if (type->num_boxes == 1) { 892 if (strlen(type->name) > 0) 893 sprintf(pmu->name, "uncore_%s", type->name); 894 else 895 sprintf(pmu->name, "uncore"); 896 } else { 897 /* 898 * Use the box ID from the discovery table if applicable. 899 */ 900 sprintf(pmu->name, "uncore_%s_%d", type->name, 901 uncore_get_box_id(type, pmu)); 902 } 903 } 904 905 static int uncore_pmu_register(struct intel_uncore_pmu *pmu) 906 { 907 int ret; 908 909 if (!pmu->type->pmu) { 910 pmu->pmu = (struct pmu) { 911 .attr_groups = pmu->type->attr_groups, 912 .task_ctx_nr = perf_invalid_context, 913 .pmu_enable = uncore_pmu_enable, 914 .pmu_disable = uncore_pmu_disable, 915 .event_init = uncore_pmu_event_init, 916 .add = uncore_pmu_event_add, 917 .del = uncore_pmu_event_del, 918 .start = uncore_pmu_event_start, 919 .stop = uncore_pmu_event_stop, 920 .read = uncore_pmu_event_read, 921 .module = THIS_MODULE, 922 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 923 .attr_update = pmu->type->attr_update, 924 }; 925 } else { 926 pmu->pmu = *pmu->type->pmu; 927 pmu->pmu.attr_groups = pmu->type->attr_groups; 928 pmu->pmu.attr_update = pmu->type->attr_update; 929 } 930 931 uncore_get_pmu_name(pmu); 932 933 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); 934 if (!ret) 935 pmu->registered = true; 936 return 
ret; 937 } 938 939 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) 940 { 941 if (!pmu->registered) 942 return; 943 perf_pmu_unregister(&pmu->pmu); 944 pmu->registered = false; 945 } 946 947 static void uncore_free_boxes(struct intel_uncore_pmu *pmu) 948 { 949 int die; 950 951 for (die = 0; die < uncore_max_dies(); die++) 952 kfree(pmu->boxes[die]); 953 kfree(pmu->boxes); 954 } 955 956 static void uncore_type_exit(struct intel_uncore_type *type) 957 { 958 struct intel_uncore_pmu *pmu = type->pmus; 959 int i; 960 961 if (type->cleanup_mapping) 962 type->cleanup_mapping(type); 963 964 if (pmu) { 965 for (i = 0; i < type->num_boxes; i++, pmu++) { 966 uncore_pmu_unregister(pmu); 967 uncore_free_boxes(pmu); 968 } 969 kfree(type->pmus); 970 type->pmus = NULL; 971 } 972 if (type->box_ids) { 973 kfree(type->box_ids); 974 type->box_ids = NULL; 975 } 976 kfree(type->events_group); 977 type->events_group = NULL; 978 } 979 980 static void uncore_types_exit(struct intel_uncore_type **types) 981 { 982 for (; *types; types++) 983 uncore_type_exit(*types); 984 } 985 986 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) 987 { 988 struct intel_uncore_pmu *pmus; 989 size_t size; 990 int i, j; 991 992 pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL); 993 if (!pmus) 994 return -ENOMEM; 995 996 size = uncore_max_dies() * sizeof(struct intel_uncore_box *); 997 998 for (i = 0; i < type->num_boxes; i++) { 999 pmus[i].func_id = setid ? 
i : -1; 1000 pmus[i].pmu_idx = i; 1001 pmus[i].type = type; 1002 pmus[i].boxes = kzalloc(size, GFP_KERNEL); 1003 if (!pmus[i].boxes) 1004 goto err; 1005 } 1006 1007 type->pmus = pmus; 1008 type->unconstrainted = (struct event_constraint) 1009 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 1010 0, type->num_counters, 0, 0); 1011 1012 if (type->event_descs) { 1013 struct { 1014 struct attribute_group group; 1015 struct attribute *attrs[]; 1016 } *attr_group; 1017 for (i = 0; type->event_descs[i].attr.attr.name; i++); 1018 1019 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1), 1020 GFP_KERNEL); 1021 if (!attr_group) 1022 goto err; 1023 1024 attr_group->group.name = "events"; 1025 attr_group->group.attrs = attr_group->attrs; 1026 1027 for (j = 0; j < i; j++) 1028 attr_group->attrs[j] = &type->event_descs[j].attr.attr; 1029 1030 type->events_group = &attr_group->group; 1031 } 1032 1033 type->pmu_group = &uncore_pmu_attr_group; 1034 1035 if (type->set_mapping) 1036 type->set_mapping(type); 1037 1038 return 0; 1039 1040 err: 1041 for (i = 0; i < type->num_boxes; i++) 1042 kfree(pmus[i].boxes); 1043 kfree(pmus); 1044 1045 return -ENOMEM; 1046 } 1047 1048 static int __init 1049 uncore_types_init(struct intel_uncore_type **types, bool setid) 1050 { 1051 int ret; 1052 1053 for (; *types; types++) { 1054 ret = uncore_type_init(*types, setid); 1055 if (ret) 1056 return ret; 1057 } 1058 return 0; 1059 } 1060 1061 /* 1062 * Get the die information of a PCI device. 1063 * @pdev: The PCI device. 1064 * @die: The die id which the device maps to. 
 */
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
{
	/* Returns 0 with *die set, or -EINVAL when the bus has no die mapping. */
	*die = uncore_pcibus_to_dieid(pdev->bus);
	if (*die < 0)
		return -EINVAL;

	return 0;
}

/*
 * Search every type in uncore_pci_uncores for a box whose control address
 * (type->box_ctls[die] + type->pci_offsets[i]) decodes to the same PCI
 * domain, bus number and devfn as @pdev.
 *
 * Returns the matching PMU (&type->pmus[i]), or NULL when none matches.
 */
static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{
	struct intel_uncore_type **types = uncore_pci_uncores;
	struct intel_uncore_type *type;
	u64 box_ctl;
	int i, die;

	for (; *types; types++) {
		type = *types;
		for (die = 0; die < __uncore_max_dies; die++) {
			for (i = 0; i < type->num_boxes; i++) {
				/* Skip dies without a box control address. */
				if (!type->box_ctls[die])
					continue;
				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
				if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
				    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
				    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
					return &type->pmus[i];
			}
		}
	}

	return NULL;
}

/*
 * Find the PMU of a PCI device.
 * @pdev: The PCI device.
 * @ids: The ID table of the available PCI devices with a PMU.
 *       If NULL, search the whole uncore_pci_uncores.
1106 */ 1107 static struct intel_uncore_pmu * 1108 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) 1109 { 1110 struct intel_uncore_pmu *pmu = NULL; 1111 struct intel_uncore_type *type; 1112 kernel_ulong_t data; 1113 unsigned int devfn; 1114 1115 if (!ids) 1116 return uncore_pci_find_dev_pmu_from_types(pdev); 1117 1118 while (ids && ids->vendor) { 1119 if ((ids->vendor == pdev->vendor) && 1120 (ids->device == pdev->device)) { 1121 data = ids->driver_data; 1122 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), 1123 UNCORE_PCI_DEV_FUNC(data)); 1124 if (devfn == pdev->devfn) { 1125 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; 1126 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; 1127 break; 1128 } 1129 } 1130 ids++; 1131 } 1132 return pmu; 1133 } 1134 1135 /* 1136 * Register the PMU for a PCI device 1137 * @pdev: The PCI device. 1138 * @type: The corresponding PMU type of the device. 1139 * @pmu: The corresponding PMU of the device. 1140 * @die: The die id which the device maps to. 
1141 */ 1142 static int uncore_pci_pmu_register(struct pci_dev *pdev, 1143 struct intel_uncore_type *type, 1144 struct intel_uncore_pmu *pmu, 1145 int die) 1146 { 1147 struct intel_uncore_box *box; 1148 int ret; 1149 1150 if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) 1151 return -EINVAL; 1152 1153 box = uncore_alloc_box(type, NUMA_NO_NODE); 1154 if (!box) 1155 return -ENOMEM; 1156 1157 if (pmu->func_id < 0) 1158 pmu->func_id = pdev->devfn; 1159 else 1160 WARN_ON_ONCE(pmu->func_id != pdev->devfn); 1161 1162 atomic_inc(&box->refcnt); 1163 box->dieid = die; 1164 box->pci_dev = pdev; 1165 box->pmu = pmu; 1166 uncore_box_init(box); 1167 1168 pmu->boxes[die] = box; 1169 if (atomic_inc_return(&pmu->activeboxes) > 1) 1170 return 0; 1171 1172 /* First active box registers the pmu */ 1173 ret = uncore_pmu_register(pmu); 1174 if (ret) { 1175 pmu->boxes[die] = NULL; 1176 uncore_box_exit(box); 1177 kfree(box); 1178 } 1179 return ret; 1180 } 1181 1182 /* 1183 * add a pci uncore device 1184 */ 1185 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1186 { 1187 struct intel_uncore_type *type; 1188 struct intel_uncore_pmu *pmu = NULL; 1189 int die, ret; 1190 1191 ret = uncore_pci_get_dev_die_info(pdev, &die); 1192 if (ret) 1193 return ret; 1194 1195 if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { 1196 int idx = UNCORE_PCI_DEV_IDX(id->driver_data); 1197 1198 uncore_extra_pci_dev[die].dev[idx] = pdev; 1199 pci_set_drvdata(pdev, NULL); 1200 return 0; 1201 } 1202 1203 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 1204 1205 /* 1206 * Some platforms, e.g. Knights Landing, use a common PCI device ID 1207 * for multiple instances of an uncore PMU device type. We should check 1208 * PCI slot and func to indicate the uncore box. 
1209 */ 1210 if (id->driver_data & ~0xffff) { 1211 struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); 1212 1213 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); 1214 if (pmu == NULL) 1215 return -ENODEV; 1216 } else { 1217 /* 1218 * for performance monitoring unit with multiple boxes, 1219 * each box has a different function id. 1220 */ 1221 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; 1222 } 1223 1224 ret = uncore_pci_pmu_register(pdev, type, pmu, die); 1225 1226 pci_set_drvdata(pdev, pmu->boxes[die]); 1227 1228 return ret; 1229 } 1230 1231 /* 1232 * Unregister the PMU of a PCI device 1233 * @pmu: The corresponding PMU is unregistered. 1234 * @die: The die id which the device maps to. 1235 */ 1236 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) 1237 { 1238 struct intel_uncore_box *box = pmu->boxes[die]; 1239 1240 pmu->boxes[die] = NULL; 1241 if (atomic_dec_return(&pmu->activeboxes) == 0) 1242 uncore_pmu_unregister(pmu); 1243 uncore_box_exit(box); 1244 kfree(box); 1245 } 1246 1247 static void uncore_pci_remove(struct pci_dev *pdev) 1248 { 1249 struct intel_uncore_box *box; 1250 struct intel_uncore_pmu *pmu; 1251 int i, die; 1252 1253 if (uncore_pci_get_dev_die_info(pdev, &die)) 1254 return; 1255 1256 box = pci_get_drvdata(pdev); 1257 if (!box) { 1258 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { 1259 if (uncore_extra_pci_dev[die].dev[i] == pdev) { 1260 uncore_extra_pci_dev[die].dev[i] = NULL; 1261 break; 1262 } 1263 } 1264 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); 1265 return; 1266 } 1267 1268 pmu = box->pmu; 1269 1270 pci_set_drvdata(pdev, NULL); 1271 1272 uncore_pci_pmu_unregister(pmu, die); 1273 } 1274 1275 static int uncore_bus_notify(struct notifier_block *nb, 1276 unsigned long action, void *data, 1277 const struct pci_device_id *ids) 1278 { 1279 struct device *dev = data; 1280 struct pci_dev *pdev = to_pci_dev(dev); 1281 struct intel_uncore_pmu *pmu; 1282 int die; 1283 1284 /* Unregister the PMU 
when the device is going to be deleted. */ 1285 if (action != BUS_NOTIFY_DEL_DEVICE) 1286 return NOTIFY_DONE; 1287 1288 pmu = uncore_pci_find_dev_pmu(pdev, ids); 1289 if (!pmu) 1290 return NOTIFY_DONE; 1291 1292 if (uncore_pci_get_dev_die_info(pdev, &die)) 1293 return NOTIFY_DONE; 1294 1295 uncore_pci_pmu_unregister(pmu, die); 1296 1297 return NOTIFY_OK; 1298 } 1299 1300 static int uncore_pci_sub_bus_notify(struct notifier_block *nb, 1301 unsigned long action, void *data) 1302 { 1303 return uncore_bus_notify(nb, action, data, 1304 uncore_pci_sub_driver->id_table); 1305 } 1306 1307 static struct notifier_block uncore_pci_sub_notifier = { 1308 .notifier_call = uncore_pci_sub_bus_notify, 1309 }; 1310 1311 static void uncore_pci_sub_driver_init(void) 1312 { 1313 const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; 1314 struct intel_uncore_type *type; 1315 struct intel_uncore_pmu *pmu; 1316 struct pci_dev *pci_sub_dev; 1317 bool notify = false; 1318 unsigned int devfn; 1319 int die; 1320 1321 while (ids && ids->vendor) { 1322 pci_sub_dev = NULL; 1323 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; 1324 /* 1325 * Search the available device, and register the 1326 * corresponding PMU. 
1327 */ 1328 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 1329 ids->device, pci_sub_dev))) { 1330 devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), 1331 UNCORE_PCI_DEV_FUNC(ids->driver_data)); 1332 if (devfn != pci_sub_dev->devfn) 1333 continue; 1334 1335 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; 1336 if (!pmu) 1337 continue; 1338 1339 if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) 1340 continue; 1341 1342 if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, 1343 die)) 1344 notify = true; 1345 } 1346 ids++; 1347 } 1348 1349 if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) 1350 notify = false; 1351 1352 if (!notify) 1353 uncore_pci_sub_driver = NULL; 1354 } 1355 1356 static int uncore_pci_bus_notify(struct notifier_block *nb, 1357 unsigned long action, void *data) 1358 { 1359 return uncore_bus_notify(nb, action, data, NULL); 1360 } 1361 1362 static struct notifier_block uncore_pci_notifier = { 1363 .notifier_call = uncore_pci_bus_notify, 1364 }; 1365 1366 1367 static void uncore_pci_pmus_register(void) 1368 { 1369 struct intel_uncore_type **types = uncore_pci_uncores; 1370 struct intel_uncore_type *type; 1371 struct intel_uncore_pmu *pmu; 1372 struct pci_dev *pdev; 1373 u64 box_ctl; 1374 int i, die; 1375 1376 for (; *types; types++) { 1377 type = *types; 1378 for (die = 0; die < __uncore_max_dies; die++) { 1379 for (i = 0; i < type->num_boxes; i++) { 1380 if (!type->box_ctls[die]) 1381 continue; 1382 box_ctl = type->box_ctls[die] + type->pci_offsets[i]; 1383 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), 1384 UNCORE_DISCOVERY_PCI_BUS(box_ctl), 1385 UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); 1386 if (!pdev) 1387 continue; 1388 pmu = &type->pmus[i]; 1389 1390 uncore_pci_pmu_register(pdev, type, pmu, die); 1391 } 1392 } 1393 } 1394 1395 bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); 1396 } 1397 1398 static int __init uncore_pci_init(void) 1399 { 1400 size_t size; 1401 
int ret; 1402 1403 size = uncore_max_dies() * sizeof(struct pci_extra_dev); 1404 uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); 1405 if (!uncore_extra_pci_dev) { 1406 ret = -ENOMEM; 1407 goto err; 1408 } 1409 1410 ret = uncore_types_init(uncore_pci_uncores, false); 1411 if (ret) 1412 goto errtype; 1413 1414 if (uncore_pci_driver) { 1415 uncore_pci_driver->probe = uncore_pci_probe; 1416 uncore_pci_driver->remove = uncore_pci_remove; 1417 1418 ret = pci_register_driver(uncore_pci_driver); 1419 if (ret) 1420 goto errtype; 1421 } else 1422 uncore_pci_pmus_register(); 1423 1424 if (uncore_pci_sub_driver) 1425 uncore_pci_sub_driver_init(); 1426 1427 pcidrv_registered = true; 1428 return 0; 1429 1430 errtype: 1431 uncore_types_exit(uncore_pci_uncores); 1432 kfree(uncore_extra_pci_dev); 1433 uncore_extra_pci_dev = NULL; 1434 uncore_free_pcibus_map(); 1435 err: 1436 uncore_pci_uncores = empty_uncore; 1437 return ret; 1438 } 1439 1440 static void uncore_pci_exit(void) 1441 { 1442 if (pcidrv_registered) { 1443 pcidrv_registered = false; 1444 if (uncore_pci_sub_driver) 1445 bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); 1446 if (uncore_pci_driver) 1447 pci_unregister_driver(uncore_pci_driver); 1448 else 1449 bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); 1450 uncore_types_exit(uncore_pci_uncores); 1451 kfree(uncore_extra_pci_dev); 1452 uncore_free_pcibus_map(); 1453 } 1454 } 1455 1456 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, 1457 int new_cpu) 1458 { 1459 struct intel_uncore_pmu *pmu = type->pmus; 1460 struct intel_uncore_box *box; 1461 int i, die; 1462 1463 die = topology_logical_die_id(old_cpu < 0 ? 
new_cpu : old_cpu); 1464 for (i = 0; i < type->num_boxes; i++, pmu++) { 1465 box = pmu->boxes[die]; 1466 if (!box) 1467 continue; 1468 1469 if (old_cpu < 0) { 1470 WARN_ON_ONCE(box->cpu != -1); 1471 box->cpu = new_cpu; 1472 continue; 1473 } 1474 1475 WARN_ON_ONCE(box->cpu != old_cpu); 1476 box->cpu = -1; 1477 if (new_cpu < 0) 1478 continue; 1479 1480 uncore_pmu_cancel_hrtimer(box); 1481 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); 1482 box->cpu = new_cpu; 1483 } 1484 } 1485 1486 static void uncore_change_context(struct intel_uncore_type **uncores, 1487 int old_cpu, int new_cpu) 1488 { 1489 for (; *uncores; uncores++) 1490 uncore_change_type_ctx(*uncores, old_cpu, new_cpu); 1491 } 1492 1493 static void uncore_box_unref(struct intel_uncore_type **types, int id) 1494 { 1495 struct intel_uncore_type *type; 1496 struct intel_uncore_pmu *pmu; 1497 struct intel_uncore_box *box; 1498 int i; 1499 1500 for (; *types; types++) { 1501 type = *types; 1502 pmu = type->pmus; 1503 for (i = 0; i < type->num_boxes; i++, pmu++) { 1504 box = pmu->boxes[id]; 1505 if (box && atomic_dec_return(&box->refcnt) == 0) 1506 uncore_box_exit(box); 1507 } 1508 } 1509 } 1510 1511 static int uncore_event_cpu_offline(unsigned int cpu) 1512 { 1513 int die, target; 1514 1515 /* Check if exiting cpu is used for collecting uncore events */ 1516 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) 1517 goto unref; 1518 /* Find a new cpu to collect uncore events */ 1519 target = cpumask_any_but(topology_die_cpumask(cpu), cpu); 1520 1521 /* Migrate uncore events to the new target */ 1522 if (target < nr_cpu_ids) 1523 cpumask_set_cpu(target, &uncore_cpu_mask); 1524 else 1525 target = -1; 1526 1527 uncore_change_context(uncore_msr_uncores, cpu, target); 1528 uncore_change_context(uncore_mmio_uncores, cpu, target); 1529 uncore_change_context(uncore_pci_uncores, cpu, target); 1530 1531 unref: 1532 /* Clear the references */ 1533 die = topology_logical_die_id(cpu); 1534 
uncore_box_unref(uncore_msr_uncores, die); 1535 uncore_box_unref(uncore_mmio_uncores, die); 1536 return 0; 1537 } 1538 1539 static int allocate_boxes(struct intel_uncore_type **types, 1540 unsigned int die, unsigned int cpu) 1541 { 1542 struct intel_uncore_box *box, *tmp; 1543 struct intel_uncore_type *type; 1544 struct intel_uncore_pmu *pmu; 1545 LIST_HEAD(allocated); 1546 int i; 1547 1548 /* Try to allocate all required boxes */ 1549 for (; *types; types++) { 1550 type = *types; 1551 pmu = type->pmus; 1552 for (i = 0; i < type->num_boxes; i++, pmu++) { 1553 if (pmu->boxes[die]) 1554 continue; 1555 box = uncore_alloc_box(type, cpu_to_node(cpu)); 1556 if (!box) 1557 goto cleanup; 1558 box->pmu = pmu; 1559 box->dieid = die; 1560 list_add(&box->active_list, &allocated); 1561 } 1562 } 1563 /* Install them in the pmus */ 1564 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1565 list_del_init(&box->active_list); 1566 box->pmu->boxes[die] = box; 1567 } 1568 return 0; 1569 1570 cleanup: 1571 list_for_each_entry_safe(box, tmp, &allocated, active_list) { 1572 list_del_init(&box->active_list); 1573 kfree(box); 1574 } 1575 return -ENOMEM; 1576 } 1577 1578 static int uncore_box_ref(struct intel_uncore_type **types, 1579 int id, unsigned int cpu) 1580 { 1581 struct intel_uncore_type *type; 1582 struct intel_uncore_pmu *pmu; 1583 struct intel_uncore_box *box; 1584 int i, ret; 1585 1586 ret = allocate_boxes(types, id, cpu); 1587 if (ret) 1588 return ret; 1589 1590 for (; *types; types++) { 1591 type = *types; 1592 pmu = type->pmus; 1593 for (i = 0; i < type->num_boxes; i++, pmu++) { 1594 box = pmu->boxes[id]; 1595 if (box && atomic_inc_return(&box->refcnt) == 1) 1596 uncore_box_init(box); 1597 } 1598 } 1599 return 0; 1600 } 1601 1602 static int uncore_event_cpu_online(unsigned int cpu) 1603 { 1604 int die, target, msr_ret, mmio_ret; 1605 1606 die = topology_logical_die_id(cpu); 1607 msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); 1608 mmio_ret = 
uncore_box_ref(uncore_mmio_uncores, die, cpu); 1609 if (msr_ret && mmio_ret) 1610 return -ENOMEM; 1611 1612 /* 1613 * Check if there is an online cpu in the package 1614 * which collects uncore events already. 1615 */ 1616 target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); 1617 if (target < nr_cpu_ids) 1618 return 0; 1619 1620 cpumask_set_cpu(cpu, &uncore_cpu_mask); 1621 1622 if (!msr_ret) 1623 uncore_change_context(uncore_msr_uncores, -1, cpu); 1624 if (!mmio_ret) 1625 uncore_change_context(uncore_mmio_uncores, -1, cpu); 1626 uncore_change_context(uncore_pci_uncores, -1, cpu); 1627 return 0; 1628 } 1629 1630 static int __init type_pmu_register(struct intel_uncore_type *type) 1631 { 1632 int i, ret; 1633 1634 for (i = 0; i < type->num_boxes; i++) { 1635 ret = uncore_pmu_register(&type->pmus[i]); 1636 if (ret) 1637 return ret; 1638 } 1639 return 0; 1640 } 1641 1642 static int __init uncore_msr_pmus_register(void) 1643 { 1644 struct intel_uncore_type **types = uncore_msr_uncores; 1645 int ret; 1646 1647 for (; *types; types++) { 1648 ret = type_pmu_register(*types); 1649 if (ret) 1650 return ret; 1651 } 1652 return 0; 1653 } 1654 1655 static int __init uncore_cpu_init(void) 1656 { 1657 int ret; 1658 1659 ret = uncore_types_init(uncore_msr_uncores, true); 1660 if (ret) 1661 goto err; 1662 1663 ret = uncore_msr_pmus_register(); 1664 if (ret) 1665 goto err; 1666 return 0; 1667 err: 1668 uncore_types_exit(uncore_msr_uncores); 1669 uncore_msr_uncores = empty_uncore; 1670 return ret; 1671 } 1672 1673 static int __init uncore_mmio_init(void) 1674 { 1675 struct intel_uncore_type **types = uncore_mmio_uncores; 1676 int ret; 1677 1678 ret = uncore_types_init(types, true); 1679 if (ret) 1680 goto err; 1681 1682 for (; *types; types++) { 1683 ret = type_pmu_register(*types); 1684 if (ret) 1685 goto err; 1686 } 1687 return 0; 1688 err: 1689 uncore_types_exit(uncore_mmio_uncores); 1690 uncore_mmio_uncores = empty_uncore; 1691 return ret; 1692 } 1693 1694 struct 
intel_uncore_init_fun { 1695 void (*cpu_init)(void); 1696 int (*pci_init)(void); 1697 void (*mmio_init)(void); 1698 /* Discovery table is required */ 1699 bool use_discovery; 1700 /* The units in the discovery table should be ignored. */ 1701 int *uncore_units_ignore; 1702 }; 1703 1704 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { 1705 .cpu_init = nhm_uncore_cpu_init, 1706 }; 1707 1708 static const struct intel_uncore_init_fun snb_uncore_init __initconst = { 1709 .cpu_init = snb_uncore_cpu_init, 1710 .pci_init = snb_uncore_pci_init, 1711 }; 1712 1713 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { 1714 .cpu_init = snb_uncore_cpu_init, 1715 .pci_init = ivb_uncore_pci_init, 1716 }; 1717 1718 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { 1719 .cpu_init = snb_uncore_cpu_init, 1720 .pci_init = hsw_uncore_pci_init, 1721 }; 1722 1723 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { 1724 .cpu_init = snb_uncore_cpu_init, 1725 .pci_init = bdw_uncore_pci_init, 1726 }; 1727 1728 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { 1729 .cpu_init = snbep_uncore_cpu_init, 1730 .pci_init = snbep_uncore_pci_init, 1731 }; 1732 1733 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { 1734 .cpu_init = nhmex_uncore_cpu_init, 1735 }; 1736 1737 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { 1738 .cpu_init = ivbep_uncore_cpu_init, 1739 .pci_init = ivbep_uncore_pci_init, 1740 }; 1741 1742 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { 1743 .cpu_init = hswep_uncore_cpu_init, 1744 .pci_init = hswep_uncore_pci_init, 1745 }; 1746 1747 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { 1748 .cpu_init = bdx_uncore_cpu_init, 1749 .pci_init = bdx_uncore_pci_init, 1750 }; 1751 1752 static const struct intel_uncore_init_fun knl_uncore_init __initconst = { 1753 
.cpu_init = knl_uncore_cpu_init, 1754 .pci_init = knl_uncore_pci_init, 1755 }; 1756 1757 static const struct intel_uncore_init_fun skl_uncore_init __initconst = { 1758 .cpu_init = skl_uncore_cpu_init, 1759 .pci_init = skl_uncore_pci_init, 1760 }; 1761 1762 static const struct intel_uncore_init_fun skx_uncore_init __initconst = { 1763 .cpu_init = skx_uncore_cpu_init, 1764 .pci_init = skx_uncore_pci_init, 1765 }; 1766 1767 static const struct intel_uncore_init_fun icl_uncore_init __initconst = { 1768 .cpu_init = icl_uncore_cpu_init, 1769 .pci_init = skl_uncore_pci_init, 1770 }; 1771 1772 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1773 .cpu_init = tgl_uncore_cpu_init, 1774 .mmio_init = tgl_uncore_mmio_init, 1775 }; 1776 1777 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1778 .cpu_init = tgl_uncore_cpu_init, 1779 .mmio_init = tgl_l_uncore_mmio_init, 1780 }; 1781 1782 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { 1783 .cpu_init = tgl_uncore_cpu_init, 1784 .pci_init = skl_uncore_pci_init, 1785 }; 1786 1787 static const struct intel_uncore_init_fun adl_uncore_init __initconst = { 1788 .cpu_init = adl_uncore_cpu_init, 1789 .mmio_init = adl_uncore_mmio_init, 1790 }; 1791 1792 static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { 1793 .cpu_init = mtl_uncore_cpu_init, 1794 .mmio_init = adl_uncore_mmio_init, 1795 }; 1796 1797 static const struct intel_uncore_init_fun icx_uncore_init __initconst = { 1798 .cpu_init = icx_uncore_cpu_init, 1799 .pci_init = icx_uncore_pci_init, 1800 .mmio_init = icx_uncore_mmio_init, 1801 }; 1802 1803 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1804 .cpu_init = snr_uncore_cpu_init, 1805 .pci_init = snr_uncore_pci_init, 1806 .mmio_init = snr_uncore_mmio_init, 1807 }; 1808 1809 static const struct intel_uncore_init_fun spr_uncore_init __initconst = { 1810 .cpu_init = spr_uncore_cpu_init, 1811 .pci_init = 
spr_uncore_pci_init, 1812 .mmio_init = spr_uncore_mmio_init, 1813 .use_discovery = true, 1814 .uncore_units_ignore = spr_uncore_units_ignore, 1815 }; 1816 1817 static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1818 .cpu_init = gnr_uncore_cpu_init, 1819 .pci_init = gnr_uncore_pci_init, 1820 .mmio_init = gnr_uncore_mmio_init, 1821 .use_discovery = true, 1822 .uncore_units_ignore = gnr_uncore_units_ignore, 1823 }; 1824 1825 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1826 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1827 .pci_init = intel_uncore_generic_uncore_pci_init, 1828 .mmio_init = intel_uncore_generic_uncore_mmio_init, 1829 }; 1830 1831 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1832 X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), 1833 X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), 1834 X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), 1835 X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), 1836 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), 1837 X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), 1838 X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), 1839 X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), 1840 X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), 1841 X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), 1842 X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), 1843 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), 1844 X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), 1845 X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init), 1846 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), 1847 X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), 1848 X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), 1849 X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), 1850 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), 1851 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), 1852 X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), 1853 X86_MATCH_VFM(INTEL_SKYLAKE_L, 
&skl_uncore_init), 1854 X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), 1855 X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), 1856 X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), 1857 X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), 1858 X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), 1859 X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), 1860 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), 1861 X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), 1862 X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), 1863 X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), 1864 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), 1865 X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), 1866 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), 1867 X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), 1868 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), 1869 X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), 1870 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), 1871 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), 1872 X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), 1873 X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), 1874 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), 1875 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), 1876 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), 1877 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), 1878 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), 1879 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), 1880 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), 1881 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), 1882 {}, 1883 }; 1884 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1885 1886 static int __init intel_uncore_init(void) 1887 { 1888 const struct x86_cpu_id *id; 1889 struct intel_uncore_init_fun *uncore_init; 1890 int pret = 0, cret = 0, mret = 0, ret; 1891 1892 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1893 return -ENODEV; 1894 1895 __uncore_max_dies = 1896 
topology_max_packages() * topology_max_dies_per_package(); 1897 1898 id = x86_match_cpu(intel_uncore_match); 1899 if (!id) { 1900 if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) 1901 uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; 1902 else 1903 return -ENODEV; 1904 } else { 1905 uncore_init = (struct intel_uncore_init_fun *)id->driver_data; 1906 if (uncore_no_discover && uncore_init->use_discovery) 1907 return -ENODEV; 1908 if (uncore_init->use_discovery && 1909 !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) 1910 return -ENODEV; 1911 } 1912 1913 if (uncore_init->pci_init) { 1914 pret = uncore_init->pci_init(); 1915 if (!pret) 1916 pret = uncore_pci_init(); 1917 } 1918 1919 if (uncore_init->cpu_init) { 1920 uncore_init->cpu_init(); 1921 cret = uncore_cpu_init(); 1922 } 1923 1924 if (uncore_init->mmio_init) { 1925 uncore_init->mmio_init(); 1926 mret = uncore_mmio_init(); 1927 } 1928 1929 if (cret && pret && mret) { 1930 ret = -ENODEV; 1931 goto free_discovery; 1932 } 1933 1934 /* Install hotplug callbacks to setup the targets for each package */ 1935 ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, 1936 "perf/x86/intel/uncore:online", 1937 uncore_event_cpu_online, 1938 uncore_event_cpu_offline); 1939 if (ret) 1940 goto err; 1941 return 0; 1942 1943 err: 1944 uncore_types_exit(uncore_msr_uncores); 1945 uncore_types_exit(uncore_mmio_uncores); 1946 uncore_pci_exit(); 1947 free_discovery: 1948 intel_uncore_clear_discovery_tables(); 1949 return ret; 1950 } 1951 module_init(intel_uncore_init); 1952 1953 static void __exit intel_uncore_exit(void) 1954 { 1955 cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); 1956 uncore_types_exit(uncore_msr_uncores); 1957 uncore_types_exit(uncore_mmio_uncores); 1958 uncore_pci_exit(); 1959 intel_uncore_clear_discovery_tables(); 1960 } 1961 module_exit(intel_uncore_exit); 1962