xref: /linux/arch/x86/events/intel/uncore.c (revision c79c3c34f75d72a066e292b10aa50fc758c97c89)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3 
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7 
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
12 
13 static bool pcidrv_registered;
14 struct pci_driver *uncore_pci_driver;
15 /* The PCI driver for devices which the uncore driver doesn't own. */
16 struct pci_driver *uncore_pci_sub_driver;
17 /* pci bus to socket mapping */
18 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
19 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
20 struct pci_extra_dev *uncore_extra_pci_dev;
21 int __uncore_max_dies;
22 
23 /* mask of cpus that collect uncore events */
24 static cpumask_t uncore_cpu_mask;
25 
26 /* constraint for the fixed counter */
27 static struct event_constraint uncore_constraint_fixed =
28 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
29 struct event_constraint uncore_constraint_empty =
30 	EVENT_CONSTRAINT(0, 0, 0);
31 
32 MODULE_LICENSE("GPL");
33 
34 int uncore_pcibus_to_dieid(struct pci_bus *bus)
35 {
36 	struct pci2phy_map *map;
37 	int die_id = -1;
38 
39 	raw_spin_lock(&pci2phy_map_lock);
40 	list_for_each_entry(map, &pci2phy_map_head, list) {
41 		if (map->segment == pci_domain_nr(bus)) {
42 			die_id = map->pbus_to_dieid[bus->number];
43 			break;
44 		}
45 	}
46 	raw_spin_unlock(&pci2phy_map_lock);
47 
48 	return die_id;
49 }
50 
51 static void uncore_free_pcibus_map(void)
52 {
53 	struct pci2phy_map *map, *tmp;
54 
55 	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
56 		list_del(&map->list);
57 		kfree(map);
58 	}
59 }
60 
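/*
 * __find_pci2phy_map() must be called with pci2phy_map_lock held.  The
 * GFP_KERNEL allocation below may sleep and therefore cannot be done under
 * the raw spinlock, so the lock is dropped around kmalloc() and the list is
 * re-scanned afterwards in case another CPU inserted the same segment in
 * the meantime.
 */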
61 struct pci2phy_map *__find_pci2phy_map(int segment)
62 {
63 	struct pci2phy_map *map, *alloc = NULL;
64 	int i;
65 
66 	lockdep_assert_held(&pci2phy_map_lock);
67 
68 lookup:
69 	list_for_each_entry(map, &pci2phy_map_head, list) {
70 		if (map->segment == segment)
71 			goto end;
72 	}
73 
74 	if (!alloc) {
75 		raw_spin_unlock(&pci2phy_map_lock);
76 		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
77 		raw_spin_lock(&pci2phy_map_lock);
78 
79 		if (!alloc)
80 			return NULL;
81 
82 		goto lookup;
83 	}
84 
85 	map = alloc;
86 	alloc = NULL;
87 	map->segment = segment;
88 	for (i = 0; i < 256; i++)
89 		map->pbus_to_dieid[i] = -1;
90 	list_add_tail(&map->list, &pci2phy_map_head);
91 
92 end:
93 	kfree(alloc);
94 	return map;
95 }
96 
97 ssize_t uncore_event_show(struct device *dev,
98 			  struct device_attribute *attr, char *buf)
99 {
100 	struct uncore_event_desc *event =
101 		container_of(attr, struct uncore_event_desc, attr);
102 	return sprintf(buf, "%s", event->config);
103 }
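
/*
 * uncore_event_show() backs the per-event sysfs attributes: each
 * uncore_event_desc carries a config string (e.g. "event=0x01") which is
 * exposed through the "events" attribute group built in uncore_type_init()
 * below, so tools such as perf can resolve symbolic uncore event names.
 */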
104 
105 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
106 {
107 	unsigned int dieid = topology_logical_die_id(cpu);
108 
109 	/*
110 	 * The unsigned check also catches the '-1' return value for
111 	 * non-existent mappings in the topology map.
112 	 */
113 	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
114 }
115 
116 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
117 {
118 	u64 count;
119 
120 	rdmsrl(event->hw.event_base, count);
121 
122 	return count;
123 }
124 
125 void uncore_mmio_exit_box(struct intel_uncore_box *box)
126 {
127 	if (box->io_addr)
128 		iounmap(box->io_addr);
129 }
130 
131 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
132 			     struct perf_event *event)
133 {
134 	if (!box->io_addr)
135 		return 0;
136 
137 	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
138 		return 0;
139 
140 	return readq(box->io_addr + event->hw.event_base);
141 }
142 
143 /*
144  * generic get constraint function for shared match/mask registers.
145  */
146 struct event_constraint *
147 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
148 {
149 	struct intel_uncore_extra_reg *er;
150 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
151 	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
152 	unsigned long flags;
153 	bool ok = false;
154 
155 	/*
156 	 * reg->alloc can be set due to existing state, so for a fake box we
157 	 * need to ignore it, otherwise we might fail to allocate the proper
158 	 * fake state for this extra reg constraint.
159 	 */
160 	if (reg1->idx == EXTRA_REG_NONE ||
161 	    (!uncore_box_is_fake(box) && reg1->alloc))
162 		return NULL;
163 
164 	er = &box->shared_regs[reg1->idx];
165 	raw_spin_lock_irqsave(&er->lock, flags);
166 	if (!atomic_read(&er->ref) ||
167 	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
168 		atomic_inc(&er->ref);
169 		er->config1 = reg1->config;
170 		er->config2 = reg2->config;
171 		ok = true;
172 	}
173 	raw_spin_unlock_irqrestore(&er->lock, flags);
174 
175 	if (ok) {
176 		if (!uncore_box_is_fake(box))
177 			reg1->alloc = 1;
178 		return NULL;
179 	}
180 
181 	return &uncore_constraint_empty;
182 }
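
/*
 * Return convention of the get_constraint path above: NULL means the shared
 * match/mask register was claimed, was already held with the same config, or
 * is not needed at all, i.e. the event is schedulable.
 * &uncore_constraint_empty means the register is in use with a conflicting
 * configuration and the event cannot be scheduled.  uncore_put_constraint()
 * below drops the er->ref reference taken here.
 */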
183 
184 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
185 {
186 	struct intel_uncore_extra_reg *er;
187 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188 
189 	/*
190 	 * Only put the constraint if the extra reg was actually allocated.
191 	 * This also takes care of events which do not use an extra shared reg.
192 	 *
193 	 * Also, if this is a fake box we shouldn't touch any event state
194 	 * (reg->alloc) and we don't care about leaving inconsistent box
195 	 * state either since it will be thrown out.
196 	 */
197 	if (uncore_box_is_fake(box) || !reg1->alloc)
198 		return;
199 
200 	er = &box->shared_regs[reg1->idx];
201 	atomic_dec(&er->ref);
202 	reg1->alloc = 0;
203 }
204 
205 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
206 {
207 	struct intel_uncore_extra_reg *er;
208 	unsigned long flags;
209 	u64 config;
210 
211 	er = &box->shared_regs[idx];
212 
213 	raw_spin_lock_irqsave(&er->lock, flags);
214 	config = er->config;
215 	raw_spin_unlock_irqrestore(&er->lock, flags);
216 
217 	return config;
218 }
219 
220 static void uncore_assign_hw_event(struct intel_uncore_box *box,
221 				   struct perf_event *event, int idx)
222 {
223 	struct hw_perf_event *hwc = &event->hw;
224 
225 	hwc->idx = idx;
226 	hwc->last_tag = ++box->tags[idx];
227 
228 	if (uncore_pmc_fixed(hwc->idx)) {
229 		hwc->event_base = uncore_fixed_ctr(box);
230 		hwc->config_base = uncore_fixed_ctl(box);
231 		return;
232 	}
233 
234 	hwc->config_base = uncore_event_ctl(box, hwc->idx);
235 	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
236 }
237 
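/*
 * Counter widths differ between free running, fixed and generic counters.
 * uncore_perf_event_update() copes with that by shifting both snapshots up
 * into the top bits before subtracting, so wrap-arounds of the narrower
 * hardware counter are accounted for correctly.
 *
 * Worked example, assuming a 48-bit generic counter (shift = 16):
 *	prev = 0xffffffffffff, new = 0x000000000005
 *	delta = ((new << 16) - (prev << 16)) >> 16 = 6
 */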
238 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
239 {
240 	u64 prev_count, new_count, delta;
241 	int shift;
242 
243 	if (uncore_pmc_freerunning(event->hw.idx))
244 		shift = 64 - uncore_freerunning_bits(box, event);
245 	else if (uncore_pmc_fixed(event->hw.idx))
246 		shift = 64 - uncore_fixed_ctr_bits(box);
247 	else
248 		shift = 64 - uncore_perf_ctr_bits(box);
249 
250 	/* the hrtimer might modify the previous event value */
251 again:
252 	prev_count = local64_read(&event->hw.prev_count);
253 	new_count = uncore_read_counter(box, event);
254 	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
255 		goto again;
256 
257 	delta = (new_count << shift) - (prev_count << shift);
258 	delta >>= shift;
259 
260 	local64_add(delta, &event->count);
261 }
262 
263 /*
264  * The overflow interrupt is unavailable on SandyBridge-EP and broken on
265  * SandyBridge, so we use an hrtimer to periodically poll the counters
266  * to avoid overflow.
267  */
268 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
269 {
270 	struct intel_uncore_box *box;
271 	struct perf_event *event;
272 	unsigned long flags;
273 	int bit;
274 
275 	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
276 	if (!box->n_active || box->cpu != smp_processor_id())
277 		return HRTIMER_NORESTART;
278 	/*
279 	 * Disable local interrupts to prevent uncore_pmu_event_start/stop
280 	 * from interrupting the update process.
281 	 */
282 	local_irq_save(flags);
283 
284 	/*
285 	 * handle boxes with an active event list as opposed to active
286 	 * counters
287 	 */
288 	list_for_each_entry(event, &box->active_list, active_entry) {
289 		uncore_perf_event_update(box, event);
290 	}
291 
292 	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
293 		uncore_perf_event_update(box, box->events[bit]);
294 
295 	local_irq_restore(flags);
296 
297 	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
298 	return HRTIMER_RESTART;
299 }
300 
301 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
302 {
303 	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
304 		      HRTIMER_MODE_REL_PINNED);
305 }
306 
307 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
308 {
309 	hrtimer_cancel(&box->hrtimer);
310 }
311 
312 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
313 {
314 	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
315 	box->hrtimer.function = uncore_pmu_hrtimer;
316 }
317 
318 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
319 						 int node)
320 {
321 	int i, size, numshared = type->num_shared_regs;
322 	struct intel_uncore_box *box;
323 
324 	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
325 
326 	box = kzalloc_node(size, GFP_KERNEL, node);
327 	if (!box)
328 		return NULL;
329 
330 	for (i = 0; i < numshared; i++)
331 		raw_spin_lock_init(&box->shared_regs[i].lock);
332 
333 	uncore_pmu_init_hrtimer(box);
334 	box->cpu = -1;
335 	box->dieid = -1;
336 
337 	/* set default hrtimer timeout */
338 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
339 
340 	INIT_LIST_HEAD(&box->active_list);
341 
342 	return box;
343 }
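
/*
 * Layout note: struct intel_uncore_box is followed directly by
 * type->num_shared_regs intel_uncore_extra_reg slots, which is why the
 * allocation size above is computed by hand and shared_regs[] may be
 * indexed up to numshared - 1.
 */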
344 
345 /*
346  * The uncore_pmu_event_init() pmu event_init callback is used
347  * as a detection point for uncore events.
348  */
349 static int uncore_pmu_event_init(struct perf_event *event);
350 
351 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
352 {
353 	return &box->pmu->pmu == event->pmu;
354 }
355 
356 static int
357 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
358 		      bool dogrp)
359 {
360 	struct perf_event *event;
361 	int n, max_count;
362 
363 	max_count = box->pmu->type->num_counters;
364 	if (box->pmu->type->fixed_ctl)
365 		max_count++;
366 
367 	if (box->n_events >= max_count)
368 		return -EINVAL;
369 
370 	n = box->n_events;
371 
372 	if (is_box_event(box, leader)) {
373 		box->event_list[n] = leader;
374 		n++;
375 	}
376 
377 	if (!dogrp)
378 		return n;
379 
380 	for_each_sibling_event(event, leader) {
381 		if (!is_box_event(box, event) ||
382 		    event->state <= PERF_EVENT_STATE_OFF)
383 			continue;
384 
385 		if (n >= max_count)
386 			return -EINVAL;
387 
388 		box->event_list[n] = event;
389 		n++;
390 	}
391 	return n;
392 }
393 
394 static struct event_constraint *
395 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
396 {
397 	struct intel_uncore_type *type = box->pmu->type;
398 	struct event_constraint *c;
399 
400 	if (type->ops->get_constraint) {
401 		c = type->ops->get_constraint(box, event);
402 		if (c)
403 			return c;
404 	}
405 
406 	if (event->attr.config == UNCORE_FIXED_EVENT)
407 		return &uncore_constraint_fixed;
408 
409 	if (type->constraints) {
410 		for_each_event_constraint(c, type->constraints) {
411 			if ((event->hw.config & c->cmask) == c->code)
412 				return c;
413 		}
414 	}
415 
416 	return &type->unconstrainted;
417 }
418 
419 static void uncore_put_event_constraint(struct intel_uncore_box *box,
420 					struct perf_event *event)
421 {
422 	if (box->pmu->type->ops->put_constraint)
423 		box->pmu->type->ops->put_constraint(box, event);
424 }
425 
426 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
427 {
428 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
429 	struct event_constraint *c;
430 	int i, wmin, wmax, ret = 0;
431 	struct hw_perf_event *hwc;
432 
433 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
434 
435 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
436 		c = uncore_get_event_constraint(box, box->event_list[i]);
437 		box->event_constraint[i] = c;
438 		wmin = min(wmin, c->weight);
439 		wmax = max(wmax, c->weight);
440 	}
441 
442 	/* fastpath, try to reuse previous register */
443 	for (i = 0; i < n; i++) {
444 		hwc = &box->event_list[i]->hw;
445 		c = box->event_constraint[i];
446 
447 		/* never assigned */
448 		if (hwc->idx == -1)
449 			break;
450 
451 		/* constraint still honored */
452 		if (!test_bit(hwc->idx, c->idxmsk))
453 			break;
454 
455 		/* not already used */
456 		if (test_bit(hwc->idx, used_mask))
457 			break;
458 
459 		__set_bit(hwc->idx, used_mask);
460 		if (assign)
461 			assign[i] = hwc->idx;
462 	}
463 	/* slow path */
464 	if (i != n)
465 		ret = perf_assign_events(box->event_constraint, n,
466 					 wmin, wmax, n, assign);
467 
468 	if (!assign || ret) {
469 		for (i = 0; i < n; i++)
470 			uncore_put_event_constraint(box, box->event_list[i]);
471 	}
472 	return ret ? -EINVAL : 0;
473 }
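
/*
 * uncore_assign_events() first tries the fast path of keeping every event on
 * the counter it already occupies; only if that fails does it fall back to
 * the generic perf_assign_events() weight-ordered solver.  On failure, or
 * when called with assign == NULL for group validation, all constraints
 * collected above are released again.
 */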
474 
475 void uncore_pmu_event_start(struct perf_event *event, int flags)
476 {
477 	struct intel_uncore_box *box = uncore_event_to_box(event);
478 	int idx = event->hw.idx;
479 
480 	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
481 		return;
482 
483 	/*
484 	 * A free running counter is read-only and always active.
485 	 * Use the current counter value as the start point.
486 	 * There is no overflow interrupt for free running counters,
487 	 * so use the hrtimer to periodically poll the counter to avoid overflow.
488 	 */
489 	if (uncore_pmc_freerunning(event->hw.idx)) {
490 		list_add_tail(&event->active_entry, &box->active_list);
491 		local64_set(&event->hw.prev_count,
492 			    uncore_read_counter(box, event));
493 		if (box->n_active++ == 0)
494 			uncore_pmu_start_hrtimer(box);
495 		return;
496 	}
497 
498 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
499 		return;
500 
501 	event->hw.state = 0;
502 	box->events[idx] = event;
503 	box->n_active++;
504 	__set_bit(idx, box->active_mask);
505 
506 	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
507 	uncore_enable_event(box, event);
508 
509 	if (box->n_active == 1)
510 		uncore_pmu_start_hrtimer(box);
511 }
512 
513 void uncore_pmu_event_stop(struct perf_event *event, int flags)
514 {
515 	struct intel_uncore_box *box = uncore_event_to_box(event);
516 	struct hw_perf_event *hwc = &event->hw;
517 
518 	/* Cannot disable a free running counter, which is read-only */
519 	if (uncore_pmc_freerunning(hwc->idx)) {
520 		list_del(&event->active_entry);
521 		if (--box->n_active == 0)
522 			uncore_pmu_cancel_hrtimer(box);
523 		uncore_perf_event_update(box, event);
524 		return;
525 	}
526 
527 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
528 		uncore_disable_event(box, event);
529 		box->n_active--;
530 		box->events[hwc->idx] = NULL;
531 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
532 		hwc->state |= PERF_HES_STOPPED;
533 
534 		if (box->n_active == 0)
535 			uncore_pmu_cancel_hrtimer(box);
536 	}
537 
538 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
539 		/*
540 		 * Drain the remaining delta count out of an event
541 		 * that we are disabling:
542 		 */
543 		uncore_perf_event_update(box, event);
544 		hwc->state |= PERF_HES_UPTODATE;
545 	}
546 }
547 
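/*
 * Adding an event re-runs counter scheduling for the whole box: collect the
 * current events plus the new one, compute an assignment, stop the events
 * that have to move to a different counter, then (re)program and restart
 * everything that is allowed to run.  Free running counters bypass all of
 * this since they are assigned statically in event_init().
 */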
548 int uncore_pmu_event_add(struct perf_event *event, int flags)
549 {
550 	struct intel_uncore_box *box = uncore_event_to_box(event);
551 	struct hw_perf_event *hwc = &event->hw;
552 	int assign[UNCORE_PMC_IDX_MAX];
553 	int i, n, ret;
554 
555 	if (!box)
556 		return -ENODEV;
557 
558 	/*
559 	 * The free running counter is assigned in event_init().
560 	 * The free running counter event and the free running counter
561 	 * are 1:1 mapped, so it doesn't need to be tracked in the event_list.
562 	 */
563 	if (uncore_pmc_freerunning(hwc->idx)) {
564 		if (flags & PERF_EF_START)
565 			uncore_pmu_event_start(event, 0);
566 		return 0;
567 	}
568 
569 	ret = n = uncore_collect_events(box, event, false);
570 	if (ret < 0)
571 		return ret;
572 
573 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
574 	if (!(flags & PERF_EF_START))
575 		hwc->state |= PERF_HES_ARCH;
576 
577 	ret = uncore_assign_events(box, assign, n);
578 	if (ret)
579 		return ret;
580 
581 	/* save events moving to new counters */
582 	for (i = 0; i < box->n_events; i++) {
583 		event = box->event_list[i];
584 		hwc = &event->hw;
585 
586 		if (hwc->idx == assign[i] &&
587 			hwc->last_tag == box->tags[assign[i]])
588 			continue;
589 		/*
590 		 * Ensure we don't accidentally enable a stopped
591 		 * counter simply because we rescheduled.
592 		 */
593 		if (hwc->state & PERF_HES_STOPPED)
594 			hwc->state |= PERF_HES_ARCH;
595 
596 		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
597 	}
598 
599 	/* reprogram moved events into new counters */
600 	for (i = 0; i < n; i++) {
601 		event = box->event_list[i];
602 		hwc = &event->hw;
603 
604 		if (hwc->idx != assign[i] ||
605 			hwc->last_tag != box->tags[assign[i]])
606 			uncore_assign_hw_event(box, event, assign[i]);
607 		else if (i < box->n_events)
608 			continue;
609 
610 		if (hwc->state & PERF_HES_ARCH)
611 			continue;
612 
613 		uncore_pmu_event_start(event, 0);
614 	}
615 	box->n_events = n;
616 
617 	return 0;
618 }
619 
620 void uncore_pmu_event_del(struct perf_event *event, int flags)
621 {
622 	struct intel_uncore_box *box = uncore_event_to_box(event);
623 	int i;
624 
625 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
626 
627 	/*
628 	 * The event for a free running counter is not tracked in the event_list.
629 	 * There is no need to force event->hw.idx = -1 to reassign the counter,
630 	 * because the event and the free running counter are 1:1 mapped.
631 	 */
632 	if (uncore_pmc_freerunning(event->hw.idx))
633 		return;
634 
635 	for (i = 0; i < box->n_events; i++) {
636 		if (event == box->event_list[i]) {
637 			uncore_put_event_constraint(box, event);
638 
639 			for (++i; i < box->n_events; i++)
640 				box->event_list[i - 1] = box->event_list[i];
641 
642 			--box->n_events;
643 			break;
644 		}
645 	}
646 
647 	event->hw.idx = -1;
648 	event->hw.last_tag = ~0ULL;
649 }
650 
651 void uncore_pmu_event_read(struct perf_event *event)
652 {
653 	struct intel_uncore_box *box = uncore_event_to_box(event);
654 	uncore_perf_event_update(box, event);
655 }
656 
657 /*
658  * validation ensures the group can be loaded onto the
659  * PMU if it was the only group available.
660  */
661 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
662 				struct perf_event *event)
663 {
664 	struct perf_event *leader = event->group_leader;
665 	struct intel_uncore_box *fake_box;
666 	int ret = -EINVAL, n;
667 
668 	/* The free running counter is always active. */
669 	if (uncore_pmc_freerunning(event->hw.idx))
670 		return 0;
671 
672 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
673 	if (!fake_box)
674 		return -ENOMEM;
675 
676 	fake_box->pmu = pmu;
677 	/*
678 	 * The event is not yet connected with its siblings,
679 	 * therefore we must first collect the existing siblings
680 	 * and then add the new event before we can simulate
681 	 * the scheduling.
682 	 */
683 	n = uncore_collect_events(fake_box, leader, true);
684 	if (n < 0)
685 		goto out;
686 
687 	fake_box->n_events = n;
688 	n = uncore_collect_events(fake_box, event, false);
689 	if (n < 0)
690 		goto out;
691 
692 	fake_box->n_events = n;
693 
694 	ret = uncore_assign_events(fake_box, NULL, n);
695 out:
696 	kfree(fake_box);
697 	return ret;
698 }
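
/*
 * The fake box used above never touches hardware: uncore_box_init() is not
 * called on it and no hrtimer is started, so a plain kfree() is sufficient
 * once the dry-run scheduling is done.
 */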
699 
700 static int uncore_pmu_event_init(struct perf_event *event)
701 {
702 	struct intel_uncore_pmu *pmu;
703 	struct intel_uncore_box *box;
704 	struct hw_perf_event *hwc = &event->hw;
705 	int ret;
706 
707 	if (event->attr.type != event->pmu->type)
708 		return -ENOENT;
709 
710 	pmu = uncore_event_to_pmu(event);
711 	/* no device found for this pmu */
712 	if (pmu->func_id < 0)
713 		return -ENOENT;
714 
715 	/* Sampling not supported yet */
716 	if (hwc->sample_period)
717 		return -EINVAL;
718 
719 	/*
720 	 * Place all uncore events for a particular physical package
721 	 * onto a single cpu
722 	 */
723 	if (event->cpu < 0)
724 		return -EINVAL;
725 	box = uncore_pmu_to_box(pmu, event->cpu);
726 	if (!box || box->cpu < 0)
727 		return -EINVAL;
728 	event->cpu = box->cpu;
729 	event->pmu_private = box;
730 
731 	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
732 
733 	event->hw.idx = -1;
734 	event->hw.last_tag = ~0ULL;
735 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
736 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
737 
738 	if (event->attr.config == UNCORE_FIXED_EVENT) {
739 		/* no fixed counter */
740 		if (!pmu->type->fixed_ctl)
741 			return -EINVAL;
742 		/*
743 		 * if there is only one fixed counter, only the first pmu
744 		 * can access the fixed counter
745 		 */
746 		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
747 			return -EINVAL;
748 
749 		/* fixed counters have event field hardcoded to zero */
750 		hwc->config = 0ULL;
751 	} else if (is_freerunning_event(event)) {
752 		hwc->config = event->attr.config;
753 		if (!check_valid_freerunning_event(box, event))
754 			return -EINVAL;
755 		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
756 		/*
757 		 * The free running counter event and free running counter
758 		 * are always 1:1 mapped.
759 		 * The free running counter is always active.
760 		 * Assign the free running counter here.
761 		 */
762 		event->hw.event_base = uncore_freerunning_counter(box, event);
763 	} else {
764 		hwc->config = event->attr.config &
765 			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
766 		if (pmu->type->ops->hw_config) {
767 			ret = pmu->type->ops->hw_config(box, event);
768 			if (ret)
769 				return ret;
770 		}
771 	}
772 
773 	if (event->group_leader != event)
774 		ret = uncore_validate_group(pmu, event);
775 	else
776 		ret = 0;
777 
778 	return ret;
779 }
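
/*
 * Constraints that uncore_pmu_event_init() imposes on callers, shown as an
 * illustrative user-space sketch (variable names here are assumptions, this
 * is not part of the kernel build):
 *
 *	struct perf_event_attr attr = { 0 };
 *	attr.size   = sizeof(attr);
 *	attr.type   = uncore_pmu_type;  // "type" file of the uncore PMU in sysfs
 *	attr.config = event_encoding;   // built from the PMU's format/events files
 *	// sample_period must stay 0 (sampling is rejected with -EINVAL) and
 *	// cpu must be >= 0; the event is then moved to the die's reader CPU.
 *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 */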
780 
781 static void uncore_pmu_enable(struct pmu *pmu)
782 {
783 	struct intel_uncore_pmu *uncore_pmu;
784 	struct intel_uncore_box *box;
785 
786 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
787 	if (!uncore_pmu)
788 		return;
789 
790 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
791 	if (!box)
792 		return;
793 
794 	if (uncore_pmu->type->ops->enable_box)
795 		uncore_pmu->type->ops->enable_box(box);
796 }
797 
798 static void uncore_pmu_disable(struct pmu *pmu)
799 {
800 	struct intel_uncore_pmu *uncore_pmu;
801 	struct intel_uncore_box *box;
802 
803 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
804 	if (!uncore_pmu)
805 		return;
806 
807 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
808 	if (!box)
809 		return;
810 
811 	if (uncore_pmu->type->ops->disable_box)
812 		uncore_pmu->type->ops->disable_box(box);
813 }
814 
815 static ssize_t uncore_get_attr_cpumask(struct device *dev,
816 				struct device_attribute *attr, char *buf)
817 {
818 	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
819 }
820 
821 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
822 
823 static struct attribute *uncore_pmu_attrs[] = {
824 	&dev_attr_cpumask.attr,
825 	NULL,
826 };
827 
828 static const struct attribute_group uncore_pmu_attr_group = {
829 	.attrs = uncore_pmu_attrs,
830 };
831 
832 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
833 {
834 	int ret;
835 
836 	if (!pmu->type->pmu) {
837 		pmu->pmu = (struct pmu) {
838 			.attr_groups	= pmu->type->attr_groups,
839 			.task_ctx_nr	= perf_invalid_context,
840 			.pmu_enable	= uncore_pmu_enable,
841 			.pmu_disable	= uncore_pmu_disable,
842 			.event_init	= uncore_pmu_event_init,
843 			.add		= uncore_pmu_event_add,
844 			.del		= uncore_pmu_event_del,
845 			.start		= uncore_pmu_event_start,
846 			.stop		= uncore_pmu_event_stop,
847 			.read		= uncore_pmu_event_read,
848 			.module		= THIS_MODULE,
849 			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
850 			.attr_update	= pmu->type->attr_update,
851 		};
852 	} else {
853 		pmu->pmu = *pmu->type->pmu;
854 		pmu->pmu.attr_groups = pmu->type->attr_groups;
855 		pmu->pmu.attr_update = pmu->type->attr_update;
856 	}
857 
858 	if (pmu->type->num_boxes == 1) {
859 		if (strlen(pmu->type->name) > 0)
860 			sprintf(pmu->name, "uncore_%s", pmu->type->name);
861 		else
862 			sprintf(pmu->name, "uncore");
863 	} else {
864 		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
865 			pmu->pmu_idx);
866 	}
867 
868 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
869 	if (!ret)
870 		pmu->registered = true;
871 	return ret;
872 }
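
/*
 * Resulting PMU names as constructed above: "uncore" for an anonymous
 * single-box type, "uncore_<type>" for a named single-box type and
 * "uncore_<type>_<N>" when a type has several boxes.  These are the names
 * that appear under /sys/bus/event_source/devices/.
 */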
873 
874 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
875 {
876 	if (!pmu->registered)
877 		return;
878 	perf_pmu_unregister(&pmu->pmu);
879 	pmu->registered = false;
880 }
881 
882 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
883 {
884 	int die;
885 
886 	for (die = 0; die < uncore_max_dies(); die++)
887 		kfree(pmu->boxes[die]);
888 	kfree(pmu->boxes);
889 }
890 
891 static void uncore_type_exit(struct intel_uncore_type *type)
892 {
893 	struct intel_uncore_pmu *pmu = type->pmus;
894 	int i;
895 
896 	if (type->cleanup_mapping)
897 		type->cleanup_mapping(type);
898 
899 	if (pmu) {
900 		for (i = 0; i < type->num_boxes; i++, pmu++) {
901 			uncore_pmu_unregister(pmu);
902 			uncore_free_boxes(pmu);
903 		}
904 		kfree(type->pmus);
905 		type->pmus = NULL;
906 	}
907 	kfree(type->events_group);
908 	type->events_group = NULL;
909 }
910 
911 static void uncore_types_exit(struct intel_uncore_type **types)
912 {
913 	for (; *types; types++)
914 		uncore_type_exit(*types);
915 }
916 
917 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
918 {
919 	struct intel_uncore_pmu *pmus;
920 	size_t size;
921 	int i, j;
922 
923 	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
924 	if (!pmus)
925 		return -ENOMEM;
926 
927 	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
928 
929 	for (i = 0; i < type->num_boxes; i++) {
930 		pmus[i].func_id	= setid ? i : -1;
931 		pmus[i].pmu_idx	= i;
932 		pmus[i].type	= type;
933 		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
934 		if (!pmus[i].boxes)
935 			goto err;
936 	}
937 
938 	type->pmus = pmus;
939 	type->unconstrainted = (struct event_constraint)
940 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
941 				0, type->num_counters, 0, 0);
942 
943 	if (type->event_descs) {
944 		struct {
945 			struct attribute_group group;
946 			struct attribute *attrs[];
947 		} *attr_group;
948 		for (i = 0; type->event_descs[i].attr.attr.name; i++);
949 
950 		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
951 								GFP_KERNEL);
952 		if (!attr_group)
953 			goto err;
954 
955 		attr_group->group.name = "events";
956 		attr_group->group.attrs = attr_group->attrs;
957 
958 		for (j = 0; j < i; j++)
959 			attr_group->attrs[j] = &type->event_descs[j].attr.attr;
960 
961 		type->events_group = &attr_group->group;
962 	}
963 
964 	type->pmu_group = &uncore_pmu_attr_group;
965 
966 	if (type->set_mapping)
967 		type->set_mapping(type);
968 
969 	return 0;
970 
971 err:
972 	for (i = 0; i < type->num_boxes; i++)
973 		kfree(pmus[i].boxes);
974 	kfree(pmus);
975 
976 	return -ENOMEM;
977 }
978 
979 static int __init
980 uncore_types_init(struct intel_uncore_type **types, bool setid)
981 {
982 	int ret;
983 
984 	for (; *types; types++) {
985 		ret = uncore_type_init(*types, setid);
986 		if (ret)
987 			return ret;
988 	}
989 	return 0;
990 }
991 
992 /*
993  * Get the die information of a PCI device.
994  * @pdev: The PCI device.
995  * @die: The die id which the device maps to.
996  */
997 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
998 {
999 	*die = uncore_pcibus_to_dieid(pdev->bus);
1000 	if (*die < 0)
1001 		return -EINVAL;
1002 
1003 	return 0;
1004 }
1005 
1006 /*
1007  * Find the PMU of a PCI device.
1008  * @pdev: The PCI device.
1009  * @ids: The ID table of the available PCI devices with a PMU.
1010  */
1011 static struct intel_uncore_pmu *
1012 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1013 {
1014 	struct intel_uncore_pmu *pmu = NULL;
1015 	struct intel_uncore_type *type;
1016 	kernel_ulong_t data;
1017 	unsigned int devfn;
1018 
1019 	while (ids && ids->vendor) {
1020 		if ((ids->vendor == pdev->vendor) &&
1021 		    (ids->device == pdev->device)) {
1022 			data = ids->driver_data;
1023 			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1024 					  UNCORE_PCI_DEV_FUNC(data));
1025 			if (devfn == pdev->devfn) {
1026 				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1027 				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1028 				break;
1029 			}
1030 		}
1031 		ids++;
1032 	}
1033 	return pmu;
1034 }
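
/*
 * The driver_data of each PCI id table entry encodes which uncore type and
 * PMU index the device belongs to, plus the expected device/function number
 * (decoded with the UNCORE_PCI_DEV_* helpers above).  Matching on devfn in
 * addition to vendor/device is what disambiguates platforms where several
 * boxes share a single PCI device ID.
 */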
1035 
1036 /*
1037  * Register the PMU for a PCI device
1038  * @pdev: The PCI device.
1039  * @type: The corresponding PMU type of the device.
1040  * @pmu: The corresponding PMU of the device.
1041  * @die: The die id which the device maps to.
1042  */
1043 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1044 				   struct intel_uncore_type *type,
1045 				   struct intel_uncore_pmu *pmu,
1046 				   int die)
1047 {
1048 	struct intel_uncore_box *box;
1049 	int ret;
1050 
1051 	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1052 		return -EINVAL;
1053 
1054 	box = uncore_alloc_box(type, NUMA_NO_NODE);
1055 	if (!box)
1056 		return -ENOMEM;
1057 
1058 	if (pmu->func_id < 0)
1059 		pmu->func_id = pdev->devfn;
1060 	else
1061 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1062 
1063 	atomic_inc(&box->refcnt);
1064 	box->dieid = die;
1065 	box->pci_dev = pdev;
1066 	box->pmu = pmu;
1067 	uncore_box_init(box);
1068 
1069 	pmu->boxes[die] = box;
1070 	if (atomic_inc_return(&pmu->activeboxes) > 1)
1071 		return 0;
1072 
1073 	/* First active box registers the pmu */
1074 	ret = uncore_pmu_register(pmu);
1075 	if (ret) {
1076 		pmu->boxes[die] = NULL;
1077 		uncore_box_exit(box);
1078 		kfree(box);
1079 	}
1080 	return ret;
1081 }
1082 
1083 /*
1084  * add a pci uncore device
1085  */
1086 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1087 {
1088 	struct intel_uncore_type *type;
1089 	struct intel_uncore_pmu *pmu = NULL;
1090 	int die, ret;
1091 
1092 	ret = uncore_pci_get_dev_die_info(pdev, &die);
1093 	if (ret)
1094 		return ret;
1095 
1096 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1097 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1098 
1099 		uncore_extra_pci_dev[die].dev[idx] = pdev;
1100 		pci_set_drvdata(pdev, NULL);
1101 		return 0;
1102 	}
1103 
1104 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1105 
1106 	/*
1107 	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
1108 	 * for multiple instances of an uncore PMU device type. Check the
1109 	 * PCI slot and function to identify the uncore box.
1110 	 */
1111 	if (id->driver_data & ~0xffff) {
1112 		struct pci_driver *pci_drv = pdev->driver;
1113 
1114 		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1115 		if (pmu == NULL)
1116 			return -ENODEV;
1117 	} else {
1118 		/*
1119 		 * For a performance monitoring unit with multiple boxes,
1120 		 * each box has a different function id.
1121 		 */
1122 		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1123 	}
1124 
1125 	ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1126 
1127 	pci_set_drvdata(pdev, pmu->boxes[die]);
1128 
1129 	return ret;
1130 }
1131 
1132 /*
1133  * Unregister the PMU of a PCI device
1134  * @pmu: The corresponding PMU to be unregistered.
1135  * @die: The die id which the device maps to.
1136  */
1137 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1138 {
1139 	struct intel_uncore_box *box = pmu->boxes[die];
1140 
1141 	pmu->boxes[die] = NULL;
1142 	if (atomic_dec_return(&pmu->activeboxes) == 0)
1143 		uncore_pmu_unregister(pmu);
1144 	uncore_box_exit(box);
1145 	kfree(box);
1146 }
1147 
1148 static void uncore_pci_remove(struct pci_dev *pdev)
1149 {
1150 	struct intel_uncore_box *box;
1151 	struct intel_uncore_pmu *pmu;
1152 	int i, die;
1153 
1154 	if (uncore_pci_get_dev_die_info(pdev, &die))
1155 		return;
1156 
1157 	box = pci_get_drvdata(pdev);
1158 	if (!box) {
1159 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1160 			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1161 				uncore_extra_pci_dev[die].dev[i] = NULL;
1162 				break;
1163 			}
1164 		}
1165 		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1166 		return;
1167 	}
1168 
1169 	pmu = box->pmu;
1170 
1171 	pci_set_drvdata(pdev, NULL);
1172 
1173 	uncore_pci_pmu_unregister(pmu, die);
1174 }
1175 
1176 static int uncore_bus_notify(struct notifier_block *nb,
1177 			     unsigned long action, void *data)
1178 {
1179 	struct device *dev = data;
1180 	struct pci_dev *pdev = to_pci_dev(dev);
1181 	struct intel_uncore_pmu *pmu;
1182 	int die;
1183 
1184 	/* Unregister the PMU when the device is going to be deleted. */
1185 	if (action != BUS_NOTIFY_DEL_DEVICE)
1186 		return NOTIFY_DONE;
1187 
1188 	pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
1189 	if (!pmu)
1190 		return NOTIFY_DONE;
1191 
1192 	if (uncore_pci_get_dev_die_info(pdev, &die))
1193 		return NOTIFY_DONE;
1194 
1195 	uncore_pci_pmu_unregister(pmu, die);
1196 
1197 	return NOTIFY_OK;
1198 }
1199 
1200 static struct notifier_block uncore_notifier = {
1201 	.notifier_call = uncore_bus_notify,
1202 };
1203 
1204 static void uncore_pci_sub_driver_init(void)
1205 {
1206 	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1207 	struct intel_uncore_type *type;
1208 	struct intel_uncore_pmu *pmu;
1209 	struct pci_dev *pci_sub_dev;
1210 	bool notify = false;
1211 	unsigned int devfn;
1212 	int die;
1213 
1214 	while (ids && ids->vendor) {
1215 		pci_sub_dev = NULL;
1216 		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1217 		/*
1218 		 * Search for the available devices and register the
1219 		 * corresponding PMU.
1220 		 */
1221 		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1222 						     ids->device, pci_sub_dev))) {
1223 			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1224 					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
1225 			if (devfn != pci_sub_dev->devfn)
1226 				continue;
1227 
1228 			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1229 			if (!pmu)
1230 				continue;
1231 
1232 			if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1233 				continue;
1234 
1235 			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1236 						     die))
1237 				notify = true;
1238 		}
1239 		ids++;
1240 	}
1241 
1242 	if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
1243 		notify = false;
1244 
1245 	if (!notify)
1246 		uncore_pci_sub_driver = NULL;
1247 }
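
/*
 * The sub driver path handles uncore PMUs living on PCI devices that are
 * owned by another driver: instead of binding through uncore_pci_probe(),
 * the devices are looked up with pci_get_device() and their PMUs registered
 * directly, and a bus notifier tears them down again when such a device is
 * removed.
 */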
1248 
1249 static int __init uncore_pci_init(void)
1250 {
1251 	size_t size;
1252 	int ret;
1253 
1254 	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1255 	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1256 	if (!uncore_extra_pci_dev) {
1257 		ret = -ENOMEM;
1258 		goto err;
1259 	}
1260 
1261 	ret = uncore_types_init(uncore_pci_uncores, false);
1262 	if (ret)
1263 		goto errtype;
1264 
1265 	uncore_pci_driver->probe = uncore_pci_probe;
1266 	uncore_pci_driver->remove = uncore_pci_remove;
1267 
1268 	ret = pci_register_driver(uncore_pci_driver);
1269 	if (ret)
1270 		goto errtype;
1271 
1272 	if (uncore_pci_sub_driver)
1273 		uncore_pci_sub_driver_init();
1274 
1275 	pcidrv_registered = true;
1276 	return 0;
1277 
1278 errtype:
1279 	uncore_types_exit(uncore_pci_uncores);
1280 	kfree(uncore_extra_pci_dev);
1281 	uncore_extra_pci_dev = NULL;
1282 	uncore_free_pcibus_map();
1283 err:
1284 	uncore_pci_uncores = empty_uncore;
1285 	return ret;
1286 }
1287 
1288 static void uncore_pci_exit(void)
1289 {
1290 	if (pcidrv_registered) {
1291 		pcidrv_registered = false;
1292 		if (uncore_pci_sub_driver)
1293 			bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
1294 		pci_unregister_driver(uncore_pci_driver);
1295 		uncore_types_exit(uncore_pci_uncores);
1296 		kfree(uncore_extra_pci_dev);
1297 		uncore_free_pcibus_map();
1298 	}
1299 }
1300 
1301 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1302 				   int new_cpu)
1303 {
1304 	struct intel_uncore_pmu *pmu = type->pmus;
1305 	struct intel_uncore_box *box;
1306 	int i, die;
1307 
1308 	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1309 	for (i = 0; i < type->num_boxes; i++, pmu++) {
1310 		box = pmu->boxes[die];
1311 		if (!box)
1312 			continue;
1313 
1314 		if (old_cpu < 0) {
1315 			WARN_ON_ONCE(box->cpu != -1);
1316 			box->cpu = new_cpu;
1317 			continue;
1318 		}
1319 
1320 		WARN_ON_ONCE(box->cpu != old_cpu);
1321 		box->cpu = -1;
1322 		if (new_cpu < 0)
1323 			continue;
1324 
1325 		uncore_pmu_cancel_hrtimer(box);
1326 		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1327 		box->cpu = new_cpu;
1328 	}
1329 }
1330 
1331 static void uncore_change_context(struct intel_uncore_type **uncores,
1332 				  int old_cpu, int new_cpu)
1333 {
1334 	for (; *uncores; uncores++)
1335 		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1336 }
1337 
1338 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1339 {
1340 	struct intel_uncore_type *type;
1341 	struct intel_uncore_pmu *pmu;
1342 	struct intel_uncore_box *box;
1343 	int i;
1344 
1345 	for (; *types; types++) {
1346 		type = *types;
1347 		pmu = type->pmus;
1348 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1349 			box = pmu->boxes[id];
1350 			if (box && atomic_dec_return(&box->refcnt) == 0)
1351 				uncore_box_exit(box);
1352 		}
1353 	}
1354 }
1355 
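/*
 * CPU hotplug handling: exactly one online CPU per die is kept in
 * uncore_cpu_mask and acts as the designated reader for that die's uncore
 * boxes.  When that CPU goes offline, another CPU of the same die (if any)
 * takes over and the perf contexts are migrated to it; the box references
 * held on behalf of the outgoing CPU are dropped either way.
 */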
1356 static int uncore_event_cpu_offline(unsigned int cpu)
1357 {
1358 	int die, target;
1359 
1360 	/* Check if the exiting cpu is used for collecting uncore events */
1361 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1362 		goto unref;
1363 	/* Find a new cpu to collect uncore events */
1364 	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1365 
1366 	/* Migrate uncore events to the new target */
1367 	if (target < nr_cpu_ids)
1368 		cpumask_set_cpu(target, &uncore_cpu_mask);
1369 	else
1370 		target = -1;
1371 
1372 	uncore_change_context(uncore_msr_uncores, cpu, target);
1373 	uncore_change_context(uncore_mmio_uncores, cpu, target);
1374 	uncore_change_context(uncore_pci_uncores, cpu, target);
1375 
1376 unref:
1377 	/* Clear the references */
1378 	die = topology_logical_die_id(cpu);
1379 	uncore_box_unref(uncore_msr_uncores, die);
1380 	uncore_box_unref(uncore_mmio_uncores, die);
1381 	return 0;
1382 }
1383 
1384 static int allocate_boxes(struct intel_uncore_type **types,
1385 			 unsigned int die, unsigned int cpu)
1386 {
1387 	struct intel_uncore_box *box, *tmp;
1388 	struct intel_uncore_type *type;
1389 	struct intel_uncore_pmu *pmu;
1390 	LIST_HEAD(allocated);
1391 	int i;
1392 
1393 	/* Try to allocate all required boxes */
1394 	for (; *types; types++) {
1395 		type = *types;
1396 		pmu = type->pmus;
1397 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1398 			if (pmu->boxes[die])
1399 				continue;
1400 			box = uncore_alloc_box(type, cpu_to_node(cpu));
1401 			if (!box)
1402 				goto cleanup;
1403 			box->pmu = pmu;
1404 			box->dieid = die;
1405 			list_add(&box->active_list, &allocated);
1406 		}
1407 	}
1408 	/* Install them in the pmus */
1409 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1410 		list_del_init(&box->active_list);
1411 		box->pmu->boxes[die] = box;
1412 	}
1413 	return 0;
1414 
1415 cleanup:
1416 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1417 		list_del_init(&box->active_list);
1418 		kfree(box);
1419 	}
1420 	return -ENOMEM;
1421 }
1422 
1423 static int uncore_box_ref(struct intel_uncore_type **types,
1424 			  int id, unsigned int cpu)
1425 {
1426 	struct intel_uncore_type *type;
1427 	struct intel_uncore_pmu *pmu;
1428 	struct intel_uncore_box *box;
1429 	int i, ret;
1430 
1431 	ret = allocate_boxes(types, id, cpu);
1432 	if (ret)
1433 		return ret;
1434 
1435 	for (; *types; types++) {
1436 		type = *types;
1437 		pmu = type->pmus;
1438 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1439 			box = pmu->boxes[id];
1440 			if (box && atomic_inc_return(&box->refcnt) == 1)
1441 				uncore_box_init(box);
1442 		}
1443 	}
1444 	return 0;
1445 }
1446 
1447 static int uncore_event_cpu_online(unsigned int cpu)
1448 {
1449 	int die, target, msr_ret, mmio_ret;
1450 
1451 	die = topology_logical_die_id(cpu);
1452 	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1453 	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1454 	if (msr_ret && mmio_ret)
1455 		return -ENOMEM;
1456 
1457 	/*
1458 	 * Check if there is an online cpu in the package
1459 	 * which collects uncore events already.
1460 	 */
1461 	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1462 	if (target < nr_cpu_ids)
1463 		return 0;
1464 
1465 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
1466 
1467 	if (!msr_ret)
1468 		uncore_change_context(uncore_msr_uncores, -1, cpu);
1469 	if (!mmio_ret)
1470 		uncore_change_context(uncore_mmio_uncores, -1, cpu);
1471 	uncore_change_context(uncore_pci_uncores, -1, cpu);
1472 	return 0;
1473 }
1474 
1475 static int __init type_pmu_register(struct intel_uncore_type *type)
1476 {
1477 	int i, ret;
1478 
1479 	for (i = 0; i < type->num_boxes; i++) {
1480 		ret = uncore_pmu_register(&type->pmus[i]);
1481 		if (ret)
1482 			return ret;
1483 	}
1484 	return 0;
1485 }
1486 
1487 static int __init uncore_msr_pmus_register(void)
1488 {
1489 	struct intel_uncore_type **types = uncore_msr_uncores;
1490 	int ret;
1491 
1492 	for (; *types; types++) {
1493 		ret = type_pmu_register(*types);
1494 		if (ret)
1495 			return ret;
1496 	}
1497 	return 0;
1498 }
1499 
1500 static int __init uncore_cpu_init(void)
1501 {
1502 	int ret;
1503 
1504 	ret = uncore_types_init(uncore_msr_uncores, true);
1505 	if (ret)
1506 		goto err;
1507 
1508 	ret = uncore_msr_pmus_register();
1509 	if (ret)
1510 		goto err;
1511 	return 0;
1512 err:
1513 	uncore_types_exit(uncore_msr_uncores);
1514 	uncore_msr_uncores = empty_uncore;
1515 	return ret;
1516 }
1517 
1518 static int __init uncore_mmio_init(void)
1519 {
1520 	struct intel_uncore_type **types = uncore_mmio_uncores;
1521 	int ret;
1522 
1523 	ret = uncore_types_init(types, true);
1524 	if (ret)
1525 		goto err;
1526 
1527 	for (; *types; types++) {
1528 		ret = type_pmu_register(*types);
1529 		if (ret)
1530 			goto err;
1531 	}
1532 	return 0;
1533 err:
1534 	uncore_types_exit(uncore_mmio_uncores);
1535 	uncore_mmio_uncores = empty_uncore;
1536 	return ret;
1537 }
1538 
1539 struct intel_uncore_init_fun {
1540 	void	(*cpu_init)(void);
1541 	int	(*pci_init)(void);
1542 	void	(*mmio_init)(void);
1543 };
1544 
1545 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1546 	.cpu_init = nhm_uncore_cpu_init,
1547 };
1548 
1549 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1550 	.cpu_init = snb_uncore_cpu_init,
1551 	.pci_init = snb_uncore_pci_init,
1552 };
1553 
1554 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1555 	.cpu_init = snb_uncore_cpu_init,
1556 	.pci_init = ivb_uncore_pci_init,
1557 };
1558 
1559 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1560 	.cpu_init = snb_uncore_cpu_init,
1561 	.pci_init = hsw_uncore_pci_init,
1562 };
1563 
1564 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1565 	.cpu_init = snb_uncore_cpu_init,
1566 	.pci_init = bdw_uncore_pci_init,
1567 };
1568 
1569 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1570 	.cpu_init = snbep_uncore_cpu_init,
1571 	.pci_init = snbep_uncore_pci_init,
1572 };
1573 
1574 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1575 	.cpu_init = nhmex_uncore_cpu_init,
1576 };
1577 
1578 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1579 	.cpu_init = ivbep_uncore_cpu_init,
1580 	.pci_init = ivbep_uncore_pci_init,
1581 };
1582 
1583 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1584 	.cpu_init = hswep_uncore_cpu_init,
1585 	.pci_init = hswep_uncore_pci_init,
1586 };
1587 
1588 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1589 	.cpu_init = bdx_uncore_cpu_init,
1590 	.pci_init = bdx_uncore_pci_init,
1591 };
1592 
1593 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1594 	.cpu_init = knl_uncore_cpu_init,
1595 	.pci_init = knl_uncore_pci_init,
1596 };
1597 
1598 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1599 	.cpu_init = skl_uncore_cpu_init,
1600 	.pci_init = skl_uncore_pci_init,
1601 };
1602 
1603 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1604 	.cpu_init = skx_uncore_cpu_init,
1605 	.pci_init = skx_uncore_pci_init,
1606 };
1607 
1608 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1609 	.cpu_init = icl_uncore_cpu_init,
1610 	.pci_init = skl_uncore_pci_init,
1611 };
1612 
1613 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1614 	.cpu_init = tgl_uncore_cpu_init,
1615 	.mmio_init = tgl_uncore_mmio_init,
1616 };
1617 
1618 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1619 	.cpu_init = tgl_uncore_cpu_init,
1620 	.mmio_init = tgl_l_uncore_mmio_init,
1621 };
1622 
1623 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1624 	.cpu_init = tgl_uncore_cpu_init,
1625 	.pci_init = skl_uncore_pci_init,
1626 };
1627 
1628 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1629 	.cpu_init = icx_uncore_cpu_init,
1630 	.pci_init = icx_uncore_pci_init,
1631 	.mmio_init = icx_uncore_mmio_init,
1632 };
1633 
1634 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1635 	.cpu_init = snr_uncore_cpu_init,
1636 	.pci_init = snr_uncore_pci_init,
1637 	.mmio_init = snr_uncore_mmio_init,
1638 };
1639 
1640 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1641 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_uncore_init),
1642 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_uncore_init),
1643 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_uncore_init),
1644 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_uncore_init),
1645 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_uncore_init),
1646 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&ivb_uncore_init),
1647 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&hsw_uncore_init),
1648 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hsw_uncore_init),
1649 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&hsw_uncore_init),
1650 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&bdw_uncore_init),
1651 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&bdw_uncore_init),
1652 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snbep_uncore_init),
1653 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhmex_uncore_init),
1654 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhmex_uncore_init),
1655 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&ivbep_uncore_init),
1656 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&hswep_uncore_init),
1657 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&bdx_uncore_init),
1658 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&bdx_uncore_init),
1659 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_uncore_init),
1660 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_uncore_init),
1661 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&skl_uncore_init),
1662 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&skl_uncore_init),
1663 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&skx_uncore_init),
1664 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&skl_uncore_init),
1665 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&skl_uncore_init),
1666 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&skl_uncore_init),
1667 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&skl_uncore_init),
1668 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_uncore_init),
1669 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&icl_uncore_init),
1670 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_uncore_init),
1671 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_uncore_init),
1672 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_uncore_init),
1673 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&tgl_l_uncore_init),
1674 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&tgl_uncore_init),
1675 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
1676 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
1677 	{},
1678 };
1679 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1680 
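/*
 * Module init: the model specific *_uncore_init descriptor selected from
 * intel_uncore_match decides which of the MSR, PCI and MMIO uncore flavours
 * get populated.  The individual flavours may fail independently; the module
 * bails out with -ENODEV only if all three result codes indicate failure.
 * The CPU hotplug callbacks are installed last to wire up the per-die
 * reader CPUs.
 */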
1681 static int __init intel_uncore_init(void)
1682 {
1683 	const struct x86_cpu_id *id;
1684 	struct intel_uncore_init_fun *uncore_init;
1685 	int pret = 0, cret = 0, mret = 0, ret;
1686 
1687 	id = x86_match_cpu(intel_uncore_match);
1688 	if (!id)
1689 		return -ENODEV;
1690 
1691 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1692 		return -ENODEV;
1693 
1694 	__uncore_max_dies =
1695 		topology_max_packages() * topology_max_die_per_package();
1696 
1697 	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1698 	if (uncore_init->pci_init) {
1699 		pret = uncore_init->pci_init();
1700 		if (!pret)
1701 			pret = uncore_pci_init();
1702 	}
1703 
1704 	if (uncore_init->cpu_init) {
1705 		uncore_init->cpu_init();
1706 		cret = uncore_cpu_init();
1707 	}
1708 
1709 	if (uncore_init->mmio_init) {
1710 		uncore_init->mmio_init();
1711 		mret = uncore_mmio_init();
1712 	}
1713 
1714 	if (cret && pret && mret)
1715 		return -ENODEV;
1716 
1717 	/* Install hotplug callbacks to set up the targets for each package */
1718 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1719 				"perf/x86/intel/uncore:online",
1720 				uncore_event_cpu_online,
1721 				uncore_event_cpu_offline);
1722 	if (ret)
1723 		goto err;
1724 	return 0;
1725 
1726 err:
1727 	uncore_types_exit(uncore_msr_uncores);
1728 	uncore_types_exit(uncore_mmio_uncores);
1729 	uncore_pci_exit();
1730 	return ret;
1731 }
1732 module_init(intel_uncore_init);
1733 
1734 static void __exit intel_uncore_exit(void)
1735 {
1736 	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1737 	uncore_types_exit(uncore_msr_uncores);
1738 	uncore_types_exit(uncore_mmio_uncores);
1739 	uncore_pci_exit();
1740 }
1741 module_exit(intel_uncore_exit);
1742