xref: /linux/drivers/resctrl/mpam_devices.c (revision 3796f75aa7958d26b93a2508de5fc1e0b2f8a853)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2025 Arm Ltd.
3 
4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
5 
6 #include <linux/acpi.h>
7 #include <linux/atomic.h>
8 #include <linux/arm_mpam.h>
9 #include <linux/bitfield.h>
10 #include <linux/bitmap.h>
11 #include <linux/cacheinfo.h>
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/device.h>
15 #include <linux/errno.h>
16 #include <linux/gfp.h>
17 #include <linux/interrupt.h>
18 #include <linux/irq.h>
19 #include <linux/irqdesc.h>
20 #include <linux/list.h>
21 #include <linux/lockdep.h>
22 #include <linux/mutex.h>
23 #include <linux/platform_device.h>
24 #include <linux/printk.h>
25 #include <linux/srcu.h>
26 #include <linux/spinlock.h>
27 #include <linux/types.h>
28 #include <linux/workqueue.h>
29 
30 #include "mpam_internal.h"
31 
32 DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */
33 
34 /*
35  * mpam_list_lock protects the SRCU lists when writing. Once the
36  * mpam_enabled key is enabled these lists are read-only,
37  * unless the error interrupt disables the driver.
38  */
39 static DEFINE_MUTEX(mpam_list_lock);
40 static LIST_HEAD(mpam_all_msc);
41 
42 struct srcu_struct mpam_srcu;
43 
44 /*
45  * Number of MSCs that have been probed. Once all MSCs have been probed MPAM
46  * can be enabled.
47  */
48 static atomic_t mpam_num_msc;
49 
50 static int mpam_cpuhp_state;
51 static DEFINE_MUTEX(mpam_cpuhp_state_lock);
52 
53 /*
54  * The smallest common values for any CPU or MSC in the system.
55  * Generating traffic outside this range will result in screaming interrupts.
56  */
57 u16 mpam_partid_max;
58 u8 mpam_pmg_max;
59 static bool partid_max_init, partid_max_published;
60 static DEFINE_SPINLOCK(partid_max_lock);
61 
62 /*
63  * mpam is enabled once all devices have been probed from CPU online callbacks,
64  * scheduled via this work_struct. If access to an MSC depends on a CPU that
65  * was not brought online at boot, this can happen surprisingly late.
66  */
67 static DECLARE_WORK(mpam_enable_work, &mpam_enable);
68 
69 /*
70  * All mpam error interrupts indicate a software bug. On receipt, disable the
71  * driver.
72  */
73 static DECLARE_WORK(mpam_broken_work, &mpam_disable);
74 
75 /* The reason mpam was disabled, printed to aid debugging */
76 static char *mpam_disable_reason;
77 
78 /*
79  * An MSC is a physical container for controls and monitors, each identified by
80  * their RIS index. These share a base-address, interrupts and some MMIO
81  * registers. A vMSC is a virtual container for RIS in an MSC that control or
82  * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but
83  * not all RIS in an MSC share a vMSC.
84  *
85  * Components are a group of vMSC that control or monitor the same thing but
86  * are from different MSC, so have different base-address, interrupts etc.
87  * Classes are the set of components of the same type.
88  *
89  * The features of a vMSC are the union of those of the RIS it contains.
90  * The features of a Class and Component are the common subset of the vMSC
91  * they contain.
92  *
93  * e.g. The system cache may have bandwidth controls on multiple interfaces,
94  * for regulating traffic from devices independently of traffic from CPUs.
95  * If these are two RIS in one MSC, they will be treated as controlling
96  * different things, and will not share a vMSC/component/class.
97  *
98  * e.g. The L2 may have one MSC and two RIS, one for cache-controls another
99  * for bandwidth. These two RIS are members of the same vMSC.
100  *
101  * e.g. The set of RIS that make up the L2 are grouped as a component. These
102  * are sometimes termed slices. They should be configured the same, as if there
103  * were only one.
104  *
105  * e.g. The SoC probably has more than one L2, each attached to a distinct set
106  * of CPUs. All the L2 components are grouped as a class.
107  *
108  * When creating an MSC, struct mpam_msc is added to the mpam_all_msc list,
109  * then linked via struct mpam_ris to a vmsc, component and class.
110  * The same MSC may exist under different class->component->vmsc paths, but the
111  * RIS index will be unique.
112  */
113 LIST_HEAD(mpam_classes);
114 
115 /* List of all objects that can be free()d after synchronise_srcu() */
116 static LLIST_HEAD(mpam_garbage);
117 
118 static inline void init_garbage(struct mpam_garbage *garbage)
119 {
120 	init_llist_node(&garbage->llist);
121 }
122 
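/*
 * Objects that SRCU readers may still reference can't be freed immediately.
 * add_to_garbage() queues the object on mpam_garbage; mpam_free_garbage()
 * later waits for readers with synchronize_srcu() before freeing. For
 * example, mpam_class_destroy() calls add_to_garbage(class) straight after
 * list_del_rcu().
 */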
123 #define add_to_garbage(x)				\
124 do {							\
125 	__typeof__(x) _x = (x);				\
126 	_x->garbage.to_free = _x;			\
127 	llist_add(&_x->garbage.llist, &mpam_garbage);	\
128 } while (0)
129 
130 static void mpam_free_garbage(void)
131 {
132 	struct mpam_garbage *iter, *tmp;
133 	struct llist_node *to_free = llist_del_all(&mpam_garbage);
134 
135 	if (!to_free)
136 		return;
137 
138 	synchronize_srcu(&mpam_srcu);
139 
140 	llist_for_each_entry_safe(iter, tmp, to_free, llist) {
141 		if (iter->pdev)
142 			devm_kfree(&iter->pdev->dev, iter->to_free);
143 		else
144 			kfree(iter->to_free);
145 	}
146 }
147 
148 static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
149 {
150 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
151 
152 	return readl_relaxed(msc->mapped_hwpage + reg);
153 }
154 
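/*
 * Accessors for registers indexed by MPAMCFG_PART_SEL. msc->part_sel_lock
 * must be held so the selector can't change underneath the access. The
 * wrapper macros paste on the register prefix, e.g.
 * mpam_read_partsel_reg(msc, CPOR_IDR) reads MPAMF_CPOR_IDR.
 */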
155 static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg)
156 {
157 	lockdep_assert_held_once(&msc->part_sel_lock);
158 	return __mpam_read_reg(msc, reg);
159 }
160 
161 #define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg)
162 
163 static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val)
164 {
165 	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
166 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
167 
168 	writel_relaxed(val, msc->mapped_hwpage + reg);
169 }
170 
171 static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
172 {
173 	lockdep_assert_held_once(&msc->part_sel_lock);
174 	__mpam_write_reg(msc, reg, val);
175 }
176 
177 #define mpam_write_partsel_reg(msc, reg, val)  _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val)
178 
179 static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg)
180 {
181 	mpam_mon_sel_lock_held(msc);
182 	return __mpam_read_reg(msc, reg);
183 }
184 
185 #define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg)
186 
187 static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
188 {
189 	mpam_mon_sel_lock_held(msc);
190 	__mpam_write_reg(msc, reg, val);
191 }
192 
193 #define mpam_write_monsel_reg(msc, reg, val)   _mpam_write_monsel_reg(msc, MSMON_##reg, val)
194 
195 static u64 mpam_msc_read_idr(struct mpam_msc *msc)
196 {
197 	u64 idr_high = 0, idr_low;
198 
199 	lockdep_assert_held(&msc->part_sel_lock);
200 
201 	idr_low = mpam_read_partsel_reg(msc, IDR);
202 	if (FIELD_GET(MPAMF_IDR_EXT, idr_low))
203 		idr_high = mpam_read_partsel_reg(msc, IDR + 4);
204 
205 	return (idr_high << 32) | idr_low;
206 }
207 
208 static void mpam_msc_clear_esr(struct mpam_msc *msc)
209 {
210 	u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR);
211 
212 	if (!esr_low)
213 		return;
214 
215 	/*
216 	 * Clearing the high/low bits of MPAMF_ESR cannot be atomic.
217 	 * Clear the top half first, so that the pending error bits in the
218 	 * lower half prevent hardware from updating either half of the
219 	 * register.
220 	 */
221 	if (msc->has_extd_esr)
222 		__mpam_write_reg(msc, MPAMF_ESR + 4, 0);
223 	__mpam_write_reg(msc, MPAMF_ESR, 0);
224 }
225 
226 static u64 mpam_msc_read_esr(struct mpam_msc *msc)
227 {
228 	u64 esr_high = 0, esr_low;
229 
230 	esr_low = __mpam_read_reg(msc, MPAMF_ESR);
231 	if (msc->has_extd_esr)
232 		esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4);
233 
234 	return (esr_high << 32) | esr_low;
235 }
236 
237 static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc)
238 {
239 	lockdep_assert_held(&msc->part_sel_lock);
240 
241 	mpam_write_partsel_reg(msc, PART_SEL, partsel);
242 }
243 
244 static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc)
245 {
246 	u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) |
247 		      FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid);
248 
249 	__mpam_part_sel_raw(partsel, msc);
250 }
251 
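/*
 * Requestors (e.g. the arch code on behalf of the CPUs) report the PARTID
 * and PMG ranges they can generate. The system-wide minimum is fixed once
 * the values are published at enable time; after that a requestor that
 * would lower either value gets -EBUSY.
 */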
252 int mpam_register_requestor(u16 partid_max, u8 pmg_max)
253 {
254 	guard(spinlock)(&partid_max_lock);
255 	if (!partid_max_init) {
256 		mpam_partid_max = partid_max;
257 		mpam_pmg_max = pmg_max;
258 		partid_max_init = true;
259 	} else if (!partid_max_published) {
260 		mpam_partid_max = min(mpam_partid_max, partid_max);
261 		mpam_pmg_max = min(mpam_pmg_max, pmg_max);
262 	} else {
263 		/* New requestors can't lower the values */
264 		if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max)
265 			return -EBUSY;
266 	}
267 
268 	return 0;
269 }
270 EXPORT_SYMBOL(mpam_register_requestor);
271 
272 static struct mpam_class *
273 mpam_class_alloc(u8 level_idx, enum mpam_class_types type)
274 {
275 	struct mpam_class *class;
276 
277 	lockdep_assert_held(&mpam_list_lock);
278 
279 	class = kzalloc(sizeof(*class), GFP_KERNEL);
280 	if (!class)
281 		return ERR_PTR(-ENOMEM);
282 	init_garbage(&class->garbage);
283 
284 	INIT_LIST_HEAD_RCU(&class->components);
285 	/* Affinity is updated when ris are added */
286 	class->level = level_idx;
287 	class->type = type;
288 	INIT_LIST_HEAD_RCU(&class->classes_list);
289 
290 	list_add_rcu(&class->classes_list, &mpam_classes);
291 
292 	return class;
293 }
294 
295 static void mpam_class_destroy(struct mpam_class *class)
296 {
297 	lockdep_assert_held(&mpam_list_lock);
298 
299 	list_del_rcu(&class->classes_list);
300 	add_to_garbage(class);
301 }
302 
303 static struct mpam_class *
304 mpam_class_find(u8 level_idx, enum mpam_class_types type)
305 {
306 	struct mpam_class *class;
307 
308 	lockdep_assert_held(&mpam_list_lock);
309 
310 	list_for_each_entry(class, &mpam_classes, classes_list) {
311 		if (class->type == type && class->level == level_idx)
312 			return class;
313 	}
314 
315 	return mpam_class_alloc(level_idx, type);
316 }
317 
318 static struct mpam_component *
319 mpam_component_alloc(struct mpam_class *class, int id)
320 {
321 	struct mpam_component *comp;
322 
323 	lockdep_assert_held(&mpam_list_lock);
324 
325 	comp = kzalloc(sizeof(*comp), GFP_KERNEL);
326 	if (!comp)
327 		return ERR_PTR(-ENOMEM);
328 	init_garbage(&comp->garbage);
329 
330 	comp->comp_id = id;
331 	INIT_LIST_HEAD_RCU(&comp->vmsc);
332 	/* Affinity is updated when RIS are added */
333 	INIT_LIST_HEAD_RCU(&comp->class_list);
334 	comp->class = class;
335 
336 	list_add_rcu(&comp->class_list, &class->components);
337 
338 	return comp;
339 }
340 
341 static void mpam_component_destroy(struct mpam_component *comp)
342 {
343 	struct mpam_class *class = comp->class;
344 
345 	lockdep_assert_held(&mpam_list_lock);
346 
347 	list_del_rcu(&comp->class_list);
348 	add_to_garbage(comp);
349 
350 	if (list_empty(&class->components))
351 		mpam_class_destroy(class);
352 }
353 
354 static struct mpam_component *
355 mpam_component_find(struct mpam_class *class, int id)
356 {
357 	struct mpam_component *comp;
358 
359 	lockdep_assert_held(&mpam_list_lock);
360 
361 	list_for_each_entry(comp, &class->components, class_list) {
362 		if (comp->comp_id == id)
363 			return comp;
364 	}
365 
366 	return mpam_component_alloc(class, id);
367 }
368 
369 static struct mpam_vmsc *
370 mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc)
371 {
372 	struct mpam_vmsc *vmsc;
373 
374 	lockdep_assert_held(&mpam_list_lock);
375 
376 	vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL);
377 	if (!vmsc)
378 		return ERR_PTR(-ENOMEM);
379 	init_garbage(&vmsc->garbage);
380 
381 	INIT_LIST_HEAD_RCU(&vmsc->ris);
382 	INIT_LIST_HEAD_RCU(&vmsc->comp_list);
383 	vmsc->comp = comp;
384 	vmsc->msc = msc;
385 
386 	list_add_rcu(&vmsc->comp_list, &comp->vmsc);
387 
388 	return vmsc;
389 }
390 
391 static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc)
392 {
393 	struct mpam_component *comp = vmsc->comp;
394 
395 	lockdep_assert_held(&mpam_list_lock);
396 
397 	list_del_rcu(&vmsc->comp_list);
398 	add_to_garbage(vmsc);
399 
400 	if (list_empty(&comp->vmsc))
401 		mpam_component_destroy(comp);
402 }
403 
404 static struct mpam_vmsc *
405 mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc)
406 {
407 	struct mpam_vmsc *vmsc;
408 
409 	lockdep_assert_held(&mpam_list_lock);
410 
411 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
412 		if (vmsc->msc->id == msc->id)
413 			return vmsc;
414 	}
415 
416 	return mpam_vmsc_alloc(comp, msc);
417 }
418 
419 /*
420  * The cacheinfo structures are only populated when CPUs are online.
421  * This helper walks the acpi tables to include offline CPUs too.
422  */
423 int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
424 				   cpumask_t *affinity)
425 {
426 	return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);
427 }
428 
429 /*
430  * cpumask_of_node() only knows about online CPUs, so it can't tell us whether
431  * a class is represented on all possible CPUs. Walk the possible CPUs instead.
432  */
433 static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
434 {
435 	int cpu;
436 
437 	for_each_possible_cpu(cpu) {
438 		if (node_id == cpu_to_node(cpu))
439 			cpumask_set_cpu(cpu, affinity);
440 	}
441 }
442 
443 static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
444 				 enum mpam_class_types type,
445 				 struct mpam_class *class,
446 				 struct mpam_component *comp)
447 {
448 	int err;
449 
450 	switch (type) {
451 	case MPAM_CLASS_CACHE:
452 		err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level,
453 						     affinity);
454 		if (err) {
455 			dev_warn_once(&msc->pdev->dev,
456 				      "Failed to determine CPU affinity\n");
457 			return err;
458 		}
459 
460 		if (cpumask_empty(affinity))
461 			dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n");
462 
463 		break;
464 	case MPAM_CLASS_MEMORY:
465 		get_cpumask_from_node_id(comp->comp_id, affinity);
466 		/* affinity may be empty for CPU-less memory nodes */
467 		break;
468 	case MPAM_CLASS_UNKNOWN:
469 		return 0;
470 	}
471 
472 	cpumask_and(affinity, affinity, &msc->accessibility);
473 
474 	return 0;
475 }
476 
477 static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
478 				  enum mpam_class_types type, u8 class_id,
479 				  int component_id)
480 {
481 	int err;
482 	struct mpam_vmsc *vmsc;
483 	struct mpam_msc_ris *ris;
484 	struct mpam_class *class;
485 	struct mpam_component *comp;
486 	struct platform_device *pdev = msc->pdev;
487 
488 	lockdep_assert_held(&mpam_list_lock);
489 
490 	if (ris_idx > MPAM_MSC_MAX_NUM_RIS)
491 		return -EINVAL;
492 
493 	if (test_and_set_bit(ris_idx, &msc->ris_idxs))
494 		return -EBUSY;
495 
496 	ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL);
497 	if (!ris)
498 		return -ENOMEM;
499 	init_garbage(&ris->garbage);
500 	ris->garbage.pdev = pdev;
501 
502 	class = mpam_class_find(class_id, type);
503 	if (IS_ERR(class))
504 		return PTR_ERR(class);
505 
506 	comp = mpam_component_find(class, component_id);
507 	if (IS_ERR(comp)) {
508 		if (list_empty(&class->components))
509 			mpam_class_destroy(class);
510 		return PTR_ERR(comp);
511 	}
512 
513 	vmsc = mpam_vmsc_find(comp, msc);
514 	if (IS_ERR(vmsc)) {
515 		if (list_empty(&comp->vmsc))
516 			mpam_component_destroy(comp);
517 		return PTR_ERR(vmsc);
518 	}
519 
520 	err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
521 	if (err) {
522 		if (list_empty(&vmsc->ris))
523 			mpam_vmsc_destroy(vmsc);
524 		return err;
525 	}
526 
527 	ris->ris_idx = ris_idx;
528 	INIT_LIST_HEAD_RCU(&ris->msc_list);
529 	INIT_LIST_HEAD_RCU(&ris->vmsc_list);
530 	ris->vmsc = vmsc;
531 
532 	cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
533 	cpumask_or(&class->affinity, &class->affinity, &ris->affinity);
534 	list_add_rcu(&ris->vmsc_list, &vmsc->ris);
535 	list_add_rcu(&ris->msc_list, &msc->ris);
536 
537 	return 0;
538 }
539 
540 static void mpam_ris_destroy(struct mpam_msc_ris *ris)
541 {
542 	struct mpam_vmsc *vmsc = ris->vmsc;
543 	struct mpam_msc *msc = vmsc->msc;
544 	struct mpam_component *comp = vmsc->comp;
545 	struct mpam_class *class = comp->class;
546 
547 	lockdep_assert_held(&mpam_list_lock);
548 
549 	/*
550 	 * It is assumed affinities don't overlap. If they do the class becomes
551 	 * unusable immediately.
552 	 */
553 	cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);
554 	cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
555 	clear_bit(ris->ris_idx, &msc->ris_idxs);
556 	list_del_rcu(&ris->msc_list);
557 	list_del_rcu(&ris->vmsc_list);
558 	add_to_garbage(ris);
559 
560 	if (list_empty(&vmsc->ris))
561 		mpam_vmsc_destroy(vmsc);
562 }
563 
564 int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
565 		    enum mpam_class_types type, u8 class_id, int component_id)
566 {
567 	int err;
568 
569 	mutex_lock(&mpam_list_lock);
570 	err = mpam_ris_create_locked(msc, ris_idx, type, class_id,
571 				     component_id);
572 	mutex_unlock(&mpam_list_lock);
573 	if (err)
574 		mpam_free_garbage();
575 
576 	return err;
577 }
578 
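/*
 * Used while probing the hardware: if firmware didn't describe this RIS,
 * create a placeholder with MPAM_CLASS_UNKNOWN so that the properties
 * discovered by mpam_msc_hw_probe() still have somewhere to live.
 */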
579 static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc,
580 						   u8 ris_idx)
581 {
582 	int err;
583 	struct mpam_msc_ris *ris;
584 
585 	lockdep_assert_held(&mpam_list_lock);
586 
587 	if (!test_bit(ris_idx, &msc->ris_idxs)) {
588 		err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN,
589 					     0, 0);
590 		if (err)
591 			return ERR_PTR(err);
592 	}
593 
594 	list_for_each_entry(ris, &msc->ris, msc_list) {
595 		if (ris->ris_idx == ris_idx)
596 			return ris;
597 	}
598 
599 	return ERR_PTR(-ENOENT);
600 }
601 
602 /*
603  * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour
604  * of NRDY, software can use this bit for any purpose" - so hardware might not
605  * implement this - but it isn't RES0.
606  *
607  * Try to see which values stick in this bit. If we can write either value,
608  * it's probably not implemented by hardware.
609  */
610 static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
611 {
612 	u32 now;
613 	u64 mon_sel;
614 	bool can_set, can_clear;
615 	struct mpam_msc *msc = ris->vmsc->msc;
616 
617 	if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
618 		return false;
619 
620 	mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) |
621 		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
622 	mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
623 
624 	_mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY);
625 	now = _mpam_read_monsel_reg(msc, mon_reg);
626 	can_set = now & MSMON___NRDY;
627 
628 	_mpam_write_monsel_reg(msc, mon_reg, 0);
629 	now = _mpam_read_monsel_reg(msc, mon_reg);
630 	can_clear = !(now & MSMON___NRDY);
631 	mpam_mon_sel_unlock(msc);
632 
633 	return (!can_set || !can_clear);
634 }
635 
636 #define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg)			\
637 	_mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg)
638 
639 static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
640 {
641 	int err;
642 	struct mpam_msc *msc = ris->vmsc->msc;
643 	struct device *dev = &msc->pdev->dev;
644 	struct mpam_props *props = &ris->props;
645 
646 	lockdep_assert_held(&msc->probe_lock);
647 	lockdep_assert_held(&msc->part_sel_lock);
648 
649 	/* Cache Portion partitioning */
650 	if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) {
651 		u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR);
652 
653 		props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features);
654 		if (props->cpbm_wd)
655 			mpam_set_feature(mpam_feat_cpor_part, props);
656 	}
657 
658 	/* Memory bandwidth partitioning */
659 	if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) {
660 		u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR);
661 
662 		/* portion bitmap resolution */
663 		props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features);
664 		if (props->mbw_pbm_bits &&
665 		    FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features))
666 			mpam_set_feature(mpam_feat_mbw_part, props);
667 
668 		props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features);
669 		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features))
670 			mpam_set_feature(mpam_feat_mbw_max, props);
671 	}
672 
673 	/* Performance Monitoring */
674 	if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) {
675 		u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR);
676 
677 		/*
678 		 * If the firmware max-nrdy-us property is missing, the
679 		 * CSU counters can't be used. Should we wait forever?
680 		 */
681 		err = device_property_read_u32(&msc->pdev->dev,
682 					       "arm,not-ready-us",
683 					       &msc->nrdy_usec);
684 
685 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) {
686 			u32 csumonidr;
687 
688 			csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR);
689 			props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr);
690 			if (props->num_csu_mon) {
691 				bool hw_managed;
692 
693 				mpam_set_feature(mpam_feat_msmon_csu, props);
694 
695 				/* Is NRDY hardware managed? */
696 				hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU);
697 				if (hw_managed)
698 					mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props);
699 			}
700 
701 			/*
702 			 * Accept the missing firmware property if NRDY appears
703 			 * un-implemented.
704 			 */
705 			if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props))
706 				dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware.\n");
707 		}
708 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
709 			bool hw_managed;
710 			u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
711 
712 			props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr);
713 			if (props->num_mbwu_mon)
714 				mpam_set_feature(mpam_feat_msmon_mbwu, props);
715 
716 			/* Is NRDY hardware managed? */
717 			hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU);
718 			if (hw_managed)
719 				mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props);
720 
721 			/*
722 			 * Don't warn about any missing firmware property for
723 			 * MBWU NRDY - it doesn't make any sense!
724 			 */
725 		}
726 	}
727 }
728 
729 static int mpam_msc_hw_probe(struct mpam_msc *msc)
730 {
731 	u64 idr;
732 	u16 partid_max;
733 	u8 ris_idx, pmg_max;
734 	struct mpam_msc_ris *ris;
735 	struct device *dev = &msc->pdev->dev;
736 
737 	lockdep_assert_held(&msc->probe_lock);
738 
739 	idr = __mpam_read_reg(msc, MPAMF_AIDR);
740 	if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) {
741 		dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n");
742 		return -EIO;
743 	}
744 
745 	/* Grab an IDR value to find out how many RIS there are */
746 	mutex_lock(&msc->part_sel_lock);
747 	idr = mpam_msc_read_idr(msc);
748 	mutex_unlock(&msc->part_sel_lock);
749 
750 	msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr);
751 
752 	/* Use these values so partid/pmg always start with a valid value */
753 	msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
754 	msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
755 
756 	for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) {
757 		mutex_lock(&msc->part_sel_lock);
758 		__mpam_part_sel(ris_idx, 0, msc);
759 		idr = mpam_msc_read_idr(msc);
760 		mutex_unlock(&msc->part_sel_lock);
761 
762 		partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
763 		pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
764 		msc->partid_max = min(msc->partid_max, partid_max);
765 		msc->pmg_max = min(msc->pmg_max, pmg_max);
766 		msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr);
767 
768 		mutex_lock(&mpam_list_lock);
769 		ris = mpam_get_or_create_ris(msc, ris_idx);
770 		mutex_unlock(&mpam_list_lock);
771 		if (IS_ERR(ris))
772 			return PTR_ERR(ris);
773 		ris->idr = idr;
774 
775 		mutex_lock(&msc->part_sel_lock);
776 		__mpam_part_sel(ris_idx, 0, msc);
777 		mpam_ris_hw_probe(ris);
778 		mutex_unlock(&msc->part_sel_lock);
779 	}
780 
781 	/* Clear any stale errors */
782 	mpam_msc_clear_esr(msc);
783 
784 	spin_lock(&partid_max_lock);
785 	mpam_partid_max = min(mpam_partid_max, msc->partid_max);
786 	mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max);
787 	spin_unlock(&partid_max_lock);
788 
789 	msc->probed = true;
790 
791 	return 0;
792 }
793 
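/*
 * Set all wd bits of the bitmap register at 'reg' for the currently
 * selected PARTID, e.g. mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM,
 * rprops->cpbm_wd) restores the cache portion bitmap to "allow everything".
 */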
794 static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
795 {
796 	u32 num_words, msb;
797 	u32 bm = ~0;
798 	int i;
799 
800 	lockdep_assert_held(&msc->part_sel_lock);
801 
802 	if (wd == 0)
803 		return;
804 
805 	/*
806 	 * Write ~0 to all but the last 32-bit word, which may
807 	 * have fewer bits...
808 	 */
809 	num_words = DIV_ROUND_UP(wd, 32);
810 	for (i = 0; i < num_words - 1; i++, reg += sizeof(bm))
811 		__mpam_write_reg(msc, reg, bm);
812 
813 	/*
814 	 * ...and then the last (maybe partial) 32-bit word. When wd is a
815 	 * multiple of 32, msb should be 31 to write a full 32bit word.
816 	 */
817 	msb = (wd - 1) % 32;
818 	bm = GENMASK(msb, 0);
819 	__mpam_write_reg(msc, reg, bm);
820 }
821 
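/*
 * Restore one partid on this RIS to its permissive reset configuration:
 * full portion bitmaps, zero minimum bandwidth and the maximum bandwidth
 * limit.
 */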
822 static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid)
823 {
824 	struct mpam_msc *msc = ris->vmsc->msc;
825 	struct mpam_props *rprops = &ris->props;
826 
827 	WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu)));
828 
829 	mutex_lock(&msc->part_sel_lock);
830 	__mpam_part_sel(ris->ris_idx, partid, msc);
831 
832 	if (mpam_has_feature(mpam_feat_cpor_part, rprops))
833 		mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd);
834 
835 	if (mpam_has_feature(mpam_feat_mbw_part, rprops))
836 		mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits);
837 
838 	if (mpam_has_feature(mpam_feat_mbw_min, rprops))
839 		mpam_write_partsel_reg(msc, MBW_MIN, 0);
840 
841 	if (mpam_has_feature(mpam_feat_mbw_max, rprops))
842 		mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX);
843 
844 	mutex_unlock(&msc->part_sel_lock);
845 }
846 
847 /*
848  * Called via smp_call_on_cpu() to prevent migration, while still being
849  * pre-emptible. Caller must hold mpam_srcu.
850  */
851 static int mpam_reset_ris(void *arg)
852 {
853 	u16 partid, partid_max;
854 	struct mpam_msc_ris *ris = arg;
855 
856 	if (ris->in_reset_state)
857 		return 0;
858 
859 	spin_lock(&partid_max_lock);
860 	partid_max = mpam_partid_max;
861 	spin_unlock(&partid_max_lock);
862 	for (partid = 0; partid <= partid_max; partid++)
863 		mpam_reset_ris_partid(ris, partid);
864 
865 	return 0;
866 }
867 
868 /*
869  * Get the preferred CPU for this MSC. If it is accessible from this CPU,
870  * this CPU is preferred. This can be preempted/migrated; that will only result
871  * in more work.
872  */
873 static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc)
874 {
875 	int cpu = raw_smp_processor_id();
876 
877 	if (cpumask_test_cpu(cpu, &msc->accessibility))
878 		return cpu;
879 
880 	return cpumask_first_and(&msc->accessibility, cpu_online_mask);
881 }
882 
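/* Run fn on a CPU that can access this MSC's registers. */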
883 static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg)
884 {
885 	lockdep_assert_irqs_enabled();
886 	lockdep_assert_cpus_held();
887 	WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu)));
888 
889 	return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true);
890 }
891 
892 static void mpam_reset_msc(struct mpam_msc *msc, bool online)
893 {
894 	struct mpam_msc_ris *ris;
895 
896 	list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) {
897 		mpam_touch_msc(msc, &mpam_reset_ris, ris);
898 
899 		/*
900 		 * Set in_reset_state when coming online. The reset state
901 		 * for non-zero partid may be lost while the CPUs are offline.
902 		 */
903 		ris->in_reset_state = online;
904 	}
905 }
906 
907 static void _enable_percpu_irq(void *_irq)
908 {
909 	int *irq = _irq;
910 
911 	enable_percpu_irq(*irq, IRQ_TYPE_NONE);
912 }
913 
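/*
 * cpuhp callback once mpam is enabled: re-enable the error PPI if there is
 * one, and reset the MSC the first time one of its CPUs comes online, as
 * the reset state may have been lost while all of them were offline.
 */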
914 static int mpam_cpu_online(unsigned int cpu)
915 {
916 	struct mpam_msc *msc;
917 
918 	guard(srcu)(&mpam_srcu);
919 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
920 				 srcu_read_lock_held(&mpam_srcu)) {
921 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
922 			continue;
923 
924 		if (msc->reenable_error_ppi)
925 			_enable_percpu_irq(&msc->reenable_error_ppi);
926 
927 		if (atomic_fetch_inc(&msc->online_refs) == 0)
928 			mpam_reset_msc(msc, true);
929 	}
930 
931 	return 0;
932 }
933 
934 /* Before mpam is enabled, try to probe new MSCs */
935 static int mpam_discovery_cpu_online(unsigned int cpu)
936 {
937 	int err = 0;
938 	struct mpam_msc *msc;
939 	bool new_device_probed = false;
940 
941 	if (mpam_is_enabled())
942 		return 0;
943 
944 	guard(srcu)(&mpam_srcu);
945 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
946 				 srcu_read_lock_held(&mpam_srcu)) {
947 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
948 			continue;
949 
950 		mutex_lock(&msc->probe_lock);
951 		if (!msc->probed)
952 			err = mpam_msc_hw_probe(msc);
953 		mutex_unlock(&msc->probe_lock);
954 
955 		if (err)
956 			break;
957 		new_device_probed = true;
958 	}
959 
960 	if (new_device_probed && !err)
961 		schedule_work(&mpam_enable_work);
962 	if (err) {
963 		mpam_disable_reason = "error during probing";
964 		schedule_work(&mpam_broken_work);
965 	}
966 
967 	return err;
968 }
969 
970 static int mpam_cpu_offline(unsigned int cpu)
971 {
972 	struct mpam_msc *msc;
973 
974 	guard(srcu)(&mpam_srcu);
975 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
976 				 srcu_read_lock_held(&mpam_srcu)) {
977 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
978 			continue;
979 
980 		if (msc->reenable_error_ppi)
981 			disable_percpu_irq(msc->reenable_error_ppi);
982 
983 		if (atomic_dec_and_test(&msc->online_refs))
984 			mpam_reset_msc(msc, false);
985 	}
986 
987 	return 0;
988 }
989 
990 static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online),
991 					  int (*offline)(unsigned int offline),
992 					  char *name)
993 {
994 	mutex_lock(&mpam_cpuhp_state_lock);
995 	if (mpam_cpuhp_state) {
996 		cpuhp_remove_state(mpam_cpuhp_state);
997 		mpam_cpuhp_state = 0;
998 	}
999 
1000 	mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online,
1001 					     offline);
1002 	if (mpam_cpuhp_state <= 0) {
1003 		pr_err("Failed to register cpuhp callbacks\n");
1004 		mpam_cpuhp_state = 0;
1005 	}
1006 	mutex_unlock(&mpam_cpuhp_state_lock);
1007 }
1008 
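/*
 * A per-CPU error interrupt hands the handler a per-CPU dev_id, so give
 * each CPU that can access the MSC its own pointer back to it.
 */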
1009 static int __setup_ppi(struct mpam_msc *msc)
1010 {
1011 	int cpu;
1012 
1013 	msc->error_dev_id = alloc_percpu(struct mpam_msc *);
1014 	if (!msc->error_dev_id)
1015 		return -ENOMEM;
1016 
1017 	for_each_cpu(cpu, &msc->accessibility)
1018 		*per_cpu_ptr(msc->error_dev_id, cpu) = msc;
1019 
1020 	return 0;
1021 }
1022 
1023 static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
1024 {
1025 	int irq;
1026 
1027 	irq = platform_get_irq_byname_optional(msc->pdev, "error");
1028 	if (irq <= 0)
1029 		return 0;
1030 
1031 	/* Allocate and initialise the percpu device pointer for PPI */
1032 	if (irq_is_percpu(irq))
1033 		return __setup_ppi(msc);
1034 
1035 	/* Sanity check: a shared error interrupt can fire on any CPU, so the MSC must be accessible from all of them */
1036 	if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
1037 		pr_err_once("msc:%u is a private resource with a shared error interrupt\n",
1038 			    msc->id);
1039 		return -EINVAL;
1040 	}
1041 
1042 	return 0;
1043 }
1044 
1045 /*
1046  * An MSC can control traffic from a set of CPUs, but may only be accessible
1047  * from a (hopefully wider) set of CPUs. The common reason for this is power
1048  * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the
1049  * corresponding cache may also be powered off. By making accesses from
1050  * one of those CPUs, we ensure we don't access a cache that's powered off.
1051  */
1052 static void update_msc_accessibility(struct mpam_msc *msc)
1053 {
1054 	u32 affinity_id;
1055 	int err;
1056 
1057 	err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
1058 				       &affinity_id);
1059 	if (err)
1060 		cpumask_copy(&msc->accessibility, cpu_possible_mask);
1061 	else
1062 		acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility);
1063 }
1064 
1065 /*
1066  * There are two ways of reaching a struct mpam_msc_ris. Via the
1067  * class->component->vmsc->ris, or via the msc.
1068  * When destroying the msc, the other side needs unlinking and cleaning up too.
1069  */
1070 static void mpam_msc_destroy(struct mpam_msc *msc)
1071 {
1072 	struct platform_device *pdev = msc->pdev;
1073 	struct mpam_msc_ris *ris, *tmp;
1074 
1075 	lockdep_assert_held(&mpam_list_lock);
1076 
1077 	list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list)
1078 		mpam_ris_destroy(ris);
1079 
1080 	list_del_rcu(&msc->all_msc_list);
1081 	platform_set_drvdata(pdev, NULL);
1082 
1083 	add_to_garbage(msc);
1084 }
1085 
1086 static void mpam_msc_drv_remove(struct platform_device *pdev)
1087 {
1088 	struct mpam_msc *msc = platform_get_drvdata(pdev);
1089 
1090 	mutex_lock(&mpam_list_lock);
1091 	mpam_msc_destroy(msc);
1092 	mutex_unlock(&mpam_list_lock);
1093 
1094 	mpam_free_garbage();
1095 }
1096 
1097 static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev)
1098 {
1099 	int err;
1100 	u32 tmp;
1101 	struct mpam_msc *msc;
1102 	struct resource *msc_res;
1103 	struct device *dev = &pdev->dev;
1104 
1105 	lockdep_assert_held(&mpam_list_lock);
1106 
1107 	msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
1108 	if (!msc)
1109 		return ERR_PTR(-ENOMEM);
1110 	init_garbage(&msc->garbage);
1111 	msc->garbage.pdev = pdev;
1112 
1113 	err = devm_mutex_init(dev, &msc->probe_lock);
1114 	if (err)
1115 		return ERR_PTR(err);
1116 
1117 	err = devm_mutex_init(dev, &msc->part_sel_lock);
1118 	if (err)
1119 		return ERR_PTR(err);
1120 
1121 	err = devm_mutex_init(dev, &msc->error_irq_lock);
1122 	if (err)
1123 		return ERR_PTR(err);
1124 	mpam_mon_sel_lock_init(msc);
1125 	msc->id = pdev->id;
1126 	msc->pdev = pdev;
1127 	INIT_LIST_HEAD_RCU(&msc->all_msc_list);
1128 	INIT_LIST_HEAD_RCU(&msc->ris);
1129 
1130 	update_msc_accessibility(msc);
1131 	if (cpumask_empty(&msc->accessibility)) {
1132 		dev_err_once(dev, "MSC is not accessible from any CPU!\n");
1133 		return ERR_PTR(-EINVAL);
1134 	}
1135 
1136 	err = mpam_msc_setup_error_irq(msc);
1137 	if (err)
1138 		return ERR_PTR(err);
1139 
1140 	if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp))
1141 		msc->iface = MPAM_IFACE_MMIO;
1142 	else
1143 		msc->iface = MPAM_IFACE_PCC;
1144 
1145 	if (msc->iface == MPAM_IFACE_MMIO) {
1146 		void __iomem *io;
1147 
1148 		io = devm_platform_get_and_ioremap_resource(pdev, 0,
1149 							    &msc_res);
1150 		if (IS_ERR(io)) {
1151 			dev_err_once(dev, "Failed to map MSC base address\n");
1152 			return ERR_CAST(io);
1153 		}
1154 		msc->mapped_hwpage_sz = resource_size(msc_res);
1155 		msc->mapped_hwpage = io;
1156 	} else {
1157 		return ERR_PTR(-EINVAL);
1158 	}
1159 
1160 	list_add_rcu(&msc->all_msc_list, &mpam_all_msc);
1161 	platform_set_drvdata(pdev, msc);
1162 
1163 	return msc;
1164 }
1165 
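/* The number of MSCs described by firmware, read by mpam_msc_driver_init(). */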
1166 static int fw_num_msc;
1167 
1168 static int mpam_msc_drv_probe(struct platform_device *pdev)
1169 {
1170 	int err;
1171 	struct mpam_msc *msc = NULL;
1172 	void *plat_data = pdev->dev.platform_data;
1173 
1174 	mutex_lock(&mpam_list_lock);
1175 	msc = do_mpam_msc_drv_probe(pdev);
1176 	mutex_unlock(&mpam_list_lock);
1177 
1178 	if (IS_ERR(msc))
1179 		return PTR_ERR(msc);
1180 
1181 	/* Create RIS entries described by firmware */
1182 	err = acpi_mpam_parse_resources(msc, plat_data);
1183 	if (err) {
1184 		mpam_msc_drv_remove(pdev);
1185 		return err;
1186 	}
1187 
1188 	if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc)
1189 		mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL,
1190 					      "mpam:drv_probe");
1191 
1192 	return 0;
1193 }
1194 
1195 static struct platform_driver mpam_msc_driver = {
1196 	.driver = {
1197 		.name = "mpam_msc",
1198 	},
1199 	.probe = mpam_msc_drv_probe,
1200 	.remove = mpam_msc_drv_remove,
1201 };
1202 
1203 /* Any of these features mean the BWA_WD field is valid. */
1204 static bool mpam_has_bwa_wd_feature(struct mpam_props *props)
1205 {
1206 	if (mpam_has_feature(mpam_feat_mbw_min, props))
1207 		return true;
1208 	if (mpam_has_feature(mpam_feat_mbw_max, props))
1209 		return true;
1210 	return false;
1211 }
1212 
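/*
 * Helpers for __props_mismatch(): a field mismatches if the parent has the
 * feature but the child's value differs, or if the child lacks the feature
 * and the two do not alias the same resource.
 */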
1213 #define MISMATCHED_HELPER(parent, child, helper, field, alias)		\
1214 	helper(parent) &&						\
1215 	((helper(child) && (parent)->field != (child)->field) ||	\
1216 	 (!helper(child) && !(alias)))
1217 
1218 #define MISMATCHED_FEAT(parent, child, feat, field, alias)		     \
1219 	mpam_has_feature((feat), (parent)) &&				     \
1220 	((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \
1221 	 (!mpam_has_feature((feat), (child)) && !(alias)))
1222 
1223 #define CAN_MERGE_FEAT(parent, child, feat, alias)			\
1224 	(alias) && !mpam_has_feature((feat), (parent)) &&		\
1225 	mpam_has_feature((feat), (child))
1226 
1227 /*
1228  * Combine two props fields.
1229  * If this is for controls that alias the same resource, it is safe to just
1230  * copy the values over. If two aliasing controls implement the same scheme,
1231  * a safe value must be picked.
1232  * For non-aliasing controls, these control different resources, and the
1233  * resulting safe value must be compatible with both. When merging values in
1234  * the tree, all the aliasing resources must be handled first.
1235  * On mismatch, parent is modified.
1236  */
1237 static void __props_mismatch(struct mpam_props *parent,
1238 			     struct mpam_props *child, bool alias)
1239 {
1240 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) {
1241 		parent->cpbm_wd = child->cpbm_wd;
1242 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part,
1243 				   cpbm_wd, alias)) {
1244 		pr_debug("cleared cpor_part\n");
1245 		mpam_clear_feature(mpam_feat_cpor_part, parent);
1246 		parent->cpbm_wd = 0;
1247 	}
1248 
1249 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) {
1250 		parent->mbw_pbm_bits = child->mbw_pbm_bits;
1251 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part,
1252 				   mbw_pbm_bits, alias)) {
1253 		pr_debug("cleared mbw_part\n");
1254 		mpam_clear_feature(mpam_feat_mbw_part, parent);
1255 		parent->mbw_pbm_bits = 0;
1256 	}
1257 
1258 	/* bwa_wd is a count of bits, fewer bits means less precision */
1259 	if (alias && !mpam_has_bwa_wd_feature(parent) &&
1260 	    mpam_has_bwa_wd_feature(child)) {
1261 		parent->bwa_wd = child->bwa_wd;
1262 	} else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature,
1263 				     bwa_wd, alias)) {
1264 		pr_debug("took the min bwa_wd\n");
1265 		parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd);
1266 	}
1267 
1268 	/* For num properties, take the minimum */
1269 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) {
1270 		parent->num_csu_mon = child->num_csu_mon;
1271 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu,
1272 				   num_csu_mon, alias)) {
1273 		pr_debug("took the min num_csu_mon\n");
1274 		parent->num_csu_mon = min(parent->num_csu_mon,
1275 					  child->num_csu_mon);
1276 	}
1277 
1278 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) {
1279 		parent->num_mbwu_mon = child->num_mbwu_mon;
1280 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu,
1281 				   num_mbwu_mon, alias)) {
1282 		pr_debug("took the min num_mbwu_mon\n");
1283 		parent->num_mbwu_mon = min(parent->num_mbwu_mon,
1284 					   child->num_mbwu_mon);
1285 	}
1286 
1287 	if (alias) {
1288 		/* Merge features for aliased resources */
1289 		bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1290 	} else {
1291 		/* Clear missing features for non aliasing */
1292 		bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1293 	}
1294 }
1295 
1296 /*
1297  * If a vmsc doesn't match class feature/configuration, do the right thing(tm).
1298  * For 'num' properties we can just take the minimum.
1299  * For properties where the mismatched unused bits would make a difference, we
1300  * nobble the class feature, as we can't configure all the resources.
1301  * e.g. The L3 cache is composed of two resources with 13-bit and 17-bit portion
1302  * bitmaps respectively.
1303  */
1304 static void
1305 __class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc)
1306 {
1307 	struct mpam_props *cprops = &class->props;
1308 	struct mpam_props *vprops = &vmsc->props;
1309 	struct device *dev = &vmsc->msc->pdev->dev;
1310 
1311 	lockdep_assert_held(&mpam_list_lock); /* we modify class */
1312 
1313 	dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n",
1314 		(long)cprops->features, (long)vprops->features);
1315 
1316 	/* Take the safe value for any common features */
1317 	__props_mismatch(cprops, vprops, false);
1318 }
1319 
1320 static void
1321 __vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris)
1322 {
1323 	struct mpam_props *rprops = &ris->props;
1324 	struct mpam_props *vprops = &vmsc->props;
1325 	struct device *dev = &vmsc->msc->pdev->dev;
1326 
1327 	lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */
1328 
1329 	dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n",
1330 		(long)vprops->features, (long)rprops->features);
1331 
1332 	/*
1333 	 * Merge mismatched features - Copy any features that aren't common,
1334 	 * but take the safe value for any common features.
1335 	 */
1336 	__props_mismatch(vprops, rprops, true);
1337 }
1338 
1339 /*
1340  * Copy the first component's first vMSC's properties and features to the
1341  * class. __class_props_mismatch() will remove conflicts.
1342  * It is not possible to have a class with no components, or a component with
1343  * no resources. The vMSC properties have already been built.
1344  */
1345 static void mpam_enable_init_class_features(struct mpam_class *class)
1346 {
1347 	struct mpam_vmsc *vmsc;
1348 	struct mpam_component *comp;
1349 
1350 	comp = list_first_entry(&class->components,
1351 				struct mpam_component, class_list);
1352 	vmsc = list_first_entry(&comp->vmsc,
1353 				struct mpam_vmsc, comp_list);
1354 
1355 	class->props = vmsc->props;
1356 }
1357 
1358 static void mpam_enable_merge_vmsc_features(struct mpam_component *comp)
1359 {
1360 	struct mpam_vmsc *vmsc;
1361 	struct mpam_msc_ris *ris;
1362 	struct mpam_class *class = comp->class;
1363 
1364 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
1365 		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
1366 			__vmsc_props_mismatch(vmsc, ris);
1367 			class->nrdy_usec = max(class->nrdy_usec,
1368 					       vmsc->msc->nrdy_usec);
1369 		}
1370 	}
1371 }
1372 
1373 static void mpam_enable_merge_class_features(struct mpam_component *comp)
1374 {
1375 	struct mpam_vmsc *vmsc;
1376 	struct mpam_class *class = comp->class;
1377 
1378 	list_for_each_entry(vmsc, &comp->vmsc, comp_list)
1379 		__class_props_mismatch(class, vmsc);
1380 }
1381 
1382 /*
1383  * Merge all the common resource features into class.
1384  * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features()
1385  * as the first step so that mpam_enable_init_class_features() can initialise
1386  * the class with a representative set of features.
1387  * Next, mpam_enable_merge_class_features() bitwise-ands all the vmsc
1388  * features to form the class features.
1389  * Other features are the min/max as appropriate.
1390  *
1391  * To avoid walking the whole tree twice, the class->nrdy_usec property is
1392  * updated when working with the vmsc as it is a max(), and doesn't need
1393  * initialising first.
1394  */
1395 static void mpam_enable_merge_features(struct list_head *all_classes_list)
1396 {
1397 	struct mpam_class *class;
1398 	struct mpam_component *comp;
1399 
1400 	lockdep_assert_held(&mpam_list_lock);
1401 
1402 	list_for_each_entry(class, all_classes_list, classes_list) {
1403 		list_for_each_entry(comp, &class->components, class_list)
1404 			mpam_enable_merge_vmsc_features(comp);
1405 
1406 		mpam_enable_init_class_features(class);
1407 
1408 		list_for_each_entry(comp, &class->components, class_list)
1409 			mpam_enable_merge_class_features(comp);
1410 	}
1411 }
1412 
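/* Names for MPAMF_ESR.ERRCODE values, printed by the error interrupt handler. */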
1413 static char *mpam_errcode_names[16] = {
1414 	[MPAM_ERRCODE_NONE]			= "No error",
1415 	[MPAM_ERRCODE_PARTID_SEL_RANGE]		= "PARTID_SEL_Range",
1416 	[MPAM_ERRCODE_REQ_PARTID_RANGE]		= "Req_PARTID_Range",
1417 	[MPAM_ERRCODE_MSMONCFG_ID_RANGE]	= "MSMONCFG_ID_RANGE",
1418 	[MPAM_ERRCODE_REQ_PMG_RANGE]		= "Req_PMG_Range",
1419 	[MPAM_ERRCODE_MONITOR_RANGE]		= "Monitor_Range",
1420 	[MPAM_ERRCODE_INTPARTID_RANGE]		= "intPARTID_Range",
1421 	[MPAM_ERRCODE_UNEXPECTED_INTERNAL]	= "Unexpected_INTERNAL",
1422 	[MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL]	= "Undefined_RIS_PART_SEL",
1423 	[MPAM_ERRCODE_RIS_NO_CONTROL]		= "RIS_No_Control",
1424 	[MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL]	= "Undefined_RIS_MON_SEL",
1425 	[MPAM_ERRCODE_RIS_NO_MONITOR]		= "RIS_No_Monitor",
1426 	[12 ... 15] = "Reserved"
1427 };
1428 
1429 static int mpam_enable_msc_ecr(void *_msc)
1430 {
1431 	struct mpam_msc *msc = _msc;
1432 
1433 	__mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN);
1434 
1435 	return 0;
1436 }
1437 
1438 /* This can run from mpam_disable() and from the interrupt handler on the same CPU */
1439 static int mpam_disable_msc_ecr(void *_msc)
1440 {
1441 	struct mpam_msc *msc = _msc;
1442 
1443 	__mpam_write_reg(msc, MPAMF_ECR, 0);
1444 
1445 	return 0;
1446 }
1447 
1448 static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
1449 {
1450 	u64 reg;
1451 	u16 partid;
1452 	u8 errcode, pmg, ris;
1453 
1454 	if (WARN_ON_ONCE(!msc) ||
1455 	    WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
1456 					   &msc->accessibility)))
1457 		return IRQ_NONE;
1458 
1459 	reg = mpam_msc_read_esr(msc);
1460 
1461 	errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
1462 	if (!errcode)
1463 		return IRQ_NONE;
1464 
1465 	/* Clear level triggered irq */
1466 	mpam_msc_clear_esr(msc);
1467 
1468 	partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg);
1469 	pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
1470 	ris = FIELD_GET(MPAMF_ESR_RIS, reg);
1471 
1472 	pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n",
1473 			   msc->id, mpam_errcode_names[errcode], partid, pmg,
1474 			   ris);
1475 
1476 	/* Disable this interrupt. */
1477 	mpam_disable_msc_ecr(msc);
1478 
1479 	/* Are we racing with the thread disabling MPAM? */
1480 	if (!mpam_is_enabled())
1481 		return IRQ_HANDLED;
1482 
1483 	/*
1484 	 * Schedule the teardown work. Don't use a threaded IRQ as we can't
1485 	 * unregister the interrupt from the threaded part of the handler.
1486 	 */
1487 	mpam_disable_reason = "hardware error interrupt";
1488 	schedule_work(&mpam_broken_work);
1489 
1490 	return IRQ_HANDLED;
1491 }
1492 
1493 static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
1494 {
1495 	struct mpam_msc *msc = *(struct mpam_msc **)dev_id;
1496 
1497 	return __mpam_irq_handler(irq, msc);
1498 }
1499 
1500 static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
1501 {
1502 	struct mpam_msc *msc = dev_id;
1503 
1504 	return __mpam_irq_handler(irq, msc);
1505 }
1506 
1507 static int mpam_register_irqs(void)
1508 {
1509 	int err, irq;
1510 	struct mpam_msc *msc;
1511 
1512 	lockdep_assert_cpus_held();
1513 
1514 	guard(srcu)(&mpam_srcu);
1515 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1516 				 srcu_read_lock_held(&mpam_srcu)) {
1517 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1518 		if (irq <= 0)
1519 			continue;
1520 
1521 		/* The MPAM spec says the interrupt can be SPI, PPI or LPI */
1522 		/* We anticipate sharing the interrupt with other MSCs */
1523 		if (irq_is_percpu(irq)) {
1524 			err = request_percpu_irq(irq, &mpam_ppi_handler,
1525 						 "mpam:msc:error",
1526 						 msc->error_dev_id);
1527 			if (err)
1528 				return err;
1529 
1530 			msc->reenable_error_ppi = irq;
1531 			smp_call_function_many(&msc->accessibility,
1532 					       &_enable_percpu_irq, &irq,
1533 					       true);
1534 		} else {
1535 			err = devm_request_irq(&msc->pdev->dev, irq,
1536 					       &mpam_spi_handler, IRQF_SHARED,
1537 					       "mpam:msc:error", msc);
1538 			if (err)
1539 				return err;
1540 		}
1541 
1542 		mutex_lock(&msc->error_irq_lock);
1543 		msc->error_irq_req = true;
1544 		mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
1545 		msc->error_irq_hw_enabled = true;
1546 		mutex_unlock(&msc->error_irq_lock);
1547 	}
1548 
1549 	return 0;
1550 }
1551 
1552 static void mpam_unregister_irqs(void)
1553 {
1554 	int irq;
1555 	struct mpam_msc *msc;
1556 
1557 	guard(cpus_read_lock)();
1558 	guard(srcu)(&mpam_srcu);
1559 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1560 				 srcu_read_lock_held(&mpam_srcu)) {
1561 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1562 		if (irq <= 0)
1563 			continue;
1564 
1565 		mutex_lock(&msc->error_irq_lock);
1566 		if (msc->error_irq_hw_enabled) {
1567 			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
1568 			msc->error_irq_hw_enabled = false;
1569 		}
1570 
1571 		if (msc->error_irq_req) {
1572 			if (irq_is_percpu(irq)) {
1573 				msc->reenable_error_ppi = 0;
1574 				free_percpu_irq(irq, msc->error_dev_id);
1575 			} else {
1576 				devm_free_irq(&msc->pdev->dev, irq, msc);
1577 			}
1578 			msc->error_irq_req = false;
1579 		}
1580 		mutex_unlock(&msc->error_irq_lock);
1581 	}
1582 }
1583 
1584 static void mpam_enable_once(void)
1585 {
1586 	int err;
1587 
1588 	/*
1589 	 * Once the cpuhp callbacks have been changed, mpam_partid_max can no
1590 	 * longer change.
1591 	 */
1592 	spin_lock(&partid_max_lock);
1593 	partid_max_published = true;
1594 	spin_unlock(&partid_max_lock);
1595 
1596 	/*
1597 	 * If all the MSC have been probed, enabling the IRQs happens next.
1598 	 * That involves cross-calling to a CPU that can reach the MSC, and
1599 	 * the locks must be taken in this order:
1600 	 */
1601 	cpus_read_lock();
1602 	mutex_lock(&mpam_list_lock);
1603 	mpam_enable_merge_features(&mpam_classes);
1604 
1605 	err = mpam_register_irqs();
1606 
1607 	mutex_unlock(&mpam_list_lock);
1608 	cpus_read_unlock();
1609 
1610 	if (err) {
1611 		pr_warn("Failed to register irqs: %d\n", err);
1612 		mpam_disable_reason = "failed to register error interrupts";
1613 		schedule_work(&mpam_broken_work);
1614 		return;
1615 	}
1616 
1617 	static_branch_enable(&mpam_enabled);
1618 	mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline,
1619 				      "mpam:online");
1620 
1621 	/* Use printk() to avoid the pr_fmt adding the function name. */
1622 	printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n",
1623 	       mpam_partid_max + 1, mpam_pmg_max + 1);
1624 }
1625 
1626 static void mpam_reset_component_locked(struct mpam_component *comp)
1627 {
1628 	struct mpam_vmsc *vmsc;
1629 
1630 	lockdep_assert_cpus_held();
1631 
1632 	guard(srcu)(&mpam_srcu);
1633 	list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
1634 				 srcu_read_lock_held(&mpam_srcu)) {
1635 		struct mpam_msc *msc = vmsc->msc;
1636 		struct mpam_msc_ris *ris;
1637 
1638 		list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
1639 					 srcu_read_lock_held(&mpam_srcu)) {
1640 			if (!ris->in_reset_state)
1641 				mpam_touch_msc(msc, mpam_reset_ris, ris);
1642 			ris->in_reset_state = true;
1643 		}
1644 	}
1645 }
1646 
1647 static void mpam_reset_class_locked(struct mpam_class *class)
1648 {
1649 	struct mpam_component *comp;
1650 
1651 	lockdep_assert_cpus_held();
1652 
1653 	guard(srcu)(&mpam_srcu);
1654 	list_for_each_entry_srcu(comp, &class->components, class_list,
1655 				 srcu_read_lock_held(&mpam_srcu))
1656 		mpam_reset_component_locked(comp);
1657 }
1658 
1659 static void mpam_reset_class(struct mpam_class *class)
1660 {
1661 	cpus_read_lock();
1662 	mpam_reset_class_locked(class);
1663 	cpus_read_unlock();
1664 }
1665 
1666 /*
1667  * Called in response to an error IRQ.
1668  * All of MPAM's errors indicate a software bug; restore any modified
1669  * controls to their reset values.
1670  */
1671 void mpam_disable(struct work_struct *ignored)
1672 {
1673 	int idx;
1674 	struct mpam_class *class;
1675 	struct mpam_msc *msc, *tmp;
1676 
1677 	mutex_lock(&mpam_cpuhp_state_lock);
1678 	if (mpam_cpuhp_state) {
1679 		cpuhp_remove_state(mpam_cpuhp_state);
1680 		mpam_cpuhp_state = 0;
1681 	}
1682 	mutex_unlock(&mpam_cpuhp_state_lock);
1683 
1684 	static_branch_disable(&mpam_enabled);
1685 
1686 	mpam_unregister_irqs();
1687 
1688 	idx = srcu_read_lock(&mpam_srcu);
1689 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
1690 				 srcu_read_lock_held(&mpam_srcu))
1691 		mpam_reset_class(class);
1692 	srcu_read_unlock(&mpam_srcu, idx);
1693 
1694 	mutex_lock(&mpam_list_lock);
1695 	list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list)
1696 		mpam_msc_destroy(msc);
1697 	mutex_unlock(&mpam_list_lock);
1698 	mpam_free_garbage();
1699 
1700 	pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason);
1701 }
1702 
1703 /*
1704  * Enable mpam once all devices have been probed.
1705  * Scheduled by mpam_discovery_cpu_online() once all devices have been created.
1706  * Also scheduled when new devices are probed when new CPUs come online.
1707  */
1708 void mpam_enable(struct work_struct *work)
1709 {
1710 	static atomic_t once;
1711 	struct mpam_msc *msc;
1712 	bool all_devices_probed = true;
1713 
1714 	/* Have we probed all the hw devices? */
1715 	guard(srcu)(&mpam_srcu);
1716 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1717 				 srcu_read_lock_held(&mpam_srcu)) {
1718 		mutex_lock(&msc->probe_lock);
1719 		if (!msc->probed)
1720 			all_devices_probed = false;
1721 		mutex_unlock(&msc->probe_lock);
1722 
1723 		if (!all_devices_probed)
1724 			break;
1725 	}
1726 
1727 	if (all_devices_probed && !atomic_fetch_inc(&once))
1728 		mpam_enable_once();
1729 }
1730 
1731 static int __init mpam_msc_driver_init(void)
1732 {
1733 	if (!system_supports_mpam())
1734 		return -EOPNOTSUPP;
1735 
1736 	init_srcu_struct(&mpam_srcu);
1737 
1738 	fw_num_msc = acpi_mpam_count_msc();
1739 	if (fw_num_msc <= 0) {
1740 		pr_err("No MSC devices found in firmware\n");
1741 		return -EINVAL;
1742 	}
1743 
1744 	return platform_driver_register(&mpam_msc_driver);
1745 }
1746 
1747 /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */
1748 subsys_initcall(mpam_msc_driver_init);
1749