xref: /linux/drivers/resctrl/mpam_devices.c (revision 49aa621c4dcaf8e3cfeb9e73d07a9746b889f9e8)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2025 Arm Ltd.
3 
4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
5 
6 #include <linux/acpi.h>
7 #include <linux/atomic.h>
8 #include <linux/arm_mpam.h>
9 #include <linux/bitfield.h>
10 #include <linux/bitmap.h>
11 #include <linux/cacheinfo.h>
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/device.h>
15 #include <linux/errno.h>
16 #include <linux/gfp.h>
17 #include <linux/interrupt.h>
18 #include <linux/irq.h>
19 #include <linux/irqdesc.h>
20 #include <linux/list.h>
21 #include <linux/lockdep.h>
22 #include <linux/mutex.h>
23 #include <linux/platform_device.h>
24 #include <linux/printk.h>
25 #include <linux/srcu.h>
26 #include <linux/spinlock.h>
27 #include <linux/types.h>
28 #include <linux/workqueue.h>
29 
30 #include "mpam_internal.h"
31 
32 /*
33  * mpam_list_lock protects the SRCU lists when writing. Once the
34  * mpam_enabled key is enabled these lists are read-only,
35  * unless the error interrupt disables the driver.
36  */
37 static DEFINE_MUTEX(mpam_list_lock);
38 static LIST_HEAD(mpam_all_msc);
39 
40 struct srcu_struct mpam_srcu;
41 
42 /*
43  * Number of MSCs that have been probed. Once all MSCs have been probed MPAM
44  * can be enabled.
45  */
46 static atomic_t mpam_num_msc;
47 
48 static int mpam_cpuhp_state;
49 static DEFINE_MUTEX(mpam_cpuhp_state_lock);
50 
51 /*
52  * The smallest common values for any CPU or MSC in the system.
53  * Generating traffic outside this range will result in screaming interrupts.
54  */
55 u16 mpam_partid_max;
56 u8 mpam_pmg_max;
57 static bool partid_max_init, partid_max_published;
58 static DEFINE_SPINLOCK(partid_max_lock);
59 
60 /*
61  * mpam is enabled once all devices have been probed from CPU online callbacks,
62  * scheduled via this work_struct. If access to an MSC depends on a CPU that
63  * was not brought online at boot, this can happen surprisingly late.
64  */
65 static DECLARE_WORK(mpam_enable_work, &mpam_enable);
66 
67 /*
68  * All mpam error interrupts indicate a software bug. On receipt, disable the
69  * driver.
70  */
71 static DECLARE_WORK(mpam_broken_work, &mpam_disable);
72 
73 /* The reason printed when mpam is disabled, to aid debugging */
74 static char *mpam_disable_reason;
75 
76 /*
77  * An MSC is a physical container for controls and monitors, each identified by
78  * their RIS index. These share a base-address, interrupts and some MMIO
79  * registers. A vMSC is a virtual container for RIS in an MSC that control or
80  * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but
81  * not all RIS in an MSC share a vMSC.
82  *
83  * Components are a group of vMSC that control or monitor the same thing but
84  * are from different MSC, so have different base-address, interrupts etc.
85  * Classes are the set of components of the same type.
86  *
87  * The features of a vMSC are the union of the features of the RIS it
88  * contains. The features of a Class or Component are the common subset
89  * of the vMSCs they contain.
90  *
91  * e.g. The system cache may have bandwidth controls on multiple interfaces,
92  * for regulating traffic from devices independently of traffic from CPUs.
93  * If these are two RIS in one MSC, they will be treated as controlling
94  * different things, and will not share a vMSC/component/class.
95  *
96  * e.g. The L2 may have one MSC and two RIS, one for cache-controls another
97  * for bandwidth. These two RIS are members of the same vMSC.
98  *
99  * e.g. The set of RIS that make up the L2 are grouped as a component. These
100  * are sometimes termed slices. They should be configured the same, as if there
101  * were only one.
102  *
103  * e.g. The SoC probably has more than one L2, each attached to a distinct set
104  * of CPUs. All the L2 components are grouped as a class.
105  *
106  * When creating an MSC, struct mpam_msc is added to the mpam_all_msc list,
107  * then linked via struct mpam_ris to a vmsc, component and class.
108  * The same MSC may exist under different class->component->vmsc paths, but the
109  * RIS index will be unique.
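 *
 * e.g. One possible topology, matching the examples above:
 *
 *   class (L2 cache)
 *   +-- component (L2 instance 0)
 *   |   +-- vmsc (MSC 0)
 *   |       +-- ris 0 (cache portion controls)
 *   |       +-- ris 1 (bandwidth controls)
 *   +-- component (L2 instance 1)
 *       +-- vmsc (MSC 1)
 *           +-- ris 0 (cache portion controls)
 *           +-- ris 1 (bandwidth controls)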
110  */
111 LIST_HEAD(mpam_classes);
112 
113 /* List of all objects that can be free()d after synchronize_srcu() */
114 static LLIST_HEAD(mpam_garbage);
115 
116 static inline void init_garbage(struct mpam_garbage *garbage)
117 {
118 	init_llist_node(&garbage->llist);
119 }
120 
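/*
 * Stash an object on the garbage list. The object must embed a
 * struct mpam_garbage named 'garbage'. Readers may still hold SRCU
 * references, so nothing is freed until mpam_free_garbage() has called
 * synchronize_srcu().
 */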
121 #define add_to_garbage(x)				\
122 do {							\
123 	__typeof__(x) _x = (x);				\
124 	_x->garbage.to_free = _x;			\
125 	llist_add(&_x->garbage.llist, &mpam_garbage);	\
126 } while (0)
127 
128 static void mpam_free_garbage(void)
129 {
130 	struct mpam_garbage *iter, *tmp;
131 	struct llist_node *to_free = llist_del_all(&mpam_garbage);
132 
133 	if (!to_free)
134 		return;
135 
136 	synchronize_srcu(&mpam_srcu);
137 
138 	llist_for_each_entry_safe(iter, tmp, to_free, llist) {
139 		if (iter->pdev)
140 			devm_kfree(&iter->pdev->dev, iter->to_free);
141 		else
142 			kfree(iter->to_free);
143 	}
144 }
145 
146 static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
147 {
148 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
149 
150 	return readl_relaxed(msc->mapped_hwpage + reg);
151 }
152 
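/*
 * The mpam_{read,write}_{partsel,monsel}_reg() helpers below prepend the
 * architectural register prefix (MPAMF_ for ID registers, MPAMCFG_ for
 * configuration, MSMON_ for monitor registers) and check that the matching
 * selection-register lock is held.
 */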
153 static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg)
154 {
155 	lockdep_assert_held_once(&msc->part_sel_lock);
156 	return __mpam_read_reg(msc, reg);
157 }
158 
159 #define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg)
160 
161 static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val)
162 {
163 	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
164 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
165 
166 	writel_relaxed(val, msc->mapped_hwpage + reg);
167 }
168 
169 static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
170 {
171 	lockdep_assert_held_once(&msc->part_sel_lock);
172 	__mpam_write_reg(msc, reg, val);
173 }
174 
175 #define mpam_write_partsel_reg(msc, reg, val)  _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val)
176 
177 static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg)
178 {
179 	mpam_mon_sel_lock_held(msc);
180 	return __mpam_read_reg(msc, reg);
181 }
182 
183 #define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg)
184 
185 static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
186 {
187 	mpam_mon_sel_lock_held(msc);
188 	__mpam_write_reg(msc, reg, val);
189 }
190 
191 #define mpam_write_monsel_reg(msc, reg, val)   _mpam_write_monsel_reg(msc, MSMON_##reg, val)
192 
193 static u64 mpam_msc_read_idr(struct mpam_msc *msc)
194 {
195 	u64 idr_high = 0, idr_low;
196 
197 	lockdep_assert_held(&msc->part_sel_lock);
198 
199 	idr_low = mpam_read_partsel_reg(msc, IDR);
200 	if (FIELD_GET(MPAMF_IDR_EXT, idr_low))
201 		idr_high = mpam_read_partsel_reg(msc, IDR + 4);
202 
203 	return (idr_high << 32) | idr_low;
204 }
205 
206 static void mpam_msc_clear_esr(struct mpam_msc *msc)
207 {
208 	u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR);
209 
210 	if (!esr_low)
211 		return;
212 
213 	/*
214 	 * Clearing the high/low halves of MPAMF_ESR cannot be done atomically.
215 	 * Clear the top half first, so that the pending error bits in the
216 	 * lower half prevent hardware from updating either half of the
217 	 * register.
218 	 */
219 	if (msc->has_extd_esr)
220 		__mpam_write_reg(msc, MPAMF_ESR + 4, 0);
221 	__mpam_write_reg(msc, MPAMF_ESR, 0);
222 }
223 
224 static u64 mpam_msc_read_esr(struct mpam_msc *msc)
225 {
226 	u64 esr_high = 0, esr_low;
227 
228 	esr_low = __mpam_read_reg(msc, MPAMF_ESR);
229 	if (msc->has_extd_esr)
230 		esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4);
231 
232 	return (esr_high << 32) | esr_low;
233 }
234 
235 static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc)
236 {
237 	lockdep_assert_held(&msc->part_sel_lock);
238 
239 	mpam_write_partsel_reg(msc, PART_SEL, partsel);
240 }
241 
242 static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc)
243 {
244 	u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) |
245 		      FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid);
246 
247 	__mpam_part_sel_raw(partsel, msc);
248 }
249 
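/*
 * mpam_register_requestor() - declare the PARTID/PMG range a requestor
 * (e.g. the CPUs, registered by the arch code) can generate.
 *
 * The system-wide mpam_partid_max/mpam_pmg_max are reduced to the smallest
 * values every requestor supports. Once these limits have been published,
 * a new requestor that would lower them is rejected with -EBUSY.
 */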
250 int mpam_register_requestor(u16 partid_max, u8 pmg_max)
251 {
252 	guard(spinlock)(&partid_max_lock);
253 	if (!partid_max_init) {
254 		mpam_partid_max = partid_max;
255 		mpam_pmg_max = pmg_max;
256 		partid_max_init = true;
257 	} else if (!partid_max_published) {
258 		mpam_partid_max = min(mpam_partid_max, partid_max);
259 		mpam_pmg_max = min(mpam_pmg_max, pmg_max);
260 	} else {
261 		/* New requestors can't lower the values */
262 		if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max)
263 			return -EBUSY;
264 	}
265 
266 	return 0;
267 }
268 EXPORT_SYMBOL(mpam_register_requestor);
269 
270 static struct mpam_class *
271 mpam_class_alloc(u8 level_idx, enum mpam_class_types type)
272 {
273 	struct mpam_class *class;
274 
275 	lockdep_assert_held(&mpam_list_lock);
276 
277 	class = kzalloc(sizeof(*class), GFP_KERNEL);
278 	if (!class)
279 		return ERR_PTR(-ENOMEM);
280 	init_garbage(&class->garbage);
281 
282 	INIT_LIST_HEAD_RCU(&class->components);
283 	/* Affinity is updated when ris are added */
284 	class->level = level_idx;
285 	class->type = type;
286 	INIT_LIST_HEAD_RCU(&class->classes_list);
287 
288 	list_add_rcu(&class->classes_list, &mpam_classes);
289 
290 	return class;
291 }
292 
293 static void mpam_class_destroy(struct mpam_class *class)
294 {
295 	lockdep_assert_held(&mpam_list_lock);
296 
297 	list_del_rcu(&class->classes_list);
298 	add_to_garbage(class);
299 }
300 
301 static struct mpam_class *
302 mpam_class_find(u8 level_idx, enum mpam_class_types type)
303 {
304 	struct mpam_class *class;
305 
306 	lockdep_assert_held(&mpam_list_lock);
307 
308 	list_for_each_entry(class, &mpam_classes, classes_list) {
309 		if (class->type == type && class->level == level_idx)
310 			return class;
311 	}
312 
313 	return mpam_class_alloc(level_idx, type);
314 }
315 
316 static struct mpam_component *
317 mpam_component_alloc(struct mpam_class *class, int id)
318 {
319 	struct mpam_component *comp;
320 
321 	lockdep_assert_held(&mpam_list_lock);
322 
323 	comp = kzalloc(sizeof(*comp), GFP_KERNEL);
324 	if (!comp)
325 		return ERR_PTR(-ENOMEM);
326 	init_garbage(&comp->garbage);
327 
328 	comp->comp_id = id;
329 	INIT_LIST_HEAD_RCU(&comp->vmsc);
330 	/* Affinity is updated when RIS are added */
331 	INIT_LIST_HEAD_RCU(&comp->class_list);
332 	comp->class = class;
333 
334 	list_add_rcu(&comp->class_list, &class->components);
335 
336 	return comp;
337 }
338 
339 static void mpam_component_destroy(struct mpam_component *comp)
340 {
341 	struct mpam_class *class = comp->class;
342 
343 	lockdep_assert_held(&mpam_list_lock);
344 
345 	list_del_rcu(&comp->class_list);
346 	add_to_garbage(comp);
347 
348 	if (list_empty(&class->components))
349 		mpam_class_destroy(class);
350 }
351 
352 static struct mpam_component *
353 mpam_component_find(struct mpam_class *class, int id)
354 {
355 	struct mpam_component *comp;
356 
357 	lockdep_assert_held(&mpam_list_lock);
358 
359 	list_for_each_entry(comp, &class->components, class_list) {
360 		if (comp->comp_id == id)
361 			return comp;
362 	}
363 
364 	return mpam_component_alloc(class, id);
365 }
366 
367 static struct mpam_vmsc *
368 mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc)
369 {
370 	struct mpam_vmsc *vmsc;
371 
372 	lockdep_assert_held(&mpam_list_lock);
373 
374 	vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL);
375 	if (!vmsc)
376 		return ERR_PTR(-ENOMEM);
377 	init_garbage(&vmsc->garbage);
378 
379 	INIT_LIST_HEAD_RCU(&vmsc->ris);
380 	INIT_LIST_HEAD_RCU(&vmsc->comp_list);
381 	vmsc->comp = comp;
382 	vmsc->msc = msc;
383 
384 	list_add_rcu(&vmsc->comp_list, &comp->vmsc);
385 
386 	return vmsc;
387 }
388 
389 static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc)
390 {
391 	struct mpam_component *comp = vmsc->comp;
392 
393 	lockdep_assert_held(&mpam_list_lock);
394 
395 	list_del_rcu(&vmsc->comp_list);
396 	add_to_garbage(vmsc);
397 
398 	if (list_empty(&comp->vmsc))
399 		mpam_component_destroy(comp);
400 }
401 
402 static struct mpam_vmsc *
403 mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc)
404 {
405 	struct mpam_vmsc *vmsc;
406 
407 	lockdep_assert_held(&mpam_list_lock);
408 
409 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
410 		if (vmsc->msc->id == msc->id)
411 			return vmsc;
412 	}
413 
414 	return mpam_vmsc_alloc(comp, msc);
415 }
416 
417 /*
418  * The cacheinfo structures are only populated when CPUs are online.
419  * This helper walks the ACPI tables to include offline CPUs too.
420  */
421 int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
422 				   cpumask_t *affinity)
423 {
424 	return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);
425 }
426 
427 /*
428  * cpumask_of_node() only knows about online CPUs, so it can't tell us
429  * whether a class is represented on all possible CPUs; walk them instead.
430  */
431 static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
432 {
433 	int cpu;
434 
435 	for_each_possible_cpu(cpu) {
436 		if (node_id == cpu_to_node(cpu))
437 			cpumask_set_cpu(cpu, affinity);
438 	}
439 }
440 
441 static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
442 				 enum mpam_class_types type,
443 				 struct mpam_class *class,
444 				 struct mpam_component *comp)
445 {
446 	int err;
447 
448 	switch (type) {
449 	case MPAM_CLASS_CACHE:
450 		err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level,
451 						     affinity);
452 		if (err) {
453 			dev_warn_once(&msc->pdev->dev,
454 				      "Failed to determine CPU affinity\n");
455 			return err;
456 		}
457 
458 		if (cpumask_empty(affinity))
459 			dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n");
460 
461 		break;
462 	case MPAM_CLASS_MEMORY:
463 		get_cpumask_from_node_id(comp->comp_id, affinity);
464 		/* affinity may be empty for CPU-less memory nodes */
465 		break;
466 	case MPAM_CLASS_UNKNOWN:
467 		return 0;
468 	}
469 
470 	cpumask_and(affinity, affinity, &msc->accessibility);
471 
472 	return 0;
473 }
474 
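/*
 * Create a RIS and link it into the class->component->vmsc hierarchy,
 * allocating any levels that don't exist yet. On failure, levels that were
 * created for this RIS but are still empty are destroyed again.
 */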
475 static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
476 				  enum mpam_class_types type, u8 class_id,
477 				  int component_id)
478 {
479 	int err;
480 	struct mpam_vmsc *vmsc;
481 	struct mpam_msc_ris *ris;
482 	struct mpam_class *class;
483 	struct mpam_component *comp;
484 	struct platform_device *pdev = msc->pdev;
485 
486 	lockdep_assert_held(&mpam_list_lock);
487 
488 	if (ris_idx > MPAM_MSC_MAX_NUM_RIS)
489 		return -EINVAL;
490 
491 	if (test_and_set_bit(ris_idx, &msc->ris_idxs))
492 		return -EBUSY;
493 
494 	ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL);
495 	if (!ris)
496 		return -ENOMEM;
497 	init_garbage(&ris->garbage);
498 	ris->garbage.pdev = pdev;
499 
500 	class = mpam_class_find(class_id, type);
501 	if (IS_ERR(class))
502 		return PTR_ERR(class);
503 
504 	comp = mpam_component_find(class, component_id);
505 	if (IS_ERR(comp)) {
506 		if (list_empty(&class->components))
507 			mpam_class_destroy(class);
508 		return PTR_ERR(comp);
509 	}
510 
511 	vmsc = mpam_vmsc_find(comp, msc);
512 	if (IS_ERR(vmsc)) {
513 		if (list_empty(&comp->vmsc))
514 			mpam_component_destroy(comp);
515 		return PTR_ERR(vmsc);
516 	}
517 
518 	err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
519 	if (err) {
520 		if (list_empty(&vmsc->ris))
521 			mpam_vmsc_destroy(vmsc);
522 		return err;
523 	}
524 
525 	ris->ris_idx = ris_idx;
526 	INIT_LIST_HEAD_RCU(&ris->msc_list);
527 	INIT_LIST_HEAD_RCU(&ris->vmsc_list);
528 	ris->vmsc = vmsc;
529 
530 	cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
531 	cpumask_or(&class->affinity, &class->affinity, &ris->affinity);
532 	list_add_rcu(&ris->vmsc_list, &vmsc->ris);
533 	list_add_rcu(&ris->msc_list, &msc->ris);
534 
535 	return 0;
536 }
537 
538 static void mpam_ris_destroy(struct mpam_msc_ris *ris)
539 {
540 	struct mpam_vmsc *vmsc = ris->vmsc;
541 	struct mpam_msc *msc = vmsc->msc;
542 	struct mpam_component *comp = vmsc->comp;
543 	struct mpam_class *class = comp->class;
544 
545 	lockdep_assert_held(&mpam_list_lock);
546 
547 	/*
548 	 * It is assumed affinities don't overlap. If they do the class becomes
549 	 * unusable immediately.
550 	 */
551 	cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);
552 	cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
553 	clear_bit(ris->ris_idx, &msc->ris_idxs);
554 	list_del_rcu(&ris->msc_list);
555 	list_del_rcu(&ris->vmsc_list);
556 	add_to_garbage(ris);
557 
558 	if (list_empty(&vmsc->ris))
559 		mpam_vmsc_destroy(vmsc);
560 }
561 
562 int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
563 		    enum mpam_class_types type, u8 class_id, int component_id)
564 {
565 	int err;
566 
567 	mutex_lock(&mpam_list_lock);
568 	err = mpam_ris_create_locked(msc, ris_idx, type, class_id,
569 				     component_id);
570 	mutex_unlock(&mpam_list_lock);
571 	if (err)
572 		mpam_free_garbage();
573 
574 	return err;
575 }
576 
577 static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc,
578 						   u8 ris_idx)
579 {
580 	int err;
581 	struct mpam_msc_ris *ris;
582 
583 	lockdep_assert_held(&mpam_list_lock);
584 
585 	if (!test_bit(ris_idx, &msc->ris_idxs)) {
586 		err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN,
587 					     0, 0);
588 		if (err)
589 			return ERR_PTR(err);
590 	}
591 
592 	list_for_each_entry(ris, &msc->ris, msc_list) {
593 		if (ris->ris_idx == ris_idx)
594 			return ris;
595 	}
596 
597 	return ERR_PTR(-ENOENT);
598 }
599 
600 /*
601  * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour
602  * of NRDY, software can use this bit for any purpose" - so hardware might not
603  * implement this - but it isn't RES0.
604  *
605  * Try to see which values stick in this bit. If we can write both values,
606  * it's probably not implemented by hardware.
607  */
608 static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
609 {
610 	u32 now;
611 	u64 mon_sel;
612 	bool can_set, can_clear;
613 	struct mpam_msc *msc = ris->vmsc->msc;
614 
615 	if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
616 		return false;
617 
618 	mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) |
619 		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
620 	mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
621 
622 	_mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY);
623 	now = _mpam_read_monsel_reg(msc, mon_reg);
624 	can_set = now & MSMON___NRDY;
625 
626 	_mpam_write_monsel_reg(msc, mon_reg, 0);
627 	now = _mpam_read_monsel_reg(msc, mon_reg);
628 	can_clear = !(now & MSMON___NRDY);
629 	mpam_mon_sel_unlock(msc);
630 
631 	return (!can_set || !can_clear);
632 }
633 
634 #define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg)			\
635 	_mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg)
636 
637 static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
638 {
639 	int err;
640 	struct mpam_msc *msc = ris->vmsc->msc;
641 	struct device *dev = &msc->pdev->dev;
642 	struct mpam_props *props = &ris->props;
643 
644 	lockdep_assert_held(&msc->probe_lock);
645 	lockdep_assert_held(&msc->part_sel_lock);
646 
647 	/* Cache Portion partitioning */
648 	if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) {
649 		u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR);
650 
651 		props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features);
652 		if (props->cpbm_wd)
653 			mpam_set_feature(mpam_feat_cpor_part, props);
654 	}
655 
656 	/* Memory bandwidth partitioning */
657 	if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) {
658 		u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR);
659 
660 		/* portion bitmap resolution */
661 		props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features);
662 		if (props->mbw_pbm_bits &&
663 		    FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features))
664 			mpam_set_feature(mpam_feat_mbw_part, props);
665 
666 		props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features);
667 		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features))
668 			mpam_set_feature(mpam_feat_mbw_max, props);
669 	}
670 
671 	/* Performance Monitoring */
672 	if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) {
673 		u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR);
674 
675 		/*
676 		 * If the firmware "arm,not-ready-us" property is missing, the
677 		 * CSU counters can't be used. Should we wait forever?
678 		 */
679 		err = device_property_read_u32(&msc->pdev->dev,
680 					       "arm,not-ready-us",
681 					       &msc->nrdy_usec);
682 
683 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) {
684 			u32 csumonidr;
685 
686 			csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR);
687 			props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr);
688 			if (props->num_csu_mon) {
689 				bool hw_managed;
690 
691 				mpam_set_feature(mpam_feat_msmon_csu, props);
692 
693 				/* Is NRDY hardware managed? */
694 				hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU);
695 				if (hw_managed)
696 					mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props);
697 			}
698 
699 			/*
700 			 * Accept the missing firmware property if NRDY appears
701 			 * un-implemented.
702 			 */
703 			if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props))
704 				dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware.");
705 		}
706 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
707 			bool hw_managed;
708 			u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
709 
710 			props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr);
711 			if (props->num_mbwu_mon)
712 				mpam_set_feature(mpam_feat_msmon_mbwu, props);
713 
714 			/* Is NRDY hardware managed? */
715 			hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU);
716 			if (hw_managed)
717 				mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props);
718 
719 			/*
720 			 * Don't warn about any missing firmware property for
721 			 * MBWU NRDY - it doesn't make any sense!
722 			 */
723 		}
724 	}
725 }
726 
727 static int mpam_msc_hw_probe(struct mpam_msc *msc)
728 {
729 	u64 idr;
730 	u16 partid_max;
731 	u8 ris_idx, pmg_max;
732 	struct mpam_msc_ris *ris;
733 	struct device *dev = &msc->pdev->dev;
734 
735 	lockdep_assert_held(&msc->probe_lock);
736 
737 	idr = __mpam_read_reg(msc, MPAMF_AIDR);
738 	if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) {
739 		dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n");
740 		return -EIO;
741 	}
742 
743 	/* Grab an IDR value to find out how many RIS there are */
744 	mutex_lock(&msc->part_sel_lock);
745 	idr = mpam_msc_read_idr(msc);
746 	mutex_unlock(&msc->part_sel_lock);
747 
748 	msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr);
749 
750 	/* Use these values so partid/pmg always start with a valid value */
751 	msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
752 	msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
753 
754 	for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) {
755 		mutex_lock(&msc->part_sel_lock);
756 		__mpam_part_sel(ris_idx, 0, msc);
757 		idr = mpam_msc_read_idr(msc);
758 		mutex_unlock(&msc->part_sel_lock);
759 
760 		partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
761 		pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
762 		msc->partid_max = min(msc->partid_max, partid_max);
763 		msc->pmg_max = min(msc->pmg_max, pmg_max);
764 		msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr);
765 
766 		mutex_lock(&mpam_list_lock);
767 		ris = mpam_get_or_create_ris(msc, ris_idx);
768 		mutex_unlock(&mpam_list_lock);
769 		if (IS_ERR(ris))
770 			return PTR_ERR(ris);
771 		ris->idr = idr;
772 
773 		mutex_lock(&msc->part_sel_lock);
774 		__mpam_part_sel(ris_idx, 0, msc);
775 		mpam_ris_hw_probe(ris);
776 		mutex_unlock(&msc->part_sel_lock);
777 	}
778 
779 	/* Clear any stale errors */
780 	mpam_msc_clear_esr(msc);
781 
782 	spin_lock(&partid_max_lock);
783 	mpam_partid_max = min(mpam_partid_max, msc->partid_max);
784 	mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max);
785 	spin_unlock(&partid_max_lock);
786 
787 	msc->probed = true;
788 
789 	return 0;
790 }
791 
792 static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
793 {
794 	u32 num_words, msb;
795 	u32 bm = ~0;
796 	int i;
797 
798 	lockdep_assert_held(&msc->part_sel_lock);
799 
800 	if (wd == 0)
801 		return;
802 
803 	/*
804 	 * Write ~0 to all but the last 32-bit word, which may
805 	 * have fewer bits...
806 	 */
807 	num_words = DIV_ROUND_UP(wd, 32);
808 	for (i = 0; i < num_words - 1; i++, reg += sizeof(bm))
809 		__mpam_write_reg(msc, reg, bm);
810 
811 	/*
812 	 * ...and then the last (possibly partial) 32-bit word. When wd is a
813 	 * multiple of 32, msb is 31 so a full 32-bit word is written.
814 	 */
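	/* e.g. wd == 33: the loop above wrote one full word, GENMASK(0, 0) covers the last bit */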
815 	msb = (wd - 1) % 32;
816 	bm = GENMASK(msb, 0);
817 	__mpam_write_reg(msc, reg, bm);
818 }
819 
820 static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid)
821 {
822 	struct mpam_msc *msc = ris->vmsc->msc;
823 	struct mpam_props *rprops = &ris->props;
824 
825 	WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu)));
826 
827 	mutex_lock(&msc->part_sel_lock);
828 	__mpam_part_sel(ris->ris_idx, partid, msc);
829 
830 	if (mpam_has_feature(mpam_feat_cpor_part, rprops))
831 		mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd);
832 
833 	if (mpam_has_feature(mpam_feat_mbw_part, rprops))
834 		mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits);
835 
836 	if (mpam_has_feature(mpam_feat_mbw_min, rprops))
837 		mpam_write_partsel_reg(msc, MBW_MIN, 0);
838 
839 	if (mpam_has_feature(mpam_feat_mbw_max, rprops))
840 		mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX);
841 
842 	mutex_unlock(&msc->part_sel_lock);
843 }
844 
845 /*
846  * Called via smp_call_on_cpu() to prevent migration, while still being
847  * pre-emptible. Caller must hold mpam_srcu.
848  */
849 static int mpam_reset_ris(void *arg)
850 {
851 	u16 partid, partid_max;
852 	struct mpam_msc_ris *ris = arg;
853 
854 	if (ris->in_reset_state)
855 		return 0;
856 
857 	spin_lock(&partid_max_lock);
858 	partid_max = mpam_partid_max;
859 	spin_unlock(&partid_max_lock);
860 	for (partid = 0; partid <= partid_max; partid++)
861 		mpam_reset_ris_partid(ris, partid);
862 
863 	return 0;
864 }
865 
866 /*
867  * Get the preferred CPU for this MSC. If it is accessible from this CPU,
868  * this CPU is preferred. This can be preempted/migrated; it will only result
869  * in more work.
870  */
871 static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc)
872 {
873 	int cpu = raw_smp_processor_id();
874 
875 	if (cpumask_test_cpu(cpu, &msc->accessibility))
876 		return cpu;
877 
878 	return cpumask_first_and(&msc->accessibility, cpu_online_mask);
879 }
880 
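/* Run fn() on a CPU that can access this MSC's registers. */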
881 static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg)
882 {
883 	lockdep_assert_irqs_enabled();
884 	lockdep_assert_cpus_held();
885 	WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu)));
886 
887 	return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true);
888 }
889 
890 static void mpam_reset_msc(struct mpam_msc *msc, bool online)
891 {
892 	struct mpam_msc_ris *ris;
893 
894 	list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) {
895 		mpam_touch_msc(msc, &mpam_reset_ris, ris);
896 
897 		/*
898 		 * Set in_reset_state when coming online. The reset state
899 		 * for non-zero partid may be lost while the CPUs are offline.
900 		 */
901 		ris->in_reset_state = online;
902 	}
903 }
904 
905 static void _enable_percpu_irq(void *_irq)
906 {
907 	int *irq = _irq;
908 
909 	enable_percpu_irq(*irq, IRQ_TYPE_NONE);
910 }
911 
912 static int mpam_cpu_online(unsigned int cpu)
913 {
914 	struct mpam_msc *msc;
915 
916 	guard(srcu)(&mpam_srcu);
917 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
918 				 srcu_read_lock_held(&mpam_srcu)) {
919 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
920 			continue;
921 
922 		if (msc->reenable_error_ppi)
923 			_enable_percpu_irq(&msc->reenable_error_ppi);
924 
925 		if (atomic_fetch_inc(&msc->online_refs) == 0)
926 			mpam_reset_msc(msc, true);
927 	}
928 
929 	return 0;
930 }
931 
932 /* Before mpam is enabled, try to probe new MSCs */
933 static int mpam_discovery_cpu_online(unsigned int cpu)
934 {
935 	int err = 0;
936 	struct mpam_msc *msc;
937 	bool new_device_probed = false;
938 
939 	guard(srcu)(&mpam_srcu);
940 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
941 				 srcu_read_lock_held(&mpam_srcu)) {
942 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
943 			continue;
944 
945 		mutex_lock(&msc->probe_lock);
946 		if (!msc->probed)
947 			err = mpam_msc_hw_probe(msc);
948 		mutex_unlock(&msc->probe_lock);
949 
950 		if (err)
951 			break;
952 		new_device_probed = true;
953 	}
954 
955 	if (new_device_probed && !err)
956 		schedule_work(&mpam_enable_work);
957 	if (err) {
958 		mpam_disable_reason = "error during probing";
959 		schedule_work(&mpam_broken_work);
960 	}
961 
962 	return err;
963 }
964 
965 static int mpam_cpu_offline(unsigned int cpu)
966 {
967 	struct mpam_msc *msc;
968 
969 	guard(srcu)(&mpam_srcu);
970 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
971 				 srcu_read_lock_held(&mpam_srcu)) {
972 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
973 			continue;
974 
975 		if (msc->reenable_error_ppi)
976 			disable_percpu_irq(msc->reenable_error_ppi);
977 
978 		if (atomic_dec_and_test(&msc->online_refs))
979 			mpam_reset_msc(msc, false);
980 	}
981 
982 	return 0;
983 }
984 
985 static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online),
986 					  int (*offline)(unsigned int offline),
987 					  char *name)
988 {
989 	mutex_lock(&mpam_cpuhp_state_lock);
990 	if (mpam_cpuhp_state) {
991 		cpuhp_remove_state(mpam_cpuhp_state);
992 		mpam_cpuhp_state = 0;
993 	}
994 
995 	mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online,
996 					     offline);
997 	if (mpam_cpuhp_state <= 0) {
998 		pr_err("Failed to register cpuhp callbacks");
999 		mpam_cpuhp_state = 0;
1000 	}
1001 	mutex_unlock(&mpam_cpuhp_state_lock);
1002 }
1003 
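/*
 * For a per-CPU error interrupt, point each accessible CPU's percpu
 * pointer at this MSC so mpam_ppi_handler() can find it from dev_id.
 */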
1004 static int __setup_ppi(struct mpam_msc *msc)
1005 {
1006 	int cpu;
1007 
1008 	msc->error_dev_id = alloc_percpu(struct mpam_msc *);
1009 	if (!msc->error_dev_id)
1010 		return -ENOMEM;
1011 
1012 	for_each_cpu(cpu, &msc->accessibility)
1013 		*per_cpu_ptr(msc->error_dev_id, cpu) = msc;
1014 
1015 	return 0;
1016 }
1017 
1018 static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
1019 {
1020 	int irq;
1021 
1022 	irq = platform_get_irq_byname_optional(msc->pdev, "error");
1023 	if (irq <= 0)
1024 		return 0;
1025 
1026 	/* Allocate and initialise the percpu device pointer for PPI */
1027 	if (irq_is_percpu(irq))
1028 		return __setup_ppi(msc);
1029 
1030 	/* Sanity check: a shared error interrupt could be taken on any CPU */
1031 	if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
1032 		pr_err_once("msc:%u is a private resource with a shared error interrupt",
1033 			    msc->id);
1034 		return -EINVAL;
1035 	}
1036 
1037 	return 0;
1038 }
1039 
1040 /*
1041  * An MSC can control traffic from a set of CPUs, but may only be accessible
1042  * from a (hopefully wider) set of CPUs. The common reason for this is power
1043  * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the
1044  * corresponding cache may also be powered off. By making accesses from
1045  * one of those CPUs, we ensure we don't access a cache that's powered off.
1046  */
1047 static void update_msc_accessibility(struct mpam_msc *msc)
1048 {
1049 	u32 affinity_id;
1050 	int err;
1051 
1052 	err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
1053 				       &affinity_id);
1054 	if (err)
1055 		cpumask_copy(&msc->accessibility, cpu_possible_mask);
1056 	else
1057 		acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility);
1058 }
1059 
1060 /*
1061  * There are two ways of reaching a struct mpam_msc_ris. Via the
1062  * class->component->vmsc->ris, or via the msc.
1063  * When destroying the msc, the other side needs unlinking and cleaning up too.
1064  */
1065 static void mpam_msc_destroy(struct mpam_msc *msc)
1066 {
1067 	struct platform_device *pdev = msc->pdev;
1068 	struct mpam_msc_ris *ris, *tmp;
1069 
1070 	lockdep_assert_held(&mpam_list_lock);
1071 
1072 	list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list)
1073 		mpam_ris_destroy(ris);
1074 
1075 	list_del_rcu(&msc->all_msc_list);
1076 	platform_set_drvdata(pdev, NULL);
1077 
1078 	add_to_garbage(msc);
1079 }
1080 
1081 static void mpam_msc_drv_remove(struct platform_device *pdev)
1082 {
1083 	struct mpam_msc *msc = platform_get_drvdata(pdev);
1084 
1085 	mutex_lock(&mpam_list_lock);
1086 	mpam_msc_destroy(msc);
1087 	mutex_unlock(&mpam_list_lock);
1088 
1089 	mpam_free_garbage();
1090 }
1091 
1092 static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev)
1093 {
1094 	int err;
1095 	u32 tmp;
1096 	struct mpam_msc *msc;
1097 	struct resource *msc_res;
1098 	struct device *dev = &pdev->dev;
1099 
1100 	lockdep_assert_held(&mpam_list_lock);
1101 
1102 	msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
1103 	if (!msc)
1104 		return ERR_PTR(-ENOMEM);
1105 	init_garbage(&msc->garbage);
1106 	msc->garbage.pdev = pdev;
1107 
1108 	err = devm_mutex_init(dev, &msc->probe_lock);
1109 	if (err)
1110 		return ERR_PTR(err);
1111 
1112 	err = devm_mutex_init(dev, &msc->part_sel_lock);
1113 	if (err)
1114 		return ERR_PTR(err);
1115 
1116 	err = devm_mutex_init(dev, &msc->error_irq_lock);
1117 	if (err)
1118 		return ERR_PTR(err);
1119 	mpam_mon_sel_lock_init(msc);
1120 	msc->id = pdev->id;
1121 	msc->pdev = pdev;
1122 	INIT_LIST_HEAD_RCU(&msc->all_msc_list);
1123 	INIT_LIST_HEAD_RCU(&msc->ris);
1124 
1125 	update_msc_accessibility(msc);
1126 	if (cpumask_empty(&msc->accessibility)) {
1127 		dev_err_once(dev, "MSC is not accessible from any CPU!");
1128 		return ERR_PTR(-EINVAL);
1129 	}
1130 
1131 	err = mpam_msc_setup_error_irq(msc);
1132 	if (err)
1133 		return ERR_PTR(err);
1134 
1135 	if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp))
1136 		msc->iface = MPAM_IFACE_MMIO;
1137 	else
1138 		msc->iface = MPAM_IFACE_PCC;
1139 
1140 	if (msc->iface == MPAM_IFACE_MMIO) {
1141 		void __iomem *io;
1142 
1143 		io = devm_platform_get_and_ioremap_resource(pdev, 0,
1144 							    &msc_res);
1145 		if (IS_ERR(io)) {
1146 			dev_err_once(dev, "Failed to map MSC base address\n");
1147 			return ERR_CAST(io);
1148 		}
1149 		msc->mapped_hwpage_sz = msc_res->end - msc_res->start;
1150 		msc->mapped_hwpage = io;
1151 	} else {
1152 		return ERR_PTR(-EINVAL);
1153 	}
1154 
1155 	list_add_rcu(&msc->all_msc_list, &mpam_all_msc);
1156 	platform_set_drvdata(pdev, msc);
1157 
1158 	return msc;
1159 }
1160 
1161 static int fw_num_msc;
1162 
1163 static int mpam_msc_drv_probe(struct platform_device *pdev)
1164 {
1165 	int err;
1166 	struct mpam_msc *msc = NULL;
1167 	void *plat_data = pdev->dev.platform_data;
1168 
1169 	mutex_lock(&mpam_list_lock);
1170 	msc = do_mpam_msc_drv_probe(pdev);
1171 	mutex_unlock(&mpam_list_lock);
1172 
1173 	if (IS_ERR(msc))
1174 		return PTR_ERR(msc);
1175 
1176 	/* Create RIS entries described by firmware */
1177 	err = acpi_mpam_parse_resources(msc, plat_data);
1178 	if (err) {
1179 		mpam_msc_drv_remove(pdev);
1180 		return err;
1181 	}
1182 
1183 	if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc)
1184 		mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL,
1185 					      "mpam:drv_probe");
1186 
1187 	return 0;
1188 }
1189 
1190 static struct platform_driver mpam_msc_driver = {
1191 	.driver = {
1192 		.name = "mpam_msc",
1193 	},
1194 	.probe = mpam_msc_drv_probe,
1195 	.remove = mpam_msc_drv_remove,
1196 };
1197 
1198 /* Any of these features mean the BWA_WD field is valid. */
1199 static bool mpam_has_bwa_wd_feature(struct mpam_props *props)
1200 {
1201 	if (mpam_has_feature(mpam_feat_mbw_min, props))
1202 		return true;
1203 	if (mpam_has_feature(mpam_feat_mbw_max, props))
1204 		return true;
1205 	return false;
1206 }
1207 
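/*
 * MISMATCHED_HELPER()/MISMATCHED_FEAT() are true when the parent has the
 * feature but the child's value differs, or when a non-aliasing child lacks
 * the feature entirely. CAN_MERGE_FEAT() is true when an aliasing child
 * provides a feature the parent doesn't have yet.
 */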
1208 #define MISMATCHED_HELPER(parent, child, helper, field, alias)		\
1209 	helper(parent) &&						\
1210 	((helper(child) && (parent)->field != (child)->field) ||	\
1211 	 (!helper(child) && !(alias)))
1212 
1213 #define MISMATCHED_FEAT(parent, child, feat, field, alias)		     \
1214 	mpam_has_feature((feat), (parent)) &&				     \
1215 	((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \
1216 	 (!mpam_has_feature((feat), (child)) && !(alias)))
1217 
1218 #define CAN_MERGE_FEAT(parent, child, feat, alias)			\
1219 	(alias) && !mpam_has_feature((feat), (parent)) &&		\
1220 	mpam_has_feature((feat), (child))
1221 
1222 /*
1223  * Combine two sets of properties.
1224  * If this is for controls that alias the same resource, it is safe to just
1225  * copy the values over. If two aliasing controls implement the same scheme,
1226  * a safe common value must be picked.
1227  * Non-aliasing controls affect different resources, so the resulting safe
1228  * value must be compatible with both. When merging values in
1229  * the tree, all the aliasing resources must be handled first.
1230  * On mismatch, parent is modified.
1231  */
1232 static void __props_mismatch(struct mpam_props *parent,
1233 			     struct mpam_props *child, bool alias)
1234 {
1235 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) {
1236 		parent->cpbm_wd = child->cpbm_wd;
1237 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part,
1238 				   cpbm_wd, alias)) {
1239 		pr_debug("cleared cpor_part\n");
1240 		mpam_clear_feature(mpam_feat_cpor_part, parent);
1241 		parent->cpbm_wd = 0;
1242 	}
1243 
1244 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) {
1245 		parent->mbw_pbm_bits = child->mbw_pbm_bits;
1246 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part,
1247 				   mbw_pbm_bits, alias)) {
1248 		pr_debug("cleared mbw_part\n");
1249 		mpam_clear_feature(mpam_feat_mbw_part, parent);
1250 		parent->mbw_pbm_bits = 0;
1251 	}
1252 
1253 	/* bwa_wd is a count of bits, fewer bits means less precision */
1254 	if (alias && !mpam_has_bwa_wd_feature(parent) &&
1255 	    mpam_has_bwa_wd_feature(child)) {
1256 		parent->bwa_wd = child->bwa_wd;
1257 	} else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature,
1258 				     bwa_wd, alias)) {
1259 		pr_debug("took the min bwa_wd\n");
1260 		parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd);
1261 	}
1262 
1263 	/* For num properties, take the minimum */
1264 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) {
1265 		parent->num_csu_mon = child->num_csu_mon;
1266 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu,
1267 				   num_csu_mon, alias)) {
1268 		pr_debug("took the min num_csu_mon\n");
1269 		parent->num_csu_mon = min(parent->num_csu_mon,
1270 					  child->num_csu_mon);
1271 	}
1272 
1273 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) {
1274 		parent->num_mbwu_mon = child->num_mbwu_mon;
1275 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu,
1276 				   num_mbwu_mon, alias)) {
1277 		pr_debug("took the min num_mbwu_mon\n");
1278 		parent->num_mbwu_mon = min(parent->num_mbwu_mon,
1279 					   child->num_mbwu_mon);
1280 	}
1281 
1282 	if (alias) {
1283 		/* Merge features for aliased resources */
1284 		bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1285 	} else {
1286 		/* Clear missing features for non aliasing */
1287 		bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1288 	}
1289 }
1290 
1291 /*
1292  * If a vmsc doesn't match class feature/configuration, do the right thing(tm).
1293  * For 'num' properties we can just take the minimum.
1294  * For properties where the mismatched unused bits would make a difference, we
1295  * nobble the class feature, as we can't configure all the resources.
1296  * e.g. The L3 cache is composed of two resources with 13-bit and 17-bit
1297  * portion bitmaps respectively.
1298  */
1299 static void
1300 __class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc)
1301 {
1302 	struct mpam_props *cprops = &class->props;
1303 	struct mpam_props *vprops = &vmsc->props;
1304 	struct device *dev = &vmsc->msc->pdev->dev;
1305 
1306 	lockdep_assert_held(&mpam_list_lock); /* we modify class */
1307 
1308 	dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n",
1309 		(long)cprops->features, (long)vprops->features);
1310 
1311 	/* Take the safe value for any common features */
1312 	__props_mismatch(cprops, vprops, false);
1313 }
1314 
1315 static void
1316 __vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris)
1317 {
1318 	struct mpam_props *rprops = &ris->props;
1319 	struct mpam_props *vprops = &vmsc->props;
1320 	struct device *dev = &vmsc->msc->pdev->dev;
1321 
1322 	lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */
1323 
1324 	dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n",
1325 		(long)vprops->features, (long)rprops->features);
1326 
1327 	/*
1328 	 * Merge mismatched features - Copy any features that aren't common,
1329 	 * but take the safe value for any common features.
1330 	 */
1331 	__props_mismatch(vprops, rprops, true);
1332 }
1333 
1334 /*
1335  * Copy the first component's first vMSC's properties and features to the
1336  * class. __class_props_mismatch() will remove conflicts.
1337  * It is not possible to have a class with no components, or a component with
1338  * no resources. The vMSC properties have already been built.
1339  */
1340 static void mpam_enable_init_class_features(struct mpam_class *class)
1341 {
1342 	struct mpam_vmsc *vmsc;
1343 	struct mpam_component *comp;
1344 
1345 	comp = list_first_entry(&class->components,
1346 				struct mpam_component, class_list);
1347 	vmsc = list_first_entry(&comp->vmsc,
1348 				struct mpam_vmsc, comp_list);
1349 
1350 	class->props = vmsc->props;
1351 }
1352 
1353 static void mpam_enable_merge_vmsc_features(struct mpam_component *comp)
1354 {
1355 	struct mpam_vmsc *vmsc;
1356 	struct mpam_msc_ris *ris;
1357 	struct mpam_class *class = comp->class;
1358 
1359 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
1360 		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
1361 			__vmsc_props_mismatch(vmsc, ris);
1362 			class->nrdy_usec = max(class->nrdy_usec,
1363 					       vmsc->msc->nrdy_usec);
1364 		}
1365 	}
1366 }
1367 
1368 static void mpam_enable_merge_class_features(struct mpam_component *comp)
1369 {
1370 	struct mpam_vmsc *vmsc;
1371 	struct mpam_class *class = comp->class;
1372 
1373 	list_for_each_entry(vmsc, &comp->vmsc, comp_list)
1374 		__class_props_mismatch(class, vmsc);
1375 }
1376 
1377 /*
1378  * Merge all the common resource features into class.
1379  * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features()
1380  * as the first step so that mpam_enable_init_class_features() can initialise
1381  * the class with a representative set of features.
1382  * Next, mpam_enable_merge_class_features() bitwise-ANDs all the vmsc
1383  * features to form the class features.
1384  * Other features are the min/max as appropriate.
1385  *
1386  * To avoid walking the whole tree twice, the class->nrdy_usec property is
1387  * updated when working with the vmsc as it is a max(), and doesn't need
1388  * initialising first.
1389  */
1390 static void mpam_enable_merge_features(struct list_head *all_classes_list)
1391 {
1392 	struct mpam_class *class;
1393 	struct mpam_component *comp;
1394 
1395 	lockdep_assert_held(&mpam_list_lock);
1396 
1397 	list_for_each_entry(class, all_classes_list, classes_list) {
1398 		list_for_each_entry(comp, &class->components, class_list)
1399 			mpam_enable_merge_vmsc_features(comp);
1400 
1401 		mpam_enable_init_class_features(class);
1402 
1403 		list_for_each_entry(comp, &class->components, class_list)
1404 			mpam_enable_merge_class_features(comp);
1405 	}
1406 }
1407 
1408 static char *mpam_errcode_names[16] = {
1409 	[MPAM_ERRCODE_NONE]			= "No error",
1410 	[MPAM_ERRCODE_PARTID_SEL_RANGE]		= "PARTID_SEL_Range",
1411 	[MPAM_ERRCODE_REQ_PARTID_RANGE]		= "Req_PARTID_Range",
1412 	[MPAM_ERRCODE_MSMONCFG_ID_RANGE]	= "MSMONCFG_ID_RANGE",
1413 	[MPAM_ERRCODE_REQ_PMG_RANGE]		= "Req_PMG_Range",
1414 	[MPAM_ERRCODE_MONITOR_RANGE]		= "Monitor_Range",
1415 	[MPAM_ERRCODE_INTPARTID_RANGE]		= "intPARTID_Range",
1416 	[MPAM_ERRCODE_UNEXPECTED_INTERNAL]	= "Unexpected_INTERNAL",
1417 	[MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL]	= "Undefined_RIS_PART_SEL",
1418 	[MPAM_ERRCODE_RIS_NO_CONTROL]		= "RIS_No_Control",
1419 	[MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL]	= "Undefined_RIS_MON_SEL",
1420 	[MPAM_ERRCODE_RIS_NO_MONITOR]		= "RIS_No_Monitor",
1421 	[12 ... 15] = "Reserved"
1422 };
1423 
1424 static int mpam_enable_msc_ecr(void *_msc)
1425 {
1426 	struct mpam_msc *msc = _msc;
1427 
1428 	__mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN);
1429 
1430 	return 0;
1431 }
1432 
1433 /* This can run in mpam_disable(), and in the interrupt handler on the same CPU */
1434 static int mpam_disable_msc_ecr(void *_msc)
1435 {
1436 	struct mpam_msc *msc = _msc;
1437 
1438 	__mpam_write_reg(msc, MPAMF_ECR, 0);
1439 
1440 	return 0;
1441 }
1442 
1443 static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
1444 {
1445 	u64 reg;
1446 	u16 partid;
1447 	u8 errcode, pmg, ris;
1448 
1449 	if (WARN_ON_ONCE(!msc) ||
1450 	    WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
1451 					   &msc->accessibility)))
1452 		return IRQ_NONE;
1453 
1454 	reg = mpam_msc_read_esr(msc);
1455 
1456 	errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
1457 	if (!errcode)
1458 		return IRQ_NONE;
1459 
1460 	/* Clear level triggered irq */
1461 	mpam_msc_clear_esr(msc);
1462 
1463 	partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg);
1464 	pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
1465 	ris = FIELD_GET(MPAMF_ESR_RIS, reg);
1466 
1467 	pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n",
1468 			   msc->id, mpam_errcode_names[errcode], partid, pmg,
1469 			   ris);
1470 
1471 	/* Disable this interrupt. */
1472 	mpam_disable_msc_ecr(msc);
1473 
1474 	/*
1475 	 * Schedule the teardown work. Don't use a threaded IRQ as we can't
1476 	 * unregister the interrupt from the threaded part of the handler.
1477 	 */
1478 	mpam_disable_reason = "hardware error interrupt";
1479 	schedule_work(&mpam_broken_work);
1480 
1481 	return IRQ_HANDLED;
1482 }
1483 
1484 static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
1485 {
1486 	struct mpam_msc *msc = *(struct mpam_msc **)dev_id;
1487 
1488 	return __mpam_irq_handler(irq, msc);
1489 }
1490 
1491 static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
1492 {
1493 	struct mpam_msc *msc = dev_id;
1494 
1495 	return __mpam_irq_handler(irq, msc);
1496 }
1497 
1498 static int mpam_register_irqs(void)
1499 {
1500 	int err, irq;
1501 	struct mpam_msc *msc;
1502 
1503 	lockdep_assert_cpus_held();
1504 
1505 	guard(srcu)(&mpam_srcu);
1506 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1507 				 srcu_read_lock_held(&mpam_srcu)) {
1508 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1509 		if (irq <= 0)
1510 			continue;
1511 
1512 		/* The MPAM spec says the interrupt can be SPI, PPI or LPI */
1513 		/* We anticipate sharing the interrupt with other MSCs */
1514 		if (irq_is_percpu(irq)) {
1515 			err = request_percpu_irq(irq, &mpam_ppi_handler,
1516 						 "mpam:msc:error",
1517 						 msc->error_dev_id);
1518 			if (err)
1519 				return err;
1520 
1521 			msc->reenable_error_ppi = irq;
1522 			smp_call_function_many(&msc->accessibility,
1523 					       &_enable_percpu_irq, &irq,
1524 					       true);
1525 		} else {
1526 			err = devm_request_irq(&msc->pdev->dev, irq,
1527 					       &mpam_spi_handler, IRQF_SHARED,
1528 					       "mpam:msc:error", msc);
1529 			if (err)
1530 				return err;
1531 		}
1532 
1533 		mutex_lock(&msc->error_irq_lock);
1534 		msc->error_irq_req = true;
1535 		mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
1536 		msc->error_irq_hw_enabled = true;
1537 		mutex_unlock(&msc->error_irq_lock);
1538 	}
1539 
1540 	return 0;
1541 }
1542 
1543 static void mpam_unregister_irqs(void)
1544 {
1545 	int irq;
1546 	struct mpam_msc *msc;
1547 
1548 	guard(cpus_read_lock)();
1549 	guard(srcu)(&mpam_srcu);
1550 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1551 				 srcu_read_lock_held(&mpam_srcu)) {
1552 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1553 		if (irq <= 0)
1554 			continue;
1555 
1556 		mutex_lock(&msc->error_irq_lock);
1557 		if (msc->error_irq_hw_enabled) {
1558 			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
1559 			msc->error_irq_hw_enabled = false;
1560 		}
1561 
1562 		if (msc->error_irq_req) {
1563 			if (irq_is_percpu(irq)) {
1564 				msc->reenable_error_ppi = 0;
1565 				free_percpu_irq(irq, msc->error_dev_id);
1566 			} else {
1567 				devm_free_irq(&msc->pdev->dev, irq, msc);
1568 			}
1569 			msc->error_irq_req = false;
1570 		}
1571 		mutex_unlock(&msc->error_irq_lock);
1572 	}
1573 }
1574 
1575 static void mpam_enable_once(void)
1576 {
1577 	int err;
1578 
1579 	/*
1580 	 * Once the cpuhp callbacks have been changed, mpam_partid_max can no
1581 	 * longer change.
1582 	 */
1583 	spin_lock(&partid_max_lock);
1584 	partid_max_published = true;
1585 	spin_unlock(&partid_max_lock);
1586 
1587 	/*
1588 	 * If all the MSCs have been probed, enabling the IRQs happens next.
1589 	 * That involves cross-calling to a CPU that can reach the MSC, so the
1590 	 * locks must be taken in this order: cpus_read_lock(), then mpam_list_lock.
1591 	 */
1592 	cpus_read_lock();
1593 	mutex_lock(&mpam_list_lock);
1594 	mpam_enable_merge_features(&mpam_classes);
1595 
1596 	err = mpam_register_irqs();
1597 
1598 	mutex_unlock(&mpam_list_lock);
1599 	cpus_read_unlock();
1600 
1601 	if (err) {
1602 		pr_warn("Failed to register irqs: %d\n", err);
1603 		mpam_disable_reason = "Failed to enable.";
1604 		schedule_work(&mpam_broken_work);
1605 		return;
1606 	}
1607 
1608 	mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline,
1609 				      "mpam:online");
1610 
1611 	/* Use printk() to avoid the pr_fmt adding the function name. */
1612 	printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n",
1613 	       mpam_partid_max + 1, mpam_pmg_max + 1);
1614 }
1615 
1616 static void mpam_reset_component_locked(struct mpam_component *comp)
1617 {
1618 	struct mpam_vmsc *vmsc;
1619 
1620 	lockdep_assert_cpus_held();
1621 
1622 	guard(srcu)(&mpam_srcu);
1623 	list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
1624 				 srcu_read_lock_held(&mpam_srcu)) {
1625 		struct mpam_msc *msc = vmsc->msc;
1626 		struct mpam_msc_ris *ris;
1627 
1628 		list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
1629 					 srcu_read_lock_held(&mpam_srcu)) {
1630 			if (!ris->in_reset_state)
1631 				mpam_touch_msc(msc, mpam_reset_ris, ris);
1632 			ris->in_reset_state = true;
1633 		}
1634 	}
1635 }
1636 
1637 static void mpam_reset_class_locked(struct mpam_class *class)
1638 {
1639 	struct mpam_component *comp;
1640 
1641 	lockdep_assert_cpus_held();
1642 
1643 	guard(srcu)(&mpam_srcu);
1644 	list_for_each_entry_srcu(comp, &class->components, class_list,
1645 				 srcu_read_lock_held(&mpam_srcu))
1646 		mpam_reset_component_locked(comp);
1647 }
1648 
1649 static void mpam_reset_class(struct mpam_class *class)
1650 {
1651 	cpus_read_lock();
1652 	mpam_reset_class_locked(class);
1653 	cpus_read_unlock();
1654 }
1655 
1656 /*
1657  * Called in response to an error IRQ.
1658  * All of MPAM's errors indicate a software bug; restore any modified
1659  * controls to their reset values.
1660  */
1661 void mpam_disable(struct work_struct *ignored)
1662 {
1663 	int idx;
1664 	struct mpam_class *class;
1665 	struct mpam_msc *msc, *tmp;
1666 
1667 	mutex_lock(&mpam_cpuhp_state_lock);
1668 	if (mpam_cpuhp_state) {
1669 		cpuhp_remove_state(mpam_cpuhp_state);
1670 		mpam_cpuhp_state = 0;
1671 	}
1672 	mutex_unlock(&mpam_cpuhp_state_lock);
1673 
1674 	mpam_unregister_irqs();
1675 
1676 	idx = srcu_read_lock(&mpam_srcu);
1677 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
1678 				 srcu_read_lock_held(&mpam_srcu))
1679 		mpam_reset_class(class);
1680 	srcu_read_unlock(&mpam_srcu, idx);
1681 
1682 	mutex_lock(&mpam_list_lock);
1683 	list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list)
1684 		mpam_msc_destroy(msc);
1685 	mutex_unlock(&mpam_list_lock);
1686 	mpam_free_garbage();
1687 
1688 	pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason);
1689 }
1690 
1691 /*
1692  * Enable mpam once all devices have been probed.
1693  * Scheduled by mpam_discovery_cpu_online() once all devices have been created.
1694  * Also scheduled when new devices are probed as new CPUs come online.
1695  */
1696 void mpam_enable(struct work_struct *work)
1697 {
1698 	static atomic_t once;
1699 	struct mpam_msc *msc;
1700 	bool all_devices_probed = true;
1701 
1702 	/* Have we probed all the hw devices? */
1703 	guard(srcu)(&mpam_srcu);
1704 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1705 				 srcu_read_lock_held(&mpam_srcu)) {
1706 		mutex_lock(&msc->probe_lock);
1707 		if (!msc->probed)
1708 			all_devices_probed = false;
1709 		mutex_unlock(&msc->probe_lock);
1710 
1711 		if (!all_devices_probed)
1712 			break;
1713 	}
1714 
1715 	if (all_devices_probed && !atomic_fetch_inc(&once))
1716 		mpam_enable_once();
1717 }
1718 
1719 static int __init mpam_msc_driver_init(void)
1720 {
1721 	if (!system_supports_mpam())
1722 		return -EOPNOTSUPP;
1723 
1724 	init_srcu_struct(&mpam_srcu);
1725 
1726 	fw_num_msc = acpi_mpam_count_msc();
1727 	if (fw_num_msc <= 0) {
1728 		pr_err("No MSC devices found in firmware\n");
1729 		return -EINVAL;
1730 	}
1731 
1732 	return platform_driver_register(&mpam_msc_driver);
1733 }
1734 
1735 /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */
1736 subsys_initcall(mpam_msc_driver_init);
1737