xref: /linux/drivers/resctrl/mpam_devices.c (revision 880df85d8673f8e2395f139d3618661366e5d4d8)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2025 Arm Ltd.
3 
4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
5 
6 #include <linux/acpi.h>
7 #include <linux/atomic.h>
8 #include <linux/arm_mpam.h>
9 #include <linux/bitfield.h>
10 #include <linux/bitmap.h>
11 #include <linux/cacheinfo.h>
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/device.h>
15 #include <linux/errno.h>
16 #include <linux/gfp.h>
17 #include <linux/interrupt.h>
18 #include <linux/irq.h>
19 #include <linux/irqdesc.h>
20 #include <linux/list.h>
21 #include <linux/lockdep.h>
22 #include <linux/mutex.h>
23 #include <linux/platform_device.h>
24 #include <linux/printk.h>
25 #include <linux/srcu.h>
26 #include <linux/spinlock.h>
27 #include <linux/types.h>
28 #include <linux/workqueue.h>
29 
30 #include "mpam_internal.h"
31 
32 DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */
33 
34 /*
35  * mpam_list_lock protects the SRCU lists when writing. Once the
36  * mpam_enabled key is enabled these lists are read-only,
37  * unless the error interrupt disables the driver.
38  */
39 static DEFINE_MUTEX(mpam_list_lock);
40 static LIST_HEAD(mpam_all_msc);
41 
42 struct srcu_struct mpam_srcu;
43 
44 /*
45  * Number of MSCs that have been probed. Once all MSCs have been probed MPAM
46  * can be enabled.
47  */
48 static atomic_t mpam_num_msc;
49 
50 static int mpam_cpuhp_state;
51 static DEFINE_MUTEX(mpam_cpuhp_state_lock);
52 
53 /*
54  * The smallest common values for any CPU or MSC in the system.
55  * Generating traffic outside this range will result in screaming interrupts.
56  */
57 u16 mpam_partid_max;
58 u8 mpam_pmg_max;
59 static bool partid_max_init, partid_max_published;
60 static DEFINE_SPINLOCK(partid_max_lock);
61 
62 /*
63  * mpam is enabled once all devices have been probed from CPU online callbacks,
64  * scheduled via this work_struct. If access to an MSC depends on a CPU that
65  * was not brought online at boot, this can happen surprisingly late.
66  */
67 static DECLARE_WORK(mpam_enable_work, &mpam_enable);
68 
69 /*
70  * All mpam error interrupts indicate a software bug. On receipt, disable the
71  * driver.
72  */
73 static DECLARE_WORK(mpam_broken_work, &mpam_disable);
74 
75 /* The reason mpam was disabled, printed to aid debugging */
76 static char *mpam_disable_reason;
77 
78 /*
79  * An MSC is a physical container for controls and monitors, each identified by
80  * their RIS index. These share a base-address, interrupts and some MMIO
81  * registers. A vMSC is a virtual container for RIS in an MSC that control or
82  * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but
83  * not all RIS in an MSC share a vMSC.
84  *
85  * Components are a group of vMSC that control or monitor the same thing but
86  * are from different MSC, so have different base-address, interrupts etc.
87  * Classes are the set of components of the same type.
88  *
89  * The features of a vMSC are the union of the features of the RIS it
90  * contains. The features of a Class or a Component are the common subset
91  * of the features of the vMSCs they contain.
92  *
93  * e.g. The system cache may have bandwidth controls on multiple interfaces,
94  * for regulating traffic from devices independently of traffic from CPUs.
95  * If these are two RIS in one MSC, they will be treated as controlling
96  * different things, and will not share a vMSC/component/class.
97  *
98  * e.g. The L2 may have one MSC and two RIS, one for cache-controls another
99  * for bandwidth. These two RIS are members of the same vMSC.
100  *
101  * e.g. The set of RIS that make up the L2 are grouped as a component. These
102  * are sometimes termed slices. They should be configured the same, as if there
103  * were only one.
104  *
105  * e.g. The SoC probably has more than one L2, each attached to a distinct set
106  * of CPUs. All the L2 components are grouped as a class.
107  *
108  * When creating an MSC, struct mpam_msc is added to the mpam_all_msc list,
109  * then linked via struct mpam_ris to a vmsc, component and class.
110  * The same MSC may exist under different class->component->vmsc paths, but the
111  * RIS index will be unique.
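 *
 * e.g. to visit every RIS in a class while holding mpam_list_lock, walk the
 * lists in this order (a sketch of the pattern used when merging features):
 *	list_for_each_entry(comp, &class->components, class_list)
 *		list_for_each_entry(vmsc, &comp->vmsc, comp_list)
 *			list_for_each_entry(ris, &vmsc->ris, vmsc_list)
 *				...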
112  */
113 LIST_HEAD(mpam_classes);
114 
115 /* List of all objects that can be free()d after synchronize_srcu() */
116 static LLIST_HEAD(mpam_garbage);
117 
118 static inline void init_garbage(struct mpam_garbage *garbage)
119 {
120 	init_llist_node(&garbage->llist);
121 }
122 
123 #define add_to_garbage(x)				\
124 do {							\
125 	__typeof__(x) _x = (x);				\
126 	_x->garbage.to_free = _x;			\
127 	llist_add(&_x->garbage.llist, &mpam_garbage);	\
128 } while (0)
129 
130 static void mpam_free_garbage(void)
131 {
132 	struct mpam_garbage *iter, *tmp;
133 	struct llist_node *to_free = llist_del_all(&mpam_garbage);
134 
135 	if (!to_free)
136 		return;
137 
138 	synchronize_srcu(&mpam_srcu);
139 
140 	llist_for_each_entry_safe(iter, tmp, to_free, llist) {
141 		if (iter->pdev)
142 			devm_kfree(&iter->pdev->dev, iter->to_free);
143 		else
144 			kfree(iter->to_free);
145 	}
146 }
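
/*
 * A sketch of the pattern used by e.g. mpam_ris_create() and
 * mpam_msc_drv_remove(): unlink the object from its SRCU-protected list
 * while holding mpam_list_lock, queue it with add_to_garbage(), then call
 * mpam_free_garbage() once the lock has been dropped so that readers are
 * guaranteed to have finished with the object before it is freed.
 */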
147 
148 /*
149  * Once mpam is enabled, new requestors cannot further reduce the available
150  * partid range. Assert that the sizes are now fixed, and new requestors
151  * will be turned away.
152  */
153 static void mpam_assert_partid_sizes_fixed(void)
154 {
155 	WARN_ON_ONCE(!partid_max_published);
156 }
157 
158 static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
159 {
160 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
161 
162 	return readl_relaxed(msc->mapped_hwpage + reg);
163 }
164 
165 static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg)
166 {
167 	lockdep_assert_held_once(&msc->part_sel_lock);
168 	return __mpam_read_reg(msc, reg);
169 }
170 
171 #define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg)
172 
173 static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val)
174 {
175 	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
176 	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
177 
178 	writel_relaxed(val, msc->mapped_hwpage + reg);
179 }
180 
181 static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
182 {
183 	lockdep_assert_held_once(&msc->part_sel_lock);
184 	__mpam_write_reg(msc, reg, val);
185 }
186 
187 #define mpam_write_partsel_reg(msc, reg, val)  _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val)
188 
189 static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg)
190 {
191 	mpam_mon_sel_lock_held(msc);
192 	return __mpam_read_reg(msc, reg);
193 }
194 
195 #define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg)
196 
197 static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val)
198 {
199 	mpam_mon_sel_lock_held(msc);
200 	__mpam_write_reg(msc, reg, val);
201 }
202 
203 #define mpam_write_monsel_reg(msc, reg, val)   _mpam_write_monsel_reg(msc, MSMON_##reg, val)
204 
205 static u64 mpam_msc_read_idr(struct mpam_msc *msc)
206 {
207 	u64 idr_high = 0, idr_low;
208 
209 	lockdep_assert_held(&msc->part_sel_lock);
210 
211 	idr_low = mpam_read_partsel_reg(msc, IDR);
212 	if (FIELD_GET(MPAMF_IDR_EXT, idr_low))
213 		idr_high = mpam_read_partsel_reg(msc, IDR + 4);
214 
215 	return (idr_high << 32) | idr_low;
216 }
217 
218 static void mpam_msc_clear_esr(struct mpam_msc *msc)
219 {
220 	u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR);
221 
222 	if (!esr_low)
223 		return;
224 
225 	/*
226 	 * Clearing the high/low halves of MPAMF_ESR cannot be atomic.
227 	 * Clear the top half first, so that the pending error bits in the
228 	 * lower half prevent hardware from updating either half of the
229 	 * register.
230 	 */
231 	if (msc->has_extd_esr)
232 		__mpam_write_reg(msc, MPAMF_ESR + 4, 0);
233 	__mpam_write_reg(msc, MPAMF_ESR, 0);
234 }
235 
236 static u64 mpam_msc_read_esr(struct mpam_msc *msc)
237 {
238 	u64 esr_high = 0, esr_low;
239 
240 	esr_low = __mpam_read_reg(msc, MPAMF_ESR);
241 	if (msc->has_extd_esr)
242 		esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4);
243 
244 	return (esr_high << 32) | esr_low;
245 }
246 
247 static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc)
248 {
249 	lockdep_assert_held(&msc->part_sel_lock);
250 
251 	mpam_write_partsel_reg(msc, PART_SEL, partsel);
252 }
253 
254 static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc)
255 {
256 	u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) |
257 		      FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid);
258 
259 	__mpam_part_sel_raw(partsel, msc);
260 }
261 
262 static void __mpam_intpart_sel(u8 ris_idx, u16 intpartid, struct mpam_msc *msc)
263 {
264 	u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) |
265 		      FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, intpartid) |
266 		      MPAMCFG_PART_SEL_INTERNAL;
267 
268 	__mpam_part_sel_raw(partsel, msc);
269 }
270 
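/**
 * mpam_register_requestor() - Register a requestor's PARTID and PMG limits.
 * @partid_max:	The largest PARTID value the requestor supports.
 * @pmg_max:	The largest PMG value the requestor supports.
 *
 * The system-wide limits are reduced to the smallest values supported by
 * all requestors. Returns -EBUSY if the limits have already been published
 * and this requestor would require them to be lowered, otherwise 0.
 */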
271 int mpam_register_requestor(u16 partid_max, u8 pmg_max)
272 {
273 	guard(spinlock)(&partid_max_lock);
274 	if (!partid_max_init) {
275 		mpam_partid_max = partid_max;
276 		mpam_pmg_max = pmg_max;
277 		partid_max_init = true;
278 	} else if (!partid_max_published) {
279 		mpam_partid_max = min(mpam_partid_max, partid_max);
280 		mpam_pmg_max = min(mpam_pmg_max, pmg_max);
281 	} else {
282 		/* New requestors can't lower the values */
283 		if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max)
284 			return -EBUSY;
285 	}
286 
287 	return 0;
288 }
289 EXPORT_SYMBOL(mpam_register_requestor);
290 
291 static struct mpam_class *
292 mpam_class_alloc(u8 level_idx, enum mpam_class_types type)
293 {
294 	struct mpam_class *class;
295 
296 	lockdep_assert_held(&mpam_list_lock);
297 
298 	class = kzalloc(sizeof(*class), GFP_KERNEL);
299 	if (!class)
300 		return ERR_PTR(-ENOMEM);
301 	init_garbage(&class->garbage);
302 
303 	INIT_LIST_HEAD_RCU(&class->components);
304 	/* Affinity is updated when RIS are added */
305 	class->level = level_idx;
306 	class->type = type;
307 	INIT_LIST_HEAD_RCU(&class->classes_list);
308 
309 	list_add_rcu(&class->classes_list, &mpam_classes);
310 
311 	return class;
312 }
313 
314 static void mpam_class_destroy(struct mpam_class *class)
315 {
316 	lockdep_assert_held(&mpam_list_lock);
317 
318 	list_del_rcu(&class->classes_list);
319 	add_to_garbage(class);
320 }
321 
322 static struct mpam_class *
323 mpam_class_find(u8 level_idx, enum mpam_class_types type)
324 {
325 	struct mpam_class *class;
326 
327 	lockdep_assert_held(&mpam_list_lock);
328 
329 	list_for_each_entry(class, &mpam_classes, classes_list) {
330 		if (class->type == type && class->level == level_idx)
331 			return class;
332 	}
333 
334 	return mpam_class_alloc(level_idx, type);
335 }
336 
337 static struct mpam_component *
338 mpam_component_alloc(struct mpam_class *class, int id)
339 {
340 	struct mpam_component *comp;
341 
342 	lockdep_assert_held(&mpam_list_lock);
343 
344 	comp = kzalloc(sizeof(*comp), GFP_KERNEL);
345 	if (!comp)
346 		return ERR_PTR(-ENOMEM);
347 	init_garbage(&comp->garbage);
348 
349 	comp->comp_id = id;
350 	INIT_LIST_HEAD_RCU(&comp->vmsc);
351 	/* Affinity is updated when RIS are added */
352 	INIT_LIST_HEAD_RCU(&comp->class_list);
353 	comp->class = class;
354 
355 	list_add_rcu(&comp->class_list, &class->components);
356 
357 	return comp;
358 }
359 
360 static void __destroy_component_cfg(struct mpam_component *comp);
361 
362 static void mpam_component_destroy(struct mpam_component *comp)
363 {
364 	struct mpam_class *class = comp->class;
365 
366 	lockdep_assert_held(&mpam_list_lock);
367 
368 	__destroy_component_cfg(comp);
369 
370 	list_del_rcu(&comp->class_list);
371 	add_to_garbage(comp);
372 
373 	if (list_empty(&class->components))
374 		mpam_class_destroy(class);
375 }
376 
377 static struct mpam_component *
378 mpam_component_find(struct mpam_class *class, int id)
379 {
380 	struct mpam_component *comp;
381 
382 	lockdep_assert_held(&mpam_list_lock);
383 
384 	list_for_each_entry(comp, &class->components, class_list) {
385 		if (comp->comp_id == id)
386 			return comp;
387 	}
388 
389 	return mpam_component_alloc(class, id);
390 }
391 
392 static struct mpam_vmsc *
393 mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc)
394 {
395 	struct mpam_vmsc *vmsc;
396 
397 	lockdep_assert_held(&mpam_list_lock);
398 
399 	vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL);
400 	if (!vmsc)
401 		return ERR_PTR(-ENOMEM);
402 	init_garbage(&vmsc->garbage);
403 
404 	INIT_LIST_HEAD_RCU(&vmsc->ris);
405 	INIT_LIST_HEAD_RCU(&vmsc->comp_list);
406 	vmsc->comp = comp;
407 	vmsc->msc = msc;
408 
409 	list_add_rcu(&vmsc->comp_list, &comp->vmsc);
410 
411 	return vmsc;
412 }
413 
414 static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc)
415 {
416 	struct mpam_component *comp = vmsc->comp;
417 
418 	lockdep_assert_held(&mpam_list_lock);
419 
420 	list_del_rcu(&vmsc->comp_list);
421 	add_to_garbage(vmsc);
422 
423 	if (list_empty(&comp->vmsc))
424 		mpam_component_destroy(comp);
425 }
426 
427 static struct mpam_vmsc *
428 mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc)
429 {
430 	struct mpam_vmsc *vmsc;
431 
432 	lockdep_assert_held(&mpam_list_lock);
433 
434 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
435 		if (vmsc->msc->id == msc->id)
436 			return vmsc;
437 	}
438 
439 	return mpam_vmsc_alloc(comp, msc);
440 }
441 
442 /*
443  * The cacheinfo structures are only populated when CPUs are online.
444  * This helper walks the acpi tables to include offline CPUs too.
445  */
446 int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
447 				   cpumask_t *affinity)
448 {
449 	return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);
450 }
451 
452 /*
453  * cpumask_of_node() only knows about online CPUs. This can't tell us whether
454  * a class is represented on all possible CPUs.
455  */
456 static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
457 {
458 	int cpu;
459 
460 	for_each_possible_cpu(cpu) {
461 		if (node_id == cpu_to_node(cpu))
462 			cpumask_set_cpu(cpu, affinity);
463 	}
464 }
465 
466 static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
467 				 enum mpam_class_types type,
468 				 struct mpam_class *class,
469 				 struct mpam_component *comp)
470 {
471 	int err;
472 
473 	switch (type) {
474 	case MPAM_CLASS_CACHE:
475 		err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level,
476 						     affinity);
477 		if (err) {
478 			dev_warn_once(&msc->pdev->dev,
479 				      "Failed to determine CPU affinity\n");
480 			return err;
481 		}
482 
483 		if (cpumask_empty(affinity))
484 			dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n");
485 
486 		break;
487 	case MPAM_CLASS_MEMORY:
488 		get_cpumask_from_node_id(comp->comp_id, affinity);
489 		/* affinity may be empty for CPU-less memory nodes */
490 		break;
491 	case MPAM_CLASS_UNKNOWN:
492 		return 0;
493 	}
494 
495 	cpumask_and(affinity, affinity, &msc->accessibility);
496 
497 	return 0;
498 }
499 
500 static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
501 				  enum mpam_class_types type, u8 class_id,
502 				  int component_id)
503 {
504 	int err;
505 	struct mpam_vmsc *vmsc;
506 	struct mpam_msc_ris *ris;
507 	struct mpam_class *class;
508 	struct mpam_component *comp;
509 	struct platform_device *pdev = msc->pdev;
510 
511 	lockdep_assert_held(&mpam_list_lock);
512 
513 	if (ris_idx > MPAM_MSC_MAX_NUM_RIS)
514 		return -EINVAL;
515 
516 	if (test_and_set_bit(ris_idx, &msc->ris_idxs))
517 		return -EBUSY;
518 
519 	ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL);
520 	if (!ris)
521 		return -ENOMEM;
522 	init_garbage(&ris->garbage);
523 	ris->garbage.pdev = pdev;
524 
525 	class = mpam_class_find(class_id, type);
526 	if (IS_ERR(class))
527 		return PTR_ERR(class);
528 
529 	comp = mpam_component_find(class, component_id);
530 	if (IS_ERR(comp)) {
531 		if (list_empty(&class->components))
532 			mpam_class_destroy(class);
533 		return PTR_ERR(comp);
534 	}
535 
536 	vmsc = mpam_vmsc_find(comp, msc);
537 	if (IS_ERR(vmsc)) {
538 		if (list_empty(&comp->vmsc))
539 			mpam_component_destroy(comp);
540 		return PTR_ERR(vmsc);
541 	}
542 
543 	err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
544 	if (err) {
545 		if (list_empty(&vmsc->ris))
546 			mpam_vmsc_destroy(vmsc);
547 		return err;
548 	}
549 
550 	ris->ris_idx = ris_idx;
551 	INIT_LIST_HEAD_RCU(&ris->msc_list);
552 	INIT_LIST_HEAD_RCU(&ris->vmsc_list);
553 	ris->vmsc = vmsc;
554 
555 	cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
556 	cpumask_or(&class->affinity, &class->affinity, &ris->affinity);
557 	list_add_rcu(&ris->vmsc_list, &vmsc->ris);
558 	list_add_rcu(&ris->msc_list, &msc->ris);
559 
560 	return 0;
561 }
562 
563 static void mpam_ris_destroy(struct mpam_msc_ris *ris)
564 {
565 	struct mpam_vmsc *vmsc = ris->vmsc;
566 	struct mpam_msc *msc = vmsc->msc;
567 	struct mpam_component *comp = vmsc->comp;
568 	struct mpam_class *class = comp->class;
569 
570 	lockdep_assert_held(&mpam_list_lock);
571 
572 	/*
573 	 * It is assumed affinities don't overlap. If they do the class becomes
574 	 * unusable immediately.
575 	 */
576 	cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);
577 	cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
578 	clear_bit(ris->ris_idx, &msc->ris_idxs);
579 	list_del_rcu(&ris->msc_list);
580 	list_del_rcu(&ris->vmsc_list);
581 	add_to_garbage(ris);
582 
583 	if (list_empty(&vmsc->ris))
584 		mpam_vmsc_destroy(vmsc);
585 }
586 
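/*
 * Create the RIS with index @ris_idx on @msc and link it into the
 * class/component/vmsc hierarchy described by firmware. On failure, any
 * freshly-created levels of the hierarchy that would be left empty are
 * destroyed and freed again.
 */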
587 int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
588 		    enum mpam_class_types type, u8 class_id, int component_id)
589 {
590 	int err;
591 
592 	mutex_lock(&mpam_list_lock);
593 	err = mpam_ris_create_locked(msc, ris_idx, type, class_id,
594 				     component_id);
595 	mutex_unlock(&mpam_list_lock);
596 	if (err)
597 		mpam_free_garbage();
598 
599 	return err;
600 }
601 
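/*
 * Return the RIS with index @ris_idx on @msc, creating a placeholder entry
 * of type MPAM_CLASS_UNKNOWN if firmware did not describe one.
 */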
602 static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc,
603 						   u8 ris_idx)
604 {
605 	int err;
606 	struct mpam_msc_ris *ris;
607 
608 	lockdep_assert_held(&mpam_list_lock);
609 
610 	if (!test_bit(ris_idx, &msc->ris_idxs)) {
611 		err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN,
612 					     0, 0);
613 		if (err)
614 			return ERR_PTR(err);
615 	}
616 
617 	list_for_each_entry(ris, &msc->ris, msc_list) {
618 		if (ris->ris_idx == ris_idx)
619 			return ris;
620 	}
621 
622 	return ERR_PTR(-ENOENT);
623 }
624 
625 /*
626  * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour
627  * of NRDY, software can use this bit for any purpose" - so hardware might not
628  * implement this - but it isn't RES0.
629  *
630  * Try to see which values stick in this bit. If we can write either value,
631  * it's probably not implemented by hardware.
632  */
633 static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
634 {
635 	u32 now;
636 	u64 mon_sel;
637 	bool can_set, can_clear;
638 	struct mpam_msc *msc = ris->vmsc->msc;
639 
640 	if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc)))
641 		return false;
642 
643 	mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) |
644 		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
645 	_mpam_write_monsel_reg(msc, mon_reg, mon_sel);
646 
647 	_mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY);
648 	now = _mpam_read_monsel_reg(msc, mon_reg);
649 	can_set = now & MSMON___NRDY;
650 
651 	_mpam_write_monsel_reg(msc, mon_reg, 0);
652 	now = _mpam_read_monsel_reg(msc, mon_reg);
653 	can_clear = !(now & MSMON___NRDY);
654 	mpam_mon_sel_unlock(msc);
655 
656 	return (!can_set || !can_clear);
657 }
658 
659 #define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg)			\
660 	_mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg)
661 
662 static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
663 {
664 	int err;
665 	struct mpam_msc *msc = ris->vmsc->msc;
666 	struct device *dev = &msc->pdev->dev;
667 	struct mpam_props *props = &ris->props;
668 	struct mpam_class *class = ris->vmsc->comp->class;
669 
670 	lockdep_assert_held(&msc->probe_lock);
671 	lockdep_assert_held(&msc->part_sel_lock);
672 
673 	/* Cache Capacity Partitioning */
674 	if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) {
675 		u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR);
676 
677 		props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features);
678 		if (props->cmax_wd &&
679 		    FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ccap_features))
680 			mpam_set_feature(mpam_feat_cmax_softlim, props);
681 
682 		if (props->cmax_wd &&
683 		    !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ccap_features))
684 			mpam_set_feature(mpam_feat_cmax_cmax, props);
685 
686 		if (props->cmax_wd &&
687 		    FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ccap_features))
688 			mpam_set_feature(mpam_feat_cmax_cmin, props);
689 
690 		props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ccap_features);
691 		if (props->cassoc_wd &&
692 		    FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ccap_features))
693 			mpam_set_feature(mpam_feat_cmax_cassoc, props);
694 	}
695 
696 	/* Cache Portion partitioning */
697 	if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) {
698 		u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR);
699 
700 		props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features);
701 		if (props->cpbm_wd)
702 			mpam_set_feature(mpam_feat_cpor_part, props);
703 	}
704 
705 	/* Memory bandwidth partitioning */
706 	if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) {
707 		u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR);
708 
709 		/* portion bitmap resolution */
710 		props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features);
711 		if (props->mbw_pbm_bits &&
712 		    FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features))
713 			mpam_set_feature(mpam_feat_mbw_part, props);
714 
715 		props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features);
716 		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features))
717 			mpam_set_feature(mpam_feat_mbw_max, props);
718 
719 		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features))
720 			mpam_set_feature(mpam_feat_mbw_min, props);
721 
722 		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features))
723 			mpam_set_feature(mpam_feat_mbw_prop, props);
724 	}
725 
726 	/* Priority partitioning */
727 	if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) {
728 		u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR);
729 
730 		props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features);
731 		if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) {
732 			mpam_set_feature(mpam_feat_intpri_part, props);
733 			if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features))
734 				mpam_set_feature(mpam_feat_intpri_part_0_low, props);
735 		}
736 
737 		props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features);
738 		if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) {
739 			mpam_set_feature(mpam_feat_dspri_part, props);
740 			if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features))
741 				mpam_set_feature(mpam_feat_dspri_part_0_low, props);
742 		}
743 	}
744 
745 	/* Performance Monitoring */
746 	if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) {
747 		u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR);
748 
749 		/*
750 		 * If the firmware "arm,not-ready-us" property is missing, the
751 		 * CSU counters can't be used. Should we wait forever?
752 		 */
753 		err = device_property_read_u32(&msc->pdev->dev,
754 					       "arm,not-ready-us",
755 					       &msc->nrdy_usec);
756 
757 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) {
758 			u32 csumonidr;
759 
760 			csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR);
761 			props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr);
762 			if (props->num_csu_mon) {
763 				bool hw_managed;
764 
765 				mpam_set_feature(mpam_feat_msmon_csu, props);
766 
767 				if (FIELD_GET(MPAMF_CSUMON_IDR_HAS_XCL, csumonidr))
768 					mpam_set_feature(mpam_feat_msmon_csu_xcl, props);
769 
770 				/* Is NRDY hardware managed? */
771 				hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU);
772 				if (hw_managed)
773 					mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props);
774 			}
775 
776 			/*
777 			 * Accept the missing firmware property if NRDY appears
778 			 * un-implemented.
779 			 */
780 			if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props))
781 				dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware\n");
782 		}
783 		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
784 			bool hw_managed;
785 			u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
786 
787 			props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr);
788 			if (props->num_mbwu_mon)
789 				mpam_set_feature(mpam_feat_msmon_mbwu, props);
790 
791 			if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr))
792 				mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props);
793 
794 			/* Is NRDY hardware managed? */
795 			hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU);
796 			if (hw_managed)
797 				mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props);
798 
799 			/*
800 			 * Don't warn about any missing firmware property for
801 			 * MBWU NRDY - it doesn't make any sense!
802 			 */
803 		}
804 	}
805 
806 	/*
807 	 * RIS with PARTID narrowing don't have enough storage for one
808 	 * configuration per PARTID. If these are in a class we could use,
809 	 * reduce the supported partid_max to match the number of intpartid.
810 	 * If the class is unknown, just ignore it.
811 	 */
812 	if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) &&
813 	    class->type != MPAM_CLASS_UNKNOWN) {
814 		u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR);
815 		u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr);
816 
817 		mpam_set_feature(mpam_feat_partid_nrw, props);
818 		msc->partid_max = min(msc->partid_max, partid_max);
819 	}
820 }
821 
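/*
 * Discover how many RIS sit behind this MSC and the common PARTID/PMG
 * ranges, then probe the features of each RIS. Runs on a CPU that can
 * access the MSC, with msc->probe_lock held.
 */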
822 static int mpam_msc_hw_probe(struct mpam_msc *msc)
823 {
824 	u64 idr;
825 	u16 partid_max;
826 	u8 ris_idx, pmg_max;
827 	struct mpam_msc_ris *ris;
828 	struct device *dev = &msc->pdev->dev;
829 
830 	lockdep_assert_held(&msc->probe_lock);
831 
832 	idr = __mpam_read_reg(msc, MPAMF_AIDR);
833 	if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) {
834 		dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n");
835 		return -EIO;
836 	}
837 
838 	/* Grab an IDR value to find out how many RIS there are */
839 	mutex_lock(&msc->part_sel_lock);
840 	idr = mpam_msc_read_idr(msc);
841 	mutex_unlock(&msc->part_sel_lock);
842 
843 	msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr);
844 
845 	/* Use these values so partid/pmg always starts with a valid value */
846 	msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
847 	msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
848 
849 	for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) {
850 		mutex_lock(&msc->part_sel_lock);
851 		__mpam_part_sel(ris_idx, 0, msc);
852 		idr = mpam_msc_read_idr(msc);
853 		mutex_unlock(&msc->part_sel_lock);
854 
855 		partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
856 		pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
857 		msc->partid_max = min(msc->partid_max, partid_max);
858 		msc->pmg_max = min(msc->pmg_max, pmg_max);
859 		msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr);
860 
861 		mutex_lock(&mpam_list_lock);
862 		ris = mpam_get_or_create_ris(msc, ris_idx);
863 		mutex_unlock(&mpam_list_lock);
864 		if (IS_ERR(ris))
865 			return PTR_ERR(ris);
866 		ris->idr = idr;
867 
868 		mutex_lock(&msc->part_sel_lock);
869 		__mpam_part_sel(ris_idx, 0, msc);
870 		mpam_ris_hw_probe(ris);
871 		mutex_unlock(&msc->part_sel_lock);
872 	}
873 
874 	/* Clear any stale errors */
875 	mpam_msc_clear_esr(msc);
876 
877 	spin_lock(&partid_max_lock);
878 	mpam_partid_max = min(mpam_partid_max, msc->partid_max);
879 	mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max);
880 	spin_unlock(&partid_max_lock);
881 
882 	msc->probed = true;
883 
884 	return 0;
885 }
886 
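/*
 * Reset a wd-bit wide portion bitmap register to all-ones, i.e. no
 * restriction. e.g. wd = 33: one full word of ~0 is written, then the final
 * word is written as GENMASK(0, 0) = 0x1 so only the single implemented bit
 * of that word is set.
 */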
887 static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
888 {
889 	u32 num_words, msb;
890 	u32 bm = ~0;
891 	int i;
892 
893 	lockdep_assert_held(&msc->part_sel_lock);
894 
895 	if (wd == 0)
896 		return;
897 
898 	/*
899 	 * Write ~0 to all but the last 32bit word, which may
900 	 * have fewer bits...
901 	 */
902 	num_words = DIV_ROUND_UP(wd, 32);
903 	for (i = 0; i < num_words - 1; i++, reg += sizeof(bm))
904 		__mpam_write_reg(msc, reg, bm);
905 
906 	/*
907 	 * ....and then the last (maybe) partial 32bit word. When wd is a
908 	 * multiple of 32, msb should be 31 to write a full 32bit word.
909 	 */
910 	msb = (wd - 1) % 32;
911 	bm = GENMASK(msb, 0);
912 	__mpam_write_reg(msc, reg, bm);
913 }
914 
915 /* Called via IPI. Call while holding an SRCU reference */
916 static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid,
917 				      struct mpam_config *cfg)
918 {
919 	u32 pri_val = 0;
920 	u16 cmax = MPAMCFG_CMAX_CMAX;
921 	struct mpam_msc *msc = ris->vmsc->msc;
922 	struct mpam_props *rprops = &ris->props;
923 	u16 dspri = GENMASK(rprops->dspri_wd, 0);
924 	u16 intpri = GENMASK(rprops->intpri_wd, 0);
925 
926 	mutex_lock(&msc->part_sel_lock);
927 	__mpam_part_sel(ris->ris_idx, partid, msc);
928 
929 	if (mpam_has_feature(mpam_feat_partid_nrw, rprops)) {
930 		/* Update the intpartid mapping */
931 		mpam_write_partsel_reg(msc, INTPARTID,
932 				       MPAMCFG_INTPARTID_INTERNAL | partid);
933 
934 		/*
935 		 * Then switch to the 'internal' partid to update the
936 		 * configuration.
937 		 */
938 		__mpam_intpart_sel(ris->ris_idx, partid, msc);
939 	}
940 
941 	if (mpam_has_feature(mpam_feat_cpor_part, rprops) &&
942 	    mpam_has_feature(mpam_feat_cpor_part, cfg)) {
943 		if (cfg->reset_cpbm)
944 			mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd);
945 		else
946 			mpam_write_partsel_reg(msc, CPBM, cfg->cpbm);
947 	}
948 
949 	if (mpam_has_feature(mpam_feat_mbw_part, rprops) &&
950 	    mpam_has_feature(mpam_feat_mbw_part, cfg)) {
951 		if (cfg->reset_mbw_pbm)
952 			mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits);
953 		else
954 			mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm);
955 	}
956 
957 	if (mpam_has_feature(mpam_feat_mbw_min, rprops) &&
958 	    mpam_has_feature(mpam_feat_mbw_min, cfg))
959 		mpam_write_partsel_reg(msc, MBW_MIN, 0);
960 
961 	if (mpam_has_feature(mpam_feat_mbw_max, rprops) &&
962 	    mpam_has_feature(mpam_feat_mbw_max, cfg)) {
963 		if (cfg->reset_mbw_max)
964 			mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX);
965 		else
966 			mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max);
967 	}
968 
969 	if (mpam_has_feature(mpam_feat_mbw_prop, rprops) &&
970 	    mpam_has_feature(mpam_feat_mbw_prop, cfg))
971 		mpam_write_partsel_reg(msc, MBW_PROP, 0);
972 
973 	if (mpam_has_feature(mpam_feat_cmax_cmax, rprops))
974 		mpam_write_partsel_reg(msc, CMAX, cmax);
975 
976 	if (mpam_has_feature(mpam_feat_cmax_cmin, rprops))
977 		mpam_write_partsel_reg(msc, CMIN, 0);
978 
979 	if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops))
980 		mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC);
981 
982 	if (mpam_has_feature(mpam_feat_intpri_part, rprops) ||
983 	    mpam_has_feature(mpam_feat_dspri_part, rprops)) {
984 		/* aces high? */
985 		if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops))
986 			intpri = 0;
987 		if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops))
988 			dspri = 0;
989 
990 		if (mpam_has_feature(mpam_feat_intpri_part, rprops))
991 			pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri);
992 		if (mpam_has_feature(mpam_feat_dspri_part, rprops))
993 			pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri);
994 
995 		mpam_write_partsel_reg(msc, PRI, pri_val);
996 	}
997 
998 	mutex_unlock(&msc->part_sel_lock);
999 }
1000 
1001 static void mpam_init_reset_cfg(struct mpam_config *reset_cfg)
1002 {
1003 	*reset_cfg = (struct mpam_config) {
1004 		.reset_cpbm = true,
1005 		.reset_mbw_pbm = true,
1006 		.reset_mbw_max = true,
1007 	};
1008 	bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST);
1009 }
1010 
1011 /*
1012  * Called via smp_call_on_cpu() to prevent migration, while still being
1013  * pre-emptible. Caller must hold mpam_srcu.
1014  */
1015 static int mpam_reset_ris(void *arg)
1016 {
1017 	u16 partid, partid_max;
1018 	struct mpam_config reset_cfg;
1019 	struct mpam_msc_ris *ris = arg;
1020 
1021 	if (ris->in_reset_state)
1022 		return 0;
1023 
1024 	mpam_init_reset_cfg(&reset_cfg);
1025 
1026 	spin_lock(&partid_max_lock);
1027 	partid_max = mpam_partid_max;
1028 	spin_unlock(&partid_max_lock);
1029 	for (partid = 0; partid <= partid_max; partid++)
1030 		mpam_reprogram_ris_partid(ris, partid, &reset_cfg);
1031 
1032 	return 0;
1033 }
1034 
1035 /*
1036  * Get the preferred CPU for this MSC. If it is accessible from this CPU,
1037  * this CPU is preferred. This can be preempted/migrated; that will only
1038  * result in more work.
1039  */
1040 static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc)
1041 {
1042 	int cpu = raw_smp_processor_id();
1043 
1044 	if (cpumask_test_cpu(cpu, &msc->accessibility))
1045 		return cpu;
1046 
1047 	return cpumask_first_and(&msc->accessibility, cpu_online_mask);
1048 }
1049 
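/*
 * Run @fn on a CPU that can access @msc's registers. smp_call_on_cpu() is
 * used rather than an IPI so that @fn may sleep, e.g. to take the MSC's
 * part_sel_lock mutex.
 */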
1050 static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg)
1051 {
1052 	lockdep_assert_irqs_enabled();
1053 	lockdep_assert_cpus_held();
1054 	WARN_ON_ONCE(!srcu_read_lock_held(&mpam_srcu));
1055 
1056 	return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true);
1057 }
1058 
1059 struct mpam_write_config_arg {
1060 	struct mpam_msc_ris *ris;
1061 	struct mpam_component *comp;
1062 	u16 partid;
1063 };
1064 
1065 static int __write_config(void *arg)
1066 {
1067 	struct mpam_write_config_arg *c = arg;
1068 
1069 	mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]);
1070 
1071 	return 0;
1072 }
1073 
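/*
 * Program every RIS behind this MSC with the cached configuration for each
 * PARTID, or reset it if no configuration exists. Called when the first CPU
 * that can access the MSC comes online.
 */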
1074 static void mpam_reprogram_msc(struct mpam_msc *msc)
1075 {
1076 	u16 partid;
1077 	bool reset;
1078 	struct mpam_config *cfg;
1079 	struct mpam_msc_ris *ris;
1080 	struct mpam_write_config_arg arg;
1081 
1082 	/*
1083 	 * No lock for mpam_partid_max as partid_max_published has been
1084 	 * set by mpam_enable(), so the values can no longer change.
1085 	 */
1086 	mpam_assert_partid_sizes_fixed();
1087 
1088 	mutex_lock(&msc->cfg_lock);
1089 	list_for_each_entry_srcu(ris, &msc->ris, msc_list,
1090 				 srcu_read_lock_held(&mpam_srcu)) {
1091 		if (!mpam_is_enabled() && !ris->in_reset_state) {
1092 			mpam_touch_msc(msc, &mpam_reset_ris, ris);
1093 			ris->in_reset_state = true;
1094 			continue;
1095 		}
1096 
1097 		arg.comp = ris->vmsc->comp;
1098 		arg.ris = ris;
1099 		reset = true;
1100 		for (partid = 0; partid <= mpam_partid_max; partid++) {
1101 			cfg = &ris->vmsc->comp->cfg[partid];
1102 			if (!bitmap_empty(cfg->features, MPAM_FEATURE_LAST))
1103 				reset = false;
1104 
1105 			arg.partid = partid;
1106 			mpam_touch_msc(msc, __write_config, &arg);
1107 		}
1108 		ris->in_reset_state = reset;
1109 	}
1110 	mutex_unlock(&msc->cfg_lock);
1111 }
1112 
1113 static void _enable_percpu_irq(void *_irq)
1114 {
1115 	int *irq = _irq;
1116 
1117 	enable_percpu_irq(*irq, IRQ_TYPE_NONE);
1118 }
1119 
1120 static int mpam_cpu_online(unsigned int cpu)
1121 {
1122 	struct mpam_msc *msc;
1123 
1124 	guard(srcu)(&mpam_srcu);
1125 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1126 				 srcu_read_lock_held(&mpam_srcu)) {
1127 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
1128 			continue;
1129 
1130 		if (msc->reenable_error_ppi)
1131 			_enable_percpu_irq(&msc->reenable_error_ppi);
1132 
1133 		if (atomic_fetch_inc(&msc->online_refs) == 0)
1134 			mpam_reprogram_msc(msc);
1135 	}
1136 
1137 	return 0;
1138 }
1139 
1140 /* Before mpam is enabled, try to probe new MSC */
1141 static int mpam_discovery_cpu_online(unsigned int cpu)
1142 {
1143 	int err = 0;
1144 	struct mpam_msc *msc;
1145 	bool new_device_probed = false;
1146 
1147 	if (mpam_is_enabled())
1148 		return 0;
1149 
1150 	guard(srcu)(&mpam_srcu);
1151 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1152 				 srcu_read_lock_held(&mpam_srcu)) {
1153 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
1154 			continue;
1155 
1156 		mutex_lock(&msc->probe_lock);
1157 		if (!msc->probed)
1158 			err = mpam_msc_hw_probe(msc);
1159 		mutex_unlock(&msc->probe_lock);
1160 
1161 		if (err)
1162 			break;
1163 		new_device_probed = true;
1164 	}
1165 
1166 	if (new_device_probed && !err)
1167 		schedule_work(&mpam_enable_work);
1168 	if (err) {
1169 		mpam_disable_reason = "error during probing";
1170 		schedule_work(&mpam_broken_work);
1171 	}
1172 
1173 	return err;
1174 }
1175 
1176 static int mpam_cpu_offline(unsigned int cpu)
1177 {
1178 	struct mpam_msc *msc;
1179 
1180 	guard(srcu)(&mpam_srcu);
1181 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1182 				 srcu_read_lock_held(&mpam_srcu)) {
1183 		if (!cpumask_test_cpu(cpu, &msc->accessibility))
1184 			continue;
1185 
1186 		if (msc->reenable_error_ppi)
1187 			disable_percpu_irq(msc->reenable_error_ppi);
1188 
1189 		if (atomic_dec_and_test(&msc->online_refs)) {
1190 			struct mpam_msc_ris *ris;
1191 
1192 			mutex_lock(&msc->cfg_lock);
1193 			list_for_each_entry_srcu(ris, &msc->ris, msc_list,
1194 						 srcu_read_lock_held(&mpam_srcu)) {
1195 				mpam_touch_msc(msc, &mpam_reset_ris, ris);
1196 
1197 				/*
1198 				 * The reset state for non-zero partid may be
1199 				 * lost while the CPUs are offline.
1200 				 */
1201 				ris->in_reset_state = false;
1202 			}
1203 			mutex_unlock(&msc->cfg_lock);
1204 		}
1205 	}
1206 
1207 	return 0;
1208 }
1209 
1210 static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online),
1211 					  int (*offline)(unsigned int offline),
1212 					  char *name)
1213 {
1214 	mutex_lock(&mpam_cpuhp_state_lock);
1215 	if (mpam_cpuhp_state) {
1216 		cpuhp_remove_state(mpam_cpuhp_state);
1217 		mpam_cpuhp_state = 0;
1218 	}
1219 
1220 	mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online,
1221 					     offline);
1222 	if (mpam_cpuhp_state <= 0) {
1223 		pr_err("Failed to register cpuhp callbacks\n");
1224 		mpam_cpuhp_state = 0;
1225 	}
1226 	mutex_unlock(&mpam_cpuhp_state_lock);
1227 }
1228 
1229 static int __setup_ppi(struct mpam_msc *msc)
1230 {
1231 	int cpu;
1232 
1233 	msc->error_dev_id = alloc_percpu(struct mpam_msc *);
1234 	if (!msc->error_dev_id)
1235 		return -ENOMEM;
1236 
1237 	for_each_cpu(cpu, &msc->accessibility)
1238 		*per_cpu_ptr(msc->error_dev_id, cpu) = msc;
1239 
1240 	return 0;
1241 }
1242 
1243 static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
1244 {
1245 	int irq;
1246 
1247 	irq = platform_get_irq_byname_optional(msc->pdev, "error");
1248 	if (irq <= 0)
1249 		return 0;
1250 
1251 	/* Allocate and initialise the percpu device pointer for PPI */
1252 	if (irq_is_percpu(irq))
1253 		return __setup_ppi(msc);
1254 
1255 	/* Sanity check: a shared error interrupt requires the MSC to be accessible from any CPU */
1256 	if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
1257 		pr_err_once("msc:%u is a private resource with a shared error interrupt\n",
1258 			    msc->id);
1259 		return -EINVAL;
1260 	}
1261 
1262 	return 0;
1263 }
1264 
1265 /*
1266  * An MSC can control traffic from a set of CPUs, but may only be accessible
1267  * from a (hopefully wider) set of CPUs. The common reason for this is power
1268  * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the
1269  * corresponding cache may also be powered off. By making accesses from
1270  * one of those CPUs, we ensure we don't access a cache that's powered off.
1271  */
1272 static void update_msc_accessibility(struct mpam_msc *msc)
1273 {
1274 	u32 affinity_id;
1275 	int err;
1276 
1277 	err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
1278 				       &affinity_id);
1279 	if (err)
1280 		cpumask_copy(&msc->accessibility, cpu_possible_mask);
1281 	else
1282 		acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility);
1283 }
1284 
1285 /*
1286  * There are two ways of reaching a struct mpam_msc_ris. Via the
1287  * class->component->vmsc->ris, or via the msc.
1288  * When destroying the msc, the other side needs unlinking and cleaning up too.
1289  */
1290 static void mpam_msc_destroy(struct mpam_msc *msc)
1291 {
1292 	struct platform_device *pdev = msc->pdev;
1293 	struct mpam_msc_ris *ris, *tmp;
1294 
1295 	lockdep_assert_held(&mpam_list_lock);
1296 
1297 	list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list)
1298 		mpam_ris_destroy(ris);
1299 
1300 	list_del_rcu(&msc->all_msc_list);
1301 	platform_set_drvdata(pdev, NULL);
1302 
1303 	add_to_garbage(msc);
1304 }
1305 
1306 static void mpam_msc_drv_remove(struct platform_device *pdev)
1307 {
1308 	struct mpam_msc *msc = platform_get_drvdata(pdev);
1309 
1310 	mutex_lock(&mpam_list_lock);
1311 	mpam_msc_destroy(msc);
1312 	mutex_unlock(&mpam_list_lock);
1313 
1314 	mpam_free_garbage();
1315 }
1316 
1317 static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev)
1318 {
1319 	int err;
1320 	u32 tmp;
1321 	struct mpam_msc *msc;
1322 	struct resource *msc_res;
1323 	struct device *dev = &pdev->dev;
1324 
1325 	lockdep_assert_held(&mpam_list_lock);
1326 
1327 	msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
1328 	if (!msc)
1329 		return ERR_PTR(-ENOMEM);
1330 	init_garbage(&msc->garbage);
1331 	msc->garbage.pdev = pdev;
1332 
1333 	err = devm_mutex_init(dev, &msc->probe_lock);
1334 	if (err)
1335 		return ERR_PTR(err);
1336 
1337 	err = devm_mutex_init(dev, &msc->part_sel_lock);
1338 	if (err)
1339 		return ERR_PTR(err);
1340 
1341 	err = devm_mutex_init(dev, &msc->error_irq_lock);
1342 	if (err)
1343 		return ERR_PTR(err);
1344 
1345 	err = devm_mutex_init(dev, &msc->cfg_lock);
1346 	if (err)
1347 		return ERR_PTR(err);
1348 
1349 	mpam_mon_sel_lock_init(msc);
1350 	msc->id = pdev->id;
1351 	msc->pdev = pdev;
1352 	INIT_LIST_HEAD_RCU(&msc->all_msc_list);
1353 	INIT_LIST_HEAD_RCU(&msc->ris);
1354 
1355 	update_msc_accessibility(msc);
1356 	if (cpumask_empty(&msc->accessibility)) {
1357 		dev_err_once(dev, "MSC is not accessible from any CPU!\n");
1358 		return ERR_PTR(-EINVAL);
1359 	}
1360 
1361 	err = mpam_msc_setup_error_irq(msc);
1362 	if (err)
1363 		return ERR_PTR(err);
1364 
1365 	if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp))
1366 		msc->iface = MPAM_IFACE_MMIO;
1367 	else
1368 		msc->iface = MPAM_IFACE_PCC;
1369 
1370 	if (msc->iface == MPAM_IFACE_MMIO) {
1371 		void __iomem *io;
1372 
1373 		io = devm_platform_get_and_ioremap_resource(pdev, 0,
1374 							    &msc_res);
1375 		if (IS_ERR(io)) {
1376 			dev_err_once(dev, "Failed to map MSC base address\n");
1377 			return ERR_CAST(io);
1378 		}
1379 		msc->mapped_hwpage_sz = resource_size(msc_res);
1380 		msc->mapped_hwpage = io;
1381 	} else {
1382 		return ERR_PTR(-EINVAL);
1383 	}
1384 
1385 	list_add_rcu(&msc->all_msc_list, &mpam_all_msc);
1386 	platform_set_drvdata(pdev, msc);
1387 
1388 	return msc;
1389 }
1390 
1391 static int fw_num_msc;
1392 
1393 static int mpam_msc_drv_probe(struct platform_device *pdev)
1394 {
1395 	int err;
1396 	struct mpam_msc *msc = NULL;
1397 	void *plat_data = pdev->dev.platform_data;
1398 
1399 	mutex_lock(&mpam_list_lock);
1400 	msc = do_mpam_msc_drv_probe(pdev);
1401 	mutex_unlock(&mpam_list_lock);
1402 
1403 	if (IS_ERR(msc))
1404 		return PTR_ERR(msc);
1405 
1406 	/* Create RIS entries described by firmware */
1407 	err = acpi_mpam_parse_resources(msc, plat_data);
1408 	if (err) {
1409 		mpam_msc_drv_remove(pdev);
1410 		return err;
1411 	}
1412 
1413 	if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc)
1414 		mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL,
1415 					      "mpam:drv_probe");
1416 
1417 	return 0;
1418 }
1419 
1420 static struct platform_driver mpam_msc_driver = {
1421 	.driver = {
1422 		.name = "mpam_msc",
1423 	},
1424 	.probe = mpam_msc_drv_probe,
1425 	.remove = mpam_msc_drv_remove,
1426 };
1427 
1428 /* Any of these features mean the BWA_WD field is valid. */
1429 static bool mpam_has_bwa_wd_feature(struct mpam_props *props)
1430 {
1431 	if (mpam_has_feature(mpam_feat_mbw_min, props))
1432 		return true;
1433 	if (mpam_has_feature(mpam_feat_mbw_max, props))
1434 		return true;
1435 	if (mpam_has_feature(mpam_feat_mbw_prop, props))
1436 		return true;
1437 	return false;
1438 }
1439 
1440 /* Any of these features mean the CMAX_WD field is valid. */
1441 static bool mpam_has_cmax_wd_feature(struct mpam_props *props)
1442 {
1443 	if (mpam_has_feature(mpam_feat_cmax_cmax, props))
1444 		return true;
1445 	if (mpam_has_feature(mpam_feat_cmax_cmin, props))
1446 		return true;
1447 	return false;
1448 }
1449 
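/*
 * Helpers for __props_mismatch():
 * CAN_MERGE_FEAT(): the child has a feature the parent lacks and the two
 * alias the same resource, so the child's value can simply be copied over.
 * MISMATCHED_FEAT()/MISMATCHED_HELPER(): the parent has the feature but the
 * child's value differs, or the child lacks it and the controls don't alias;
 * the caller must pick a safe combined value.
 */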
1450 #define MISMATCHED_HELPER(parent, child, helper, field, alias)		\
1451 	helper(parent) &&						\
1452 	((helper(child) && (parent)->field != (child)->field) ||	\
1453 	 (!helper(child) && !(alias)))
1454 
1455 #define MISMATCHED_FEAT(parent, child, feat, field, alias)		     \
1456 	mpam_has_feature((feat), (parent)) &&				     \
1457 	((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \
1458 	 (!mpam_has_feature((feat), (child)) && !(alias)))
1459 
1460 #define CAN_MERGE_FEAT(parent, child, feat, alias)			\
1461 	(alias) && !mpam_has_feature((feat), (parent)) &&		\
1462 	mpam_has_feature((feat), (child))
1463 
1464 /*
1465  * Combine two props fields.
1466  * If this is for controls that alias the same resource, it is safe to just
1467  * copy the values over. If two aliasing controls implement the same scheme
1468  * a safe value must be picked.
1469  * For non-aliasing controls, these control different resources, and the
1470  * resulting safe value must be compatible with both. When merging values in
1471  * the tree, all the aliasing resources must be handled first.
1472  * On mismatch, parent is modified.
1473  */
1474 static void __props_mismatch(struct mpam_props *parent,
1475 			     struct mpam_props *child, bool alias)
1476 {
1477 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) {
1478 		parent->cpbm_wd = child->cpbm_wd;
1479 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part,
1480 				   cpbm_wd, alias)) {
1481 		pr_debug("cleared cpor_part\n");
1482 		mpam_clear_feature(mpam_feat_cpor_part, parent);
1483 		parent->cpbm_wd = 0;
1484 	}
1485 
1486 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) {
1487 		parent->mbw_pbm_bits = child->mbw_pbm_bits;
1488 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part,
1489 				   mbw_pbm_bits, alias)) {
1490 		pr_debug("cleared mbw_part\n");
1491 		mpam_clear_feature(mpam_feat_mbw_part, parent);
1492 		parent->mbw_pbm_bits = 0;
1493 	}
1494 
1495 	/* bwa_wd is a count of bits, fewer bits means less precision */
1496 	if (alias && !mpam_has_bwa_wd_feature(parent) &&
1497 	    mpam_has_bwa_wd_feature(child)) {
1498 		parent->bwa_wd = child->bwa_wd;
1499 	} else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature,
1500 				     bwa_wd, alias)) {
1501 		pr_debug("took the min bwa_wd\n");
1502 		parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd);
1503 	}
1504 
1505 	if (alias && !mpam_has_cmax_wd_feature(parent) && mpam_has_cmax_wd_feature(child)) {
1506 		parent->cmax_wd = child->cmax_wd;
1507 	} else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature,
1508 				     cmax_wd, alias)) {
1509 		pr_debug("%s took the min cmax_wd\n", __func__);
1510 		parent->cmax_wd = min(parent->cmax_wd, child->cmax_wd);
1511 	}
1512 
1513 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_cmax_cassoc, alias)) {
1514 		parent->cassoc_wd = child->cassoc_wd;
1515 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_cmax_cassoc,
1516 				   cassoc_wd, alias)) {
1517 		pr_debug("%s cleared cassoc_wd\n", __func__);
1518 		mpam_clear_feature(mpam_feat_cmax_cassoc, parent);
1519 		parent->cassoc_wd = 0;
1520 	}
1521 
1522 	/* For num properties, take the minimum */
1523 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) {
1524 		parent->num_csu_mon = child->num_csu_mon;
1525 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu,
1526 				   num_csu_mon, alias)) {
1527 		pr_debug("took the min num_csu_mon\n");
1528 		parent->num_csu_mon = min(parent->num_csu_mon,
1529 					  child->num_csu_mon);
1530 	}
1531 
1532 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) {
1533 		parent->num_mbwu_mon = child->num_mbwu_mon;
1534 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu,
1535 				   num_mbwu_mon, alias)) {
1536 		pr_debug("took the min num_mbwu_mon\n");
1537 		parent->num_mbwu_mon = min(parent->num_mbwu_mon,
1538 					   child->num_mbwu_mon);
1539 	}
1540 
1541 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_intpri_part, alias)) {
1542 		parent->intpri_wd = child->intpri_wd;
1543 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_intpri_part,
1544 				   intpri_wd, alias)) {
1545 		pr_debug("%s took the min intpri_wd\n", __func__);
1546 		parent->intpri_wd = min(parent->intpri_wd, child->intpri_wd);
1547 	}
1548 
1549 	if (CAN_MERGE_FEAT(parent, child, mpam_feat_dspri_part, alias)) {
1550 		parent->dspri_wd = child->dspri_wd;
1551 	} else if (MISMATCHED_FEAT(parent, child, mpam_feat_dspri_part,
1552 				   dspri_wd, alias)) {
1553 		pr_debug("%s took the min dspri_wd\n", __func__);
1554 		parent->dspri_wd = min(parent->dspri_wd, child->dspri_wd);
1555 	}
1556 
1557 	/* TODO: alias support for these two */
1558 	/* {int,ds}pri may not have differing 0-low behaviour */
1559 	if (mpam_has_feature(mpam_feat_intpri_part, parent) &&
1560 	    (!mpam_has_feature(mpam_feat_intpri_part, child) ||
1561 	     mpam_has_feature(mpam_feat_intpri_part_0_low, parent) !=
1562 	     mpam_has_feature(mpam_feat_intpri_part_0_low, child))) {
1563 		pr_debug("%s cleared intpri_part\n", __func__);
1564 		mpam_clear_feature(mpam_feat_intpri_part, parent);
1565 		mpam_clear_feature(mpam_feat_intpri_part_0_low, parent);
1566 	}
1567 	if (mpam_has_feature(mpam_feat_dspri_part, parent) &&
1568 	    (!mpam_has_feature(mpam_feat_dspri_part, child) ||
1569 	     mpam_has_feature(mpam_feat_dspri_part_0_low, parent) !=
1570 	     mpam_has_feature(mpam_feat_dspri_part_0_low, child))) {
1571 		pr_debug("%s cleared dspri_part\n", __func__);
1572 		mpam_clear_feature(mpam_feat_dspri_part, parent);
1573 		mpam_clear_feature(mpam_feat_dspri_part_0_low, parent);
1574 	}
1575 
1576 	if (alias) {
1577 		/* Merge features for aliased resources */
1578 		bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1579 	} else {
1580 		/* Clear missing features for non aliasing */
1581 		bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST);
1582 	}
1583 }
1584 
1585 /*
1586  * If a vmsc doesn't match class feature/configuration, do the right thing(tm).
1587  * For 'num' properties we can just take the minimum.
1588  * For properties where the mismatched unused bits would make a difference, we
1589  * nobble the class feature, as we can't configure all the resources.
1590  * e.g. The L3 cache is composed of two resources with 13 and 17 portion
1591  * bitmaps respectively.
1592  */
1593 static void
1594 __class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc)
1595 {
1596 	struct mpam_props *cprops = &class->props;
1597 	struct mpam_props *vprops = &vmsc->props;
1598 	struct device *dev = &vmsc->msc->pdev->dev;
1599 
1600 	lockdep_assert_held(&mpam_list_lock); /* we modify class */
1601 
1602 	dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n",
1603 		(long)cprops->features, (long)vprops->features);
1604 
1605 	/* Take the safe value for any common features */
1606 	__props_mismatch(cprops, vprops, false);
1607 }
1608 
1609 static void
1610 __vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris)
1611 {
1612 	struct mpam_props *rprops = &ris->props;
1613 	struct mpam_props *vprops = &vmsc->props;
1614 	struct device *dev = &vmsc->msc->pdev->dev;
1615 
1616 	lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */
1617 
1618 	dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n",
1619 		(long)vprops->features, (long)rprops->features);
1620 
1621 	/*
1622 	 * Merge mismatched features - Copy any features that aren't common,
1623 	 * but take the safe value for any common features.
1624 	 */
1625 	__props_mismatch(vprops, rprops, true);
1626 }
1627 
1628 /*
1629  * Copy the first component's first vMSC's properties and features to the
1630  * class. __class_props_mismatch() will remove conflicts.
1631  * It is not possible to have a class with no components, or a component with
1632  * no resources. The vMSC properties have already been built.
1633  */
1634 static void mpam_enable_init_class_features(struct mpam_class *class)
1635 {
1636 	struct mpam_vmsc *vmsc;
1637 	struct mpam_component *comp;
1638 
1639 	comp = list_first_entry(&class->components,
1640 				struct mpam_component, class_list);
1641 	vmsc = list_first_entry(&comp->vmsc,
1642 				struct mpam_vmsc, comp_list);
1643 
1644 	class->props = vmsc->props;
1645 }
1646 
1647 static void mpam_enable_merge_vmsc_features(struct mpam_component *comp)
1648 {
1649 	struct mpam_vmsc *vmsc;
1650 	struct mpam_msc_ris *ris;
1651 	struct mpam_class *class = comp->class;
1652 
1653 	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
1654 		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
1655 			__vmsc_props_mismatch(vmsc, ris);
1656 			class->nrdy_usec = max(class->nrdy_usec,
1657 					       vmsc->msc->nrdy_usec);
1658 		}
1659 	}
1660 }
1661 
1662 static void mpam_enable_merge_class_features(struct mpam_component *comp)
1663 {
1664 	struct mpam_vmsc *vmsc;
1665 	struct mpam_class *class = comp->class;
1666 
1667 	list_for_each_entry(vmsc, &comp->vmsc, comp_list)
1668 		__class_props_mismatch(class, vmsc);
1669 }
1670 
1671 /*
1672  * Merge all the common resource features into class.
1673  * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features()
1674  * as the first step so that mpam_enable_init_class_features() can initialise
1675  * the class with a representative set of features.
1676  * Next, mpam_enable_merge_class_features() bitwise-ANDs all the vmsc
1677  * features to form the class features.
1678  * Other features are the min/max as appropriate.
1679  *
1680  * To avoid walking the whole tree twice, the class->nrdy_usec property is
1681  * updated when working with the vmsc as it is a max(), and doesn't need
1682  * initialising first.
1683  */
1684 static void mpam_enable_merge_features(struct list_head *all_classes_list)
1685 {
1686 	struct mpam_class *class;
1687 	struct mpam_component *comp;
1688 
1689 	lockdep_assert_held(&mpam_list_lock);
1690 
1691 	list_for_each_entry(class, all_classes_list, classes_list) {
1692 		list_for_each_entry(comp, &class->components, class_list)
1693 			mpam_enable_merge_vmsc_features(comp);
1694 
1695 		mpam_enable_init_class_features(class);
1696 
1697 		list_for_each_entry(comp, &class->components, class_list)
1698 			mpam_enable_merge_class_features(comp);
1699 	}
1700 }
1701 
1702 static char *mpam_errcode_names[16] = {
1703 	[MPAM_ERRCODE_NONE]			= "No error",
1704 	[MPAM_ERRCODE_PARTID_SEL_RANGE]		= "PARTID_SEL_Range",
1705 	[MPAM_ERRCODE_REQ_PARTID_RANGE]		= "Req_PARTID_Range",
1706 	[MPAM_ERRCODE_MSMONCFG_ID_RANGE]	= "MSMONCFG_ID_RANGE",
1707 	[MPAM_ERRCODE_REQ_PMG_RANGE]		= "Req_PMG_Range",
1708 	[MPAM_ERRCODE_MONITOR_RANGE]		= "Monitor_Range",
1709 	[MPAM_ERRCODE_INTPARTID_RANGE]		= "intPARTID_Range",
1710 	[MPAM_ERRCODE_UNEXPECTED_INTERNAL]	= "Unexpected_INTERNAL",
1711 	[MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL]	= "Undefined_RIS_PART_SEL",
1712 	[MPAM_ERRCODE_RIS_NO_CONTROL]		= "RIS_No_Control",
1713 	[MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL]	= "Undefined_RIS_MON_SEL",
1714 	[MPAM_ERRCODE_RIS_NO_MONITOR]		= "RIS_No_Monitor",
1715 	[12 ... 15] = "Reserved"
1716 };
1717 
1718 static int mpam_enable_msc_ecr(void *_msc)
1719 {
1720 	struct mpam_msc *msc = _msc;
1721 
1722 	__mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN);
1723 
1724 	return 0;
1725 }
1726 
1727 /* This can run in mpam_disable(), and the interrupt handler on the same CPU */
1728 static int mpam_disable_msc_ecr(void *_msc)
1729 {
1730 	struct mpam_msc *msc = _msc;
1731 
1732 	__mpam_write_reg(msc, MPAMF_ECR, 0);
1733 
1734 	return 0;
1735 }
1736 
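/*
 * Decode and report the error captured in MPAMF_ESR, then disable error
 * interrupt generation and schedule the driver teardown. All MPAM errors
 * indicate a software bug, so no attempt is made to recover.
 */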
1737 static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
1738 {
1739 	u64 reg;
1740 	u16 partid;
1741 	u8 errcode, pmg, ris;
1742 
1743 	if (WARN_ON_ONCE(!msc) ||
1744 	    WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
1745 					   &msc->accessibility)))
1746 		return IRQ_NONE;
1747 
1748 	reg = mpam_msc_read_esr(msc);
1749 
1750 	errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
1751 	if (!errcode)
1752 		return IRQ_NONE;
1753 
1754 	/* Clear the level-triggered irq */
1755 	mpam_msc_clear_esr(msc);
1756 
1757 	partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg);
1758 	pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
1759 	ris = FIELD_GET(MPAMF_ESR_RIS, reg);
1760 
1761 	pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg:%u, ris:%u\n",
1762 			   msc->id, mpam_errcode_names[errcode], partid, pmg,
1763 			   ris);
1764 
1765 	/* Disable this interrupt. */
1766 	mpam_disable_msc_ecr(msc);
1767 
1768 	/* Are we racing with the thread disabling MPAM? */
1769 	if (!mpam_is_enabled())
1770 		return IRQ_HANDLED;
1771 
1772 	/*
1773 	 * Schedule the teardown work. Don't use a threaded IRQ as we can't
1774 	 * unregister the interrupt from the threaded part of the handler.
1775 	 */
1776 	mpam_disable_reason = "hardware error interrupt";
1777 	schedule_work(&mpam_broken_work);
1778 
1779 	return IRQ_HANDLED;
1780 }
1781 
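/*
 * For the percpu error interrupt, dev_id is the per-CPU msc->error_dev_id
 * slot holding a pointer to the MSC; for a shared SPI, dev_id is the MSC
 * itself.
 */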
1782 static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
1783 {
1784 	struct mpam_msc *msc = *(struct mpam_msc **)dev_id;
1785 
1786 	return __mpam_irq_handler(irq, msc);
1787 }
1788 
1789 static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
1790 {
1791 	struct mpam_msc *msc = dev_id;
1792 
1793 	return __mpam_irq_handler(irq, msc);
1794 }
1795 
1796 static int mpam_register_irqs(void)
1797 {
1798 	int err, irq;
1799 	struct mpam_msc *msc;
1800 
1801 	lockdep_assert_cpus_held();
1802 
1803 	guard(srcu)(&mpam_srcu);
1804 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1805 				 srcu_read_lock_held(&mpam_srcu)) {
1806 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1807 		if (irq <= 0)
1808 			continue;
1809 
1810 		/* The MPAM spec says the interrupt can be SPI, PPI or LPI */
1811 		/* We anticipate sharing the interrupt with other MSCs */
1812 		if (irq_is_percpu(irq)) {
1813 			err = request_percpu_irq(irq, &mpam_ppi_handler,
1814 						 "mpam:msc:error",
1815 						 msc->error_dev_id);
1816 			if (err)
1817 				return err;
1818 
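			/*
			 * Stash the PPI number so the error interrupt can be
			 * re-enabled from the CPU online path.
			 */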
1819 			msc->reenable_error_ppi = irq;
1820 			smp_call_function_many(&msc->accessibility,
1821 					       &_enable_percpu_irq, &irq,
1822 					       true);
1823 		} else {
1824 			err = devm_request_irq(&msc->pdev->dev, irq,
1825 					       &mpam_spi_handler, IRQF_SHARED,
1826 					       "mpam:msc:error", msc);
1827 			if (err)
1828 				return err;
1829 		}
1830 
1831 		mutex_lock(&msc->error_irq_lock);
1832 		msc->error_irq_req = true;
1833 		mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
1834 		msc->error_irq_hw_enabled = true;
1835 		mutex_unlock(&msc->error_irq_lock);
1836 	}
1837 
1838 	return 0;
1839 }
1840 
1841 static void mpam_unregister_irqs(void)
1842 {
1843 	int irq;
1844 	struct mpam_msc *msc;
1845 
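	/*
	 * mpam_touch_msc() may cross-call to a CPU that can reach the MSC, so
	 * hold cpus_read_lock() across the walk.
	 */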
1846 	guard(cpus_read_lock)();
1847 	guard(srcu)(&mpam_srcu);
1848 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
1849 				 srcu_read_lock_held(&mpam_srcu)) {
1850 		irq = platform_get_irq_byname_optional(msc->pdev, "error");
1851 		if (irq <= 0)
1852 			continue;
1853 
1854 		mutex_lock(&msc->error_irq_lock);
1855 		if (msc->error_irq_hw_enabled) {
1856 			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
1857 			msc->error_irq_hw_enabled = false;
1858 		}
1859 
1860 		if (msc->error_irq_req) {
1861 			if (irq_is_percpu(irq)) {
1862 				msc->reenable_error_ppi = 0;
1863 				free_percpu_irq(irq, msc->error_dev_id);
1864 			} else {
1865 				devm_free_irq(&msc->pdev->dev, irq, msc);
1866 			}
1867 			msc->error_irq_req = false;
1868 		}
1869 		mutex_unlock(&msc->error_irq_lock);
1870 	}
1871 }
1872 
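/* The cfg array is freed via the garbage list rather than immediately. */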
1873 static void __destroy_component_cfg(struct mpam_component *comp)
1874 {
1875 	add_to_garbage(comp->cfg);
1876 }
1877 
1878 static void mpam_reset_component_cfg(struct mpam_component *comp)
1879 {
1880 	int i;
1881 	struct mpam_props *cprops = &comp->class->props;
1882 
1883 	mpam_assert_partid_sizes_fixed();
1884 
1885 	if (!comp->cfg)
1886 		return;
1887 
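	/*
	 * Reset every PARTID to the most permissive configuration: all portion
	 * bitmap bits set, and the largest bandwidth limit the implemented
	 * bwa_wd bits can encode. e.g. bwa_wd = 8 gives
	 * mbw_max = GENMASK(15, 8) = 0xff00.
	 */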
1888 	for (i = 0; i <= mpam_partid_max; i++) {
1889 		comp->cfg[i] = (struct mpam_config) {};
1890 		if (cprops->cpbm_wd)
1891 			comp->cfg[i].cpbm = GENMASK(cprops->cpbm_wd - 1, 0);
1892 		if (cprops->mbw_pbm_bits)
1893 			comp->cfg[i].mbw_pbm = GENMASK(cprops->mbw_pbm_bits - 1, 0);
1894 		if (cprops->bwa_wd)
1895 			comp->cfg[i].mbw_max = GENMASK(15, 16 - cprops->bwa_wd);
1896 	}
1897 }
1898 
1899 static int __allocate_component_cfg(struct mpam_component *comp)
1900 {
1901 	mpam_assert_partid_sizes_fixed();
1902 
1903 	if (comp->cfg)
1904 		return 0;
1905 
1906 	comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL);
1907 	if (!comp->cfg)
1908 		return -ENOMEM;
1909 
1910 	/*
1911 	 * The array is free()d in one go, so only cfg[0]'s garbage structure
1912 	 * needs to be initialised.
1913 	 */
1914 	init_garbage(&comp->cfg[0].garbage);
1915 
1916 	mpam_reset_component_cfg(comp);
1917 
1918 	return 0;
1919 }
1920 
1921 static int mpam_allocate_config(void)
1922 {
1923 	struct mpam_class *class;
1924 	struct mpam_component *comp;
1925 
1926 	lockdep_assert_held(&mpam_list_lock);
1927 
1928 	list_for_each_entry(class, &mpam_classes, classes_list) {
1929 		list_for_each_entry(comp, &class->components, class_list) {
1930 			int err = __allocate_component_cfg(comp);
1931 			if (err)
1932 				return err;
1933 		}
1934 	}
1935 
1936 	return 0;
1937 }
1938 
1939 static void mpam_enable_once(void)
1940 {
1941 	int err;
1942 
1943 	/*
1944 	 * Once the cpuhp callbacks have been changed, mpam_partid_max can no
1945 	 * longer change.
1946 	 */
1947 	spin_lock(&partid_max_lock);
1948 	partid_max_published = true;
1949 	spin_unlock(&partid_max_lock);
1950 
1951 	/*
1952 	 * Once all the MSCs have been probed, enabling the IRQs happens next.
1953 	 * That involves cross-calling to a CPU that can reach the MSC, so the
1954 	 * locks must be taken in this order: cpus_read_lock() then mpam_list_lock.
1955 	 */
1956 	cpus_read_lock();
1957 	mutex_lock(&mpam_list_lock);
1958 	do {
1959 		mpam_enable_merge_features(&mpam_classes);
1960 
1961 		err = mpam_register_irqs();
1962 		if (err) {
1963 			pr_warn("Failed to register irqs: %d\n", err);
1964 			break;
1965 		}
1966 
1967 		err = mpam_allocate_config();
1968 		if (err) {
1969 			pr_err("Failed to allocate configuration arrays.\n");
1970 			break;
1971 		}
1972 	} while (0);
1973 	mutex_unlock(&mpam_list_lock);
1974 	cpus_read_unlock();
1975 
1976 	if (err) {
1977 		mpam_disable_reason = "failure during enable";
1978 		schedule_work(&mpam_broken_work);
1979 		return;
1980 	}
1981 
1982 	static_branch_enable(&mpam_enabled);
1983 	mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline,
1984 				      "mpam:online");
1985 
1986 	/* Use printk() to avoid the pr_fmt adding the function name. */
1987 	printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n",
1988 	       mpam_partid_max + 1, mpam_pmg_max + 1);
1989 }
1990 
1991 static void mpam_reset_component_locked(struct mpam_component *comp)
1992 {
1993 	struct mpam_vmsc *vmsc;
1994 
1995 	lockdep_assert_cpus_held();
1996 	mpam_assert_partid_sizes_fixed();
1997 
1998 	mpam_reset_component_cfg(comp);
1999 
2000 	guard(srcu)(&mpam_srcu);
2001 	list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
2002 				 srcu_read_lock_held(&mpam_srcu)) {
2003 		struct mpam_msc *msc = vmsc->msc;
2004 		struct mpam_msc_ris *ris;
2005 
2006 		list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
2007 					 srcu_read_lock_held(&mpam_srcu)) {
2008 			if (!ris->in_reset_state)
2009 				mpam_touch_msc(msc, mpam_reset_ris, ris);
2010 			ris->in_reset_state = true;
2011 		}
2012 	}
2013 }
2014 
2015 static void mpam_reset_class_locked(struct mpam_class *class)
2016 {
2017 	struct mpam_component *comp;
2018 
2019 	lockdep_assert_cpus_held();
2020 
2021 	guard(srcu)(&mpam_srcu);
2022 	list_for_each_entry_srcu(comp, &class->components, class_list,
2023 				 srcu_read_lock_held(&mpam_srcu))
2024 		mpam_reset_component_locked(comp);
2025 }
2026 
2027 static void mpam_reset_class(struct mpam_class *class)
2028 {
2029 	cpus_read_lock();
2030 	mpam_reset_class_locked(class);
2031 	cpus_read_unlock();
2032 }
2033 
2034 /*
2035  * Called in response to an error IRQ.
2036  * All of MPAM's errors indicate a software bug; restore any modified
2037  * controls to their reset values.
2038  */
2039 void mpam_disable(struct work_struct *ignored)
2040 {
2041 	int idx;
2042 	struct mpam_class *class;
2043 	struct mpam_msc *msc, *tmp;
2044 
2045 	mutex_lock(&mpam_cpuhp_state_lock);
2046 	if (mpam_cpuhp_state) {
2047 		cpuhp_remove_state(mpam_cpuhp_state);
2048 		mpam_cpuhp_state = 0;
2049 	}
2050 	mutex_unlock(&mpam_cpuhp_state_lock);
2051 
2052 	static_branch_disable(&mpam_enabled);
2053 
2054 	mpam_unregister_irqs();
2055 
2056 	idx = srcu_read_lock(&mpam_srcu);
2057 	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
2058 				 srcu_read_lock_held(&mpam_srcu))
2059 		mpam_reset_class(class);
2060 	srcu_read_unlock(&mpam_srcu, idx);
2061 
2062 	mutex_lock(&mpam_list_lock);
2063 	list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list)
2064 		mpam_msc_destroy(msc);
2065 	mutex_unlock(&mpam_list_lock);
2066 	mpam_free_garbage();
2067 
2068 	pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason);
2069 }
2070 
2071 /*
2072  * Enable mpam once all devices have been probed.
2073  * Scheduled by mpam_discovery_cpu_online() once all devices have been created.
2074  * Also scheduled when new devices are probed when new CPUs come online.
2075  */
2076 void mpam_enable(struct work_struct *work)
2077 {
2078 	static atomic_t once;
2079 	struct mpam_msc *msc;
2080 	bool all_devices_probed = true;
2081 
2082 	/* Have we probed all the hw devices? */
2083 	guard(srcu)(&mpam_srcu);
2084 	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
2085 				 srcu_read_lock_held(&mpam_srcu)) {
2086 		mutex_lock(&msc->probe_lock);
2087 		if (!msc->probed)
2088 			all_devices_probed = false;
2089 		mutex_unlock(&msc->probe_lock);
2090 
2091 		if (!all_devices_probed)
2092 			break;
2093 	}
2094 
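	/*
	 * This work may be scheduled repeatedly; atomic_fetch_inc() ensures
	 * the enable sequence only runs the first time all devices are seen
	 * probed.
	 */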
2095 	if (all_devices_probed && !atomic_fetch_inc(&once))
2096 		mpam_enable_once();
2097 }
2098 
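/*
 * If newcfg has 'feature' set and a different value for 'member', copy the
 * new value into cfg, mark the feature as set, and record that something
 * changed.
 */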
2099 #define maybe_update_config(cfg, feature, newcfg, member, changes) do { \
2100 	if (mpam_has_feature(feature, newcfg) &&			\
2101 	    (newcfg)->member != (cfg)->member) {			\
2102 		(cfg)->member = (newcfg)->member;			\
2103 		mpam_set_feature(feature, cfg);				\
2104 									\
2105 		(changes) = true;					\
2106 	}								\
2107 } while (0)
2108 
2109 static bool mpam_update_config(struct mpam_config *cfg,
2110 			       const struct mpam_config *newcfg)
2111 {
2112 	bool has_changes = false;
2113 
2114 	maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes);
2115 	maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes);
2116 	maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes);
2117 
2118 	return has_changes;
2119 }
2120 
2121 int mpam_apply_config(struct mpam_component *comp, u16 partid,
2122 		      struct mpam_config *cfg)
2123 {
2124 	struct mpam_write_config_arg arg;
2125 	struct mpam_msc_ris *ris;
2126 	struct mpam_vmsc *vmsc;
2127 	struct mpam_msc *msc;
2128 
2129 	lockdep_assert_cpus_held();
2130 
2131 	/* Don't pass in the current config! */
2132 	WARN_ON_ONCE(&comp->cfg[partid] == cfg);
2133 
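	/*
	 * Nothing to write to the hardware if the stored configuration
	 * already matches.
	 */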
2134 	if (!mpam_update_config(&comp->cfg[partid], cfg))
2135 		return 0;
2136 
2137 	arg.comp = comp;
2138 	arg.partid = partid;
2139 
2140 	guard(srcu)(&mpam_srcu);
2141 	list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
2142 				 srcu_read_lock_held(&mpam_srcu)) {
2143 		msc = vmsc->msc;
2144 
2145 		mutex_lock(&msc->cfg_lock);
2146 		list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
2147 					 srcu_read_lock_held(&mpam_srcu)) {
2148 			arg.ris = ris;
2149 			mpam_touch_msc(msc, __write_config, &arg);
2150 		}
2151 		mutex_unlock(&msc->cfg_lock);
2152 	}
2153 
2154 	return 0;
2155 }
2156 
2157 static int __init mpam_msc_driver_init(void)
2158 {
2159 	if (!system_supports_mpam())
2160 		return -EOPNOTSUPP;
2161 
2162 	init_srcu_struct(&mpam_srcu);
2163 
2164 	fw_num_msc = acpi_mpam_count_msc();
2165 	if (fw_num_msc <= 0) {
2166 		pr_err("No MSC devices found in firmware\n");
2167 		return -EINVAL;
2168 	}
2169 
2170 	return platform_driver_register(&mpam_msc_driver);
2171 }
2172 
2173 /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */
2174 subsys_initcall(mpam_msc_driver_init);
2175