1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology(RDT)
4  * - Monitoring code
5  *
6  * Copyright (C) 2017 Intel Corporation
7  *
8  * Author:
9  *    Vikas Shivappa <vikas.shivappa@intel.com>
10  *
11  * This replaces the perf-based cqm.c, but reuses a lot of code and
12  * data structures originally from Peter Zijlstra and Matt Fleming.
13  *
14  * More information about RDT can be found in the Intel(R) x86 Architecture
15  * Software Developer's Manual June 2016, volume 3, section 17.17.
16  */
17 
18 #define pr_fmt(fmt)	"resctrl: " fmt
19 
20 #include <linux/cpu.h>
21 #include <linux/resctrl.h>
22 #include <linux/sizes.h>
23 #include <linux/slab.h>
24 
25 #include "internal.h"
26 
27 #define CREATE_TRACE_POINTS
28 
29 #include "monitor_trace.h"
30 
31 /**
32  * struct rmid_entry - dirty tracking for all RMID.
33  * @closid:	The CLOSID for this entry.
34  * @rmid:	The RMID for this entry.
35  * @busy:	The number of domains with cached data using this RMID.
36  * @list:	Member of the rmid_free_lru list when busy == 0.
37  *
38  * Depending on the architecture the correct monitor is accessed using
39  * both @closid and @rmid, or @rmid only.
40  *
41  * Take the rdtgroup_mutex when accessing.
42  */
43 struct rmid_entry {
44 	u32				closid;
45 	u32				rmid;
46 	int				busy;
47 	struct list_head		list;
48 };
49 
50 /*
51  * @rmid_free_lru - A least recently used list of free RMIDs
52  *     These RMIDs are guaranteed to have an occupancy less than the
53  *     threshold occupancy
54  */
55 static LIST_HEAD(rmid_free_lru);
56 
57 /*
58  * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
59  *     Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
60  *     Indexed by CLOSID. Protected by rdtgroup_mutex.
61  */
62 static u32 *closid_num_dirty_rmid;
63 
64 /*
65  * @rmid_limbo_count - count of currently unused but (potentially)
66  *     dirty RMIDs.
67  *     This counts RMIDs that no one is currently using but that
68  *     may have an occupancy value > resctrl_rmid_realloc_threshold. The user can
69  *     change the threshold occupancy value.
70  */
71 static unsigned int rmid_limbo_count;
72 
73 /*
74  * @rmid_ptrs - The array of entries used by the limbo and free lists.
75  */
76 static struct rmid_entry	*rmid_ptrs;
77 
78 /*
79  * This is the threshold cache occupancy in bytes at which we will consider an
80  * RMID available for re-allocation.
81  */
82 unsigned int resctrl_rmid_realloc_threshold;
83 
84 /*
85  * This is the maximum value for the reallocation threshold, in bytes.
86  */
87 unsigned int resctrl_rmid_realloc_limit;
88 
89 /*
90  * x86 and arm64 differ in their handling of monitoring.
91  * x86's RMID are independent numbers, there is only one source of traffic
92  * with an RMID value of '1'.
93  * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
94  * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
95  * value is no longer unique.
96  * To account for this, resctrl uses an index. On x86 this is just the RMID,
97  * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
98  *
99  * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
100  * must accept an attempt to read every index.
101  */
102 static inline struct rmid_entry *__rmid_entry(u32 idx)
103 {
104 	struct rmid_entry *entry;
105 	u32 closid, rmid;
106 
107 	entry = &rmid_ptrs[idx];
108 	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
109 
110 	WARN_ON_ONCE(entry->closid != closid);
111 	WARN_ON_ONCE(entry->rmid != rmid);
112 
113 	return entry;
114 }
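
/*
 * Illustrative sketch only, never built (not the real arch helpers): one way
 * an architecture could implement the index encoding described above. The
 * 8-bit RMID width and the example_* helper names are assumptions made for
 * this example.
 */
#if 0
#define EXAMPLE_RMID_WIDTH	8

static inline u32 example_rmid_idx_encode(u32 closid, u32 rmid)
{
	/* x86-like: RMID values are globally unique, the CLOSID is ignored. */
	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
		return rmid;

	/* arm64/MPAM-like: the PMG (RMID) only extends the PARTID (CLOSID). */
	return (closid << EXAMPLE_RMID_WIDTH) | rmid;
}

static inline void example_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		*closid = 0;	/* x86 reports an "empty" CLOSID here */
		*rmid = idx;
		return;
	}

	*closid = idx >> EXAMPLE_RMID_WIDTH;
	*rmid = idx & ((1U << EXAMPLE_RMID_WIDTH) - 1);
}
#endif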
115 
116 static void limbo_release_entry(struct rmid_entry *entry)
117 {
118 	lockdep_assert_held(&rdtgroup_mutex);
119 
120 	rmid_limbo_count--;
121 	list_add_tail(&entry->list, &rmid_free_lru);
122 
123 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
124 		closid_num_dirty_rmid[entry->closid]--;
125 }
126 
127 /*
128  * Check the RMIDs that are marked as busy for this domain. If the
129  * reported LLC occupancy is below the threshold clear the busy bit and
130  * decrement the count. If the busy count gets to zero on an RMID, we
131  * free the RMID
132  */
133 void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
134 {
135 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
136 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
137 	struct rmid_entry *entry;
138 	u32 idx, cur_idx = 1;
139 	void *arch_mon_ctx;
140 	void *arch_priv;
141 	bool rmid_dirty;
142 	u64 val = 0;
143 
144 	arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv;
145 	arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
146 	if (IS_ERR(arch_mon_ctx)) {
147 		pr_warn_ratelimited("Failed to allocate monitor context: %ld",
148 				    PTR_ERR(arch_mon_ctx));
149 		return;
150 	}
151 
152 	/*
153 	 * Skip RMID 0; start from RMID 1 and check all the RMIDs that are
154 	 * marked as busy. If the occupancy is below the threshold, decrement
155 	 * the RMID's busy counter and move the RMID to the free list when the
156 	 * counter reaches 0.
157 	 */
158 	for (;;) {
159 		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
160 		if (idx >= idx_limit)
161 			break;
162 
163 		entry = __rmid_entry(idx);
164 		if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid,
165 					   QOS_L3_OCCUP_EVENT_ID, arch_priv, &val,
166 					   arch_mon_ctx)) {
167 			rmid_dirty = true;
168 		} else {
169 			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
170 
171 			/*
172 			 * x86's CLOSID and RMID are independent numbers, so the entry's
173 			 * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
174 			 * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
175 			 * used to select the configuration. It is thus necessary to track both
176 			 * CLOSID and RMID because there may be dependencies between them
177 			 * on some architectures.
178 			 */
179 			trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
180 		}
181 
182 		if (force_free || !rmid_dirty) {
183 			clear_bit(idx, d->rmid_busy_llc);
184 			if (!--entry->busy)
185 				limbo_release_entry(entry);
186 		}
187 		cur_idx = idx + 1;
188 	}
189 
190 	resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
191 }
192 
193 bool has_busy_rmid(struct rdt_l3_mon_domain *d)
194 {
195 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
196 
197 	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
198 }
199 
200 static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
201 {
202 	struct rmid_entry *itr;
203 	u32 itr_idx, cmp_idx;
204 
205 	if (list_empty(&rmid_free_lru))
206 		return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
207 
208 	list_for_each_entry(itr, &rmid_free_lru, list) {
209 		/*
210 		 * Get the index of this free RMID, and the index it would need
211 		 * to be if it were used with this CLOSID.
212 		 * If the CLOSID is irrelevant on this architecture, the two
213 		 * index values are always the same on every entry and thus the
214 		 * very first entry will be returned.
215 		 */
216 		itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
217 		cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
218 
219 		if (itr_idx == cmp_idx)
220 			return itr;
221 	}
222 
223 	return ERR_PTR(-ENOSPC);
224 }
225 
226 /**
227  * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
228  *                                  RMID are clean, or the CLOSID that has
229  *                                  the most clean RMID.
230  *
231  * MPAM's equivalent of RMIDs are per-CLOSID, meaning a freshly allocated CLOSID
232  * may not be able to allocate clean RMIDs. To avoid this the allocator will
233  * choose the CLOSID with the most clean RMID.
234  *
235  * When the CLOSID and RMID are independent numbers, the first free CLOSID will
236  * be returned.
237  */
238 int resctrl_find_cleanest_closid(void)
239 {
240 	u32 cleanest_closid = ~0;
241 	int i = 0;
242 
243 	lockdep_assert_held(&rdtgroup_mutex);
244 
245 	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
246 		return -EIO;
247 
248 	for (i = 0; i < closids_supported(); i++) {
249 		int num_dirty;
250 
251 		if (closid_allocated(i))
252 			continue;
253 
254 		num_dirty = closid_num_dirty_rmid[i];
255 		if (num_dirty == 0)
256 			return i;
257 
258 		if (cleanest_closid == ~0)
259 			cleanest_closid = i;
260 
261 		if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
262 			cleanest_closid = i;
263 	}
264 
265 	if (cleanest_closid == ~0)
266 		return -ENOSPC;
267 
268 	return cleanest_closid;
269 }
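
/*
 * Worked example of the search above (values are illustrative): with CLOSIDs
 * 1 and 2 both unallocated, closid_num_dirty_rmid[1] == 3 and
 * closid_num_dirty_rmid[2] == 0, CLOSID 2 is returned immediately since none
 * of its RMIDs are waiting in limbo. If every free CLOSID had dirty RMIDs,
 * the one with the fewest would be chosen instead.
 */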
270 
271 /*
272  * For MPAM the RMID value is not unique, and has to be considered with
273  * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
274  * allows all domains to be managed by a single free list.
275  * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
276  */
277 int alloc_rmid(u32 closid)
278 {
279 	struct rmid_entry *entry;
280 
281 	lockdep_assert_held(&rdtgroup_mutex);
282 
283 	entry = resctrl_find_free_rmid(closid);
284 	if (IS_ERR(entry))
285 		return PTR_ERR(entry);
286 
287 	list_del(&entry->list);
288 	return entry->rmid;
289 }
290 
291 static void add_rmid_to_limbo(struct rmid_entry *entry)
292 {
293 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
294 	struct rdt_l3_mon_domain *d;
295 	u32 idx;
296 
297 	lockdep_assert_held(&rdtgroup_mutex);
298 
299 	/* Walking r->domains, ensure it can't race with cpuhp */
300 	lockdep_assert_cpus_held();
301 
302 	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
303 
304 	entry->busy = 0;
305 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
306 		/*
307 		 * For the first limbo RMID in the domain,
308 		 * set up the limbo worker.
309 		 */
310 		if (!has_busy_rmid(d))
311 			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
312 						RESCTRL_PICK_ANY_CPU);
313 		set_bit(idx, d->rmid_busy_llc);
314 		entry->busy++;
315 	}
316 
317 	rmid_limbo_count++;
318 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
319 		closid_num_dirty_rmid[entry->closid]++;
320 }
321 
322 void free_rmid(u32 closid, u32 rmid)
323 {
324 	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
325 	struct rmid_entry *entry;
326 
327 	lockdep_assert_held(&rdtgroup_mutex);
328 
329 	/*
330 	 * Do not allow the default RMID to be freed. Comparing by index
331 	 * allows architectures that ignore the closid parameter to avoid an
332 	 * unnecessary check.
333 	 */
334 	if (!resctrl_arch_mon_capable() ||
335 	    idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
336 						RESCTRL_RESERVED_RMID))
337 		return;
338 
339 	entry = __rmid_entry(idx);
340 
341 	if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
342 		add_rmid_to_limbo(entry);
343 	else
344 		list_add_tail(&entry->list, &rmid_free_lru);
345 }
346 
347 static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid,
348 				       u32 rmid, enum resctrl_event_id evtid)
349 {
350 	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
351 	struct mbm_state *state;
352 
353 	if (!resctrl_is_mbm_event(evtid))
354 		return NULL;
355 
356 	state = d->mbm_states[MBM_STATE_IDX(evtid)];
357 
358 	return state ? &state[idx] : NULL;
359 }
360 
361 /*
362  * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
363  *
364  * Return:
365  * Valid counter ID on success, or -ENOENT on failure.
366  */
367 static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
368 			struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
369 {
370 	int cntr_id;
371 
372 	if (!r->mon.mbm_cntr_assignable)
373 		return -ENOENT;
374 
375 	if (!resctrl_is_mbm_event(evtid))
376 		return -ENOENT;
377 
378 	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
379 		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
380 		    d->cntr_cfg[cntr_id].evtid == evtid)
381 			return cntr_id;
382 	}
383 
384 	return -ENOENT;
385 }
386 
387 /*
388  * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
389  * Caller must ensure that the specified event is not assigned already.
390  *
391  * Return:
392  * Valid counter ID on success, or -ENOSPC on failure.
393  */
394 static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
395 			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
396 {
397 	int cntr_id;
398 
399 	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
400 		if (!d->cntr_cfg[cntr_id].rdtgrp) {
401 			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
402 			d->cntr_cfg[cntr_id].evtid = evtid;
403 			return cntr_id;
404 		}
405 	}
406 
407 	return -ENOSPC;
408 }
409 
410 /*
411  * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
412  */
413 static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id)
414 {
415 	memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
416 }
417 
418 static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
419 {
420 	int cpu = smp_processor_id();
421 	u32 closid = rdtgrp->closid;
422 	u32 rmid = rdtgrp->mon.rmid;
423 	struct rdt_l3_mon_domain *d;
424 	int cntr_id = -ENOENT;
425 	struct mbm_state *m;
426 	u64 tval = 0;
427 
428 	if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) {
429 		rr->err = -EIO;
430 		return -EINVAL;
431 	}
432 	d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
433 
434 	if (rr->is_mbm_cntr) {
435 		cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid);
436 		if (cntr_id < 0) {
437 			rr->err = -ENOENT;
438 			return -EINVAL;
439 		}
440 	}
441 
442 	if (rr->first) {
443 		if (rr->is_mbm_cntr)
444 			resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid);
445 		else
446 			resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid);
447 		m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
448 		if (m)
449 			memset(m, 0, sizeof(struct mbm_state));
450 		return 0;
451 	}
452 
453 	/* Reading a single domain, must be on a CPU in that domain. */
454 	if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
455 		return -EINVAL;
456 	if (rr->is_mbm_cntr)
457 		rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
458 						 rr->evt->evtid, &tval);
459 	else
460 		rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid,
461 						 rr->evt->evtid, rr->evt->arch_priv,
462 						 &tval, rr->arch_mon_ctx);
463 	if (rr->err)
464 		return rr->err;
465 
466 	rr->val += tval;
467 
468 	return 0;
469 }
470 
471 static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr)
472 {
473 	int cpu = smp_processor_id();
474 	u32 closid = rdtgrp->closid;
475 	u32 rmid = rdtgrp->mon.rmid;
476 	struct rdt_l3_mon_domain *d;
477 	u64 tval = 0;
478 	int err, ret;
479 
480 	/*
481 	 * Summing across domains is only done for systems that implement
482 	 * Sub-NUMA Cluster. There is no overlap with systems that support
483 	 * assignable counters.
484 	 */
485 	if (rr->is_mbm_cntr) {
486 		pr_warn_once("Summing domains using assignable counters is not supported\n");
487 		rr->err = -EINVAL;
488 		return -EINVAL;
489 	}
490 
491 	/* Summing domains that share a cache, must be on a CPU for that cache. */
492 	if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
493 		return -EINVAL;
494 
495 	/*
496 	 * Legacy files must report the sum of an event across all
497 	 * domains that share the same L3 cache instance.
498 	 * Report success if a read from any domain succeeds, -EINVAL
499 	 * (translated to "Unavailable" for user space) if reads from
500 	 * all domains fail for any reason.
501 	 */
502 	ret = -EINVAL;
503 	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
504 		if (d->ci_id != rr->ci->id)
505 			continue;
506 		err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid,
507 					     rr->evt->evtid, rr->evt->arch_priv,
508 					     &tval, rr->arch_mon_ctx);
509 		if (!err) {
510 			rr->val += tval;
511 			ret = 0;
512 		}
513 	}
514 
515 	if (ret)
516 		rr->err = ret;
517 
518 	return ret;
519 }
520 
521 static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
522 {
523 	switch (rr->r->rid) {
524 	case RDT_RESOURCE_L3:
525 		WARN_ON_ONCE(rr->evt->any_cpu);
526 		if (rr->hdr)
527 			return __l3_mon_event_count(rdtgrp, rr);
528 		else
529 			return __l3_mon_event_count_sum(rdtgrp, rr);
530 	case RDT_RESOURCE_PERF_PKG: {
531 		u64 tval = 0;
532 
533 		rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid,
534 						 rdtgrp->mon.rmid, rr->evt->evtid,
535 						 rr->evt->arch_priv,
536 						 &tval, rr->arch_mon_ctx);
537 		if (rr->err)
538 			return rr->err;
539 
540 		rr->val += tval;
541 
542 		return 0;
543 	}
544 	default:
545 		rr->err = -EINVAL;
546 		return -EINVAL;
547 	}
548 }
549 
550 /*
551  * mbm_bw_count() - Update bw count from values previously read by
552  *		    __mon_event_count().
553  * @rdtgrp:	resctrl group associated with the CLOSID and RMID to identify
554  *		the cached mbm_state.
555  * @rr:		The struct rmid_read populated by __mon_event_count().
556  *
557  * Supporting function to calculate the memory bandwidth in MBps. The chunks
558  * value previously read by __mon_event_count() is compared with the chunks
559  * value from the previous invocation. This must be called once per second
560  * to maintain values in MBps.
561  */
562 static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
563 {
564 	u64 cur_bw, bytes, cur_bytes;
565 	u32 closid = rdtgrp->closid;
566 	u32 rmid = rdtgrp->mon.rmid;
567 	struct rdt_l3_mon_domain *d;
568 	struct mbm_state *m;
569 
570 	if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
571 		return;
572 	d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
573 	m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
574 	if (WARN_ON_ONCE(!m))
575 		return;
576 
577 	cur_bytes = rr->val;
578 	bytes = cur_bytes - m->prev_bw_bytes;
579 	m->prev_bw_bytes = cur_bytes;
580 
581 	cur_bw = bytes / SZ_1M;
582 
583 	m->prev_bw = cur_bw;
584 }
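
/*
 * Worked example of the calculation above (numbers are illustrative): if the
 * previous reading left prev_bw_bytes at 3000 MiB and the current rr->val is
 * 3512 MiB, then bytes == 512 MiB and, because this runs once per second from
 * the overflow handler, prev_bw ends up as 512 MBps.
 */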
585 
586 /*
587  * This is scheduled by mon_event_read() to read the CQM/MBM counters
588  * on a domain.
589  */
590 void mon_event_count(void *info)
591 {
592 	struct rdtgroup *rdtgrp, *entry;
593 	struct rmid_read *rr = info;
594 	struct list_head *head;
595 	int ret;
596 
597 	rdtgrp = rr->rgrp;
598 
599 	ret = __mon_event_count(rdtgrp, rr);
600 
601 	/*
602 	 * For Ctrl groups read data from child monitor groups and
603 	 * add them together. Count events which are read successfully.
604 	 * Discard the rmid_read's reporting errors.
605 	 */
606 	head = &rdtgrp->mon.crdtgrp_list;
607 
608 	if (rdtgrp->type == RDTCTRL_GROUP) {
609 		list_for_each_entry(entry, head, mon.crdtgrp_list) {
610 			if (__mon_event_count(entry, rr) == 0)
611 				ret = 0;
612 		}
613 	}
614 
615 	/*
616 	 * __mon_event_count() calls for newly created monitor groups may
617 	 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
618 	 * Discard error if any of the monitor event reads succeeded.
619 	 */
620 	if (ret == 0)
621 		rr->err = 0;
622 }
623 
624 static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
625 							struct rdt_resource *r)
626 {
627 	struct rdt_ctrl_domain *d;
628 
629 	lockdep_assert_cpus_held();
630 
631 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
632 		/* Find the domain that contains this CPU */
633 		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
634 			return d;
635 	}
636 
637 	return NULL;
638 }
639 
640 /*
641  * Feedback loop for MBA software controller (mba_sc)
642  *
643  * mba_sc is a feedback loop where we periodically read MBM counters and
644  * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
645  * that:
646  *
647  *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
648  *
649  * This uses the MBM counters to measure the bandwidth and MBA throttle
650  * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
651  * fact that resctrl rdtgroups have both monitoring and control.
652  *
653  * The frequency of the checks is 1s and we just tag along the MBM overflow
654  * timer. Having 1s interval makes the calculation of bandwidth simpler.
655  *
656  * Although MBA's goal is to restrict the bandwidth to a maximum, there may
657  * be a need to increase the bandwidth to avoid unnecessarily restricting
658  * the L2 <-> L3 traffic.
659  *
660  * Since MBA controls the L2 external bandwidth whereas MBM measures the
661  * L3 external bandwidth the following sequence could lead to such a
662  * situation.
663  *
664  * Consider an rdtgroup which had high L3 <-> memory traffic in initial
665  * phases -> mba_sc kicks in and reduces the bandwidth percentage values -> but
666  * after some time the rdtgroup has mostly L2 <-> L3 traffic.
667  *
668  * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
669  * throttle MSRs already have low percentage values.  To avoid
670  * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
671  */
672 static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm)
673 {
674 	u32 closid, rmid, cur_msr_val, new_msr_val;
675 	struct mbm_state *pmbm_data, *cmbm_data;
676 	struct rdt_ctrl_domain *dom_mba;
677 	enum resctrl_event_id evt_id;
678 	struct rdt_resource *r_mba;
679 	struct list_head *head;
680 	struct rdtgroup *entry;
681 	u32 cur_bw, user_bw;
682 
683 	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
684 	evt_id = rgrp->mba_mbps_event;
685 
686 	closid = rgrp->closid;
687 	rmid = rgrp->mon.rmid;
688 	pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
689 	if (WARN_ON_ONCE(!pmbm_data))
690 		return;
691 
692 	dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
693 	if (!dom_mba) {
694 		pr_warn_once("Failure to get domain for MBA update\n");
695 		return;
696 	}
697 
698 	cur_bw = pmbm_data->prev_bw;
699 	user_bw = dom_mba->mbps_val[closid];
700 
701 	/* MBA resource doesn't support CDP */
702 	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
703 
704 	/*
705 	 * For Ctrl groups read data from child monitor groups.
706 	 */
707 	head = &rgrp->mon.crdtgrp_list;
708 	list_for_each_entry(entry, head, mon.crdtgrp_list) {
709 		cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
710 		if (WARN_ON_ONCE(!cmbm_data))
711 			return;
712 		cur_bw += cmbm_data->prev_bw;
713 	}
714 
715 	/*
716 	 * Scale up/down the bandwidth linearly for the ctrl group.  The
717 	 * bandwidth step is the bandwidth granularity specified by the
718 	 * hardware.
719 	 * Always increase throttling if current bandwidth is above the
720 	 * target set by user.
721 	 * But avoid thrashing up and down on every poll by checking
722 	 * whether a decrease in throttling is likely to push the group
723 	 * back over target. E.g. if currently throttling to 30% of bandwidth
724 	 * on a system with 10% granularity steps, check whether moving to
725 	 * 40% would go past the limit by multiplying current bandwidth by
726 	 * "(30 + 10) / 30".
727 	 */
728 	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
729 		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
730 	} else if (cur_msr_val < MAX_MBA_BW &&
731 		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
732 		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
733 	} else {
734 		return;
735 	}
736 
737 	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
738 }
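
/*
 * Worked example of the throttle adjustment above (illustrative, assuming
 * bw_gran == min_bw == 10): with cur_msr_val == 30 and cur_bw == 600 MBps,
 * throttling is only relaxed to 40 when user_bw > 600 * (30 + 10) / 30,
 * i.e. user_bw > 800 MBps, so one granularity step up cannot immediately
 * push the group back over its target.
 */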
739 
740 static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
741 				 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
742 {
743 	struct rmid_read rr = {0};
744 
745 	rr.r = r;
746 	rr.hdr = &d->hdr;
747 	rr.evt = &mon_event_all[evtid];
748 	if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
749 		rr.is_mbm_cntr = true;
750 	} else {
751 		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid);
752 		if (IS_ERR(rr.arch_mon_ctx)) {
753 			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
754 					    PTR_ERR(rr.arch_mon_ctx));
755 			return;
756 		}
757 	}
758 
759 	__mon_event_count(rdtgrp, &rr);
760 
761 	/*
762 	 * If the software controller is enabled, compute the
763 	 * bandwidth for this event id.
764 	 */
765 	if (is_mba_sc(NULL))
766 		mbm_bw_count(rdtgrp, &rr);
767 
768 	if (rr.arch_mon_ctx)
769 		resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx);
770 }
771 
772 static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
773 		       struct rdtgroup *rdtgrp)
774 {
775 	/*
776 	 * This is protected from concurrent reads from user space as both
777 	 * the user space reader and the overflow handler hold the global mutex.
778 	 */
779 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
780 		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
781 
782 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
783 		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
784 }
785 
786 /*
787  * Handler to scan the limbo list and move RMIDs whose occupancy is
788  * below the reallocation threshold to the free list.
789  */
790 void cqm_handle_limbo(struct work_struct *work)
791 {
792 	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
793 	struct rdt_l3_mon_domain *d;
794 
795 	cpus_read_lock();
796 	mutex_lock(&rdtgroup_mutex);
797 
798 	d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work);
799 
800 	__check_limbo(d, false);
801 
802 	if (has_busy_rmid(d)) {
803 		d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
804 							   RESCTRL_PICK_ANY_CPU);
805 		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
806 					 delay);
807 	}
808 
809 	mutex_unlock(&rdtgroup_mutex);
810 	cpus_read_unlock();
811 }
812 
813 /**
814  * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
815  *                             domain.
816  * @dom:           The domain the limbo handler should run for.
817  * @delay_ms:      How far in the future the handler should run.
818  * @exclude_cpu:   Which CPU the handler should not run on,
819  *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
820  */
821 void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
822 			     int exclude_cpu)
823 {
824 	unsigned long delay = msecs_to_jiffies(delay_ms);
825 	int cpu;
826 
827 	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
828 	dom->cqm_work_cpu = cpu;
829 
830 	if (cpu < nr_cpu_ids)
831 		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
832 }
833 
834 void mbm_handle_overflow(struct work_struct *work)
835 {
836 	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
837 	struct rdtgroup *prgrp, *crgrp;
838 	struct rdt_l3_mon_domain *d;
839 	struct list_head *head;
840 	struct rdt_resource *r;
841 
842 	cpus_read_lock();
843 	mutex_lock(&rdtgroup_mutex);
844 
845 	/*
846 	 * If the filesystem has been unmounted this work no longer needs to
847 	 * run.
848 	 */
849 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
850 		goto out_unlock;
851 
852 	r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
853 	d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work);
854 
855 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
856 		mbm_update(r, d, prgrp);
857 
858 		head = &prgrp->mon.crdtgrp_list;
859 		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
860 			mbm_update(r, d, crgrp);
861 
862 		if (is_mba_sc(NULL))
863 			update_mba_bw(prgrp, d);
864 	}
865 
866 	/*
867 	 * Re-check for housekeeping CPUs. This allows the overflow handler to
868 	 * move off a nohz_full CPU quickly.
869 	 */
870 	d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
871 						   RESCTRL_PICK_ANY_CPU);
872 	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
873 
874 out_unlock:
875 	mutex_unlock(&rdtgroup_mutex);
876 	cpus_read_unlock();
877 }
878 
879 /**
880  * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
881  *                                domain.
882  * @dom:           The domain the overflow handler should run for.
883  * @delay_ms:      How far in the future the handler should run.
884  * @exclude_cpu:   Which CPU the handler should not run on,
885  *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
886  */
887 void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
888 				int exclude_cpu)
889 {
890 	unsigned long delay = msecs_to_jiffies(delay_ms);
891 	int cpu;
892 
893 	/*
894 	 * When a domain comes online there is no guarantee the filesystem is
895 	 * mounted. If not, there is no need to catch counter overflow.
896 	 */
897 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
898 		return;
899 	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
900 	dom->mbm_work_cpu = cpu;
901 
902 	if (cpu < nr_cpu_ids)
903 		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
904 }
905 
906 int setup_rmid_lru_list(void)
907 {
908 	struct rmid_entry *entry = NULL;
909 	u32 idx_limit;
910 	u32 idx;
911 	int i;
912 
913 	if (!resctrl_arch_mon_capable())
914 		return 0;
915 
916 	/*
917 	 * Called on every mount, but the number of RMIDs cannot change
918 	 * after the first mount, so keep using the same set of rmid_ptrs[]
919 	 * until resctrl_exit(). Note that the limbo handler continues to
920 	 * access rmid_ptrs[] after resctrl is unmounted.
921 	 */
922 	if (rmid_ptrs)
923 		return 0;
924 
925 	idx_limit = resctrl_arch_system_num_rmid_idx();
926 	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
927 	if (!rmid_ptrs)
928 		return -ENOMEM;
929 
930 	for (i = 0; i < idx_limit; i++) {
931 		entry = &rmid_ptrs[i];
932 		INIT_LIST_HEAD(&entry->list);
933 
934 		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
935 		list_add_tail(&entry->list, &rmid_free_lru);
936 	}
937 
938 	/*
939 	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
940 	 * are always allocated. These are used for the rdtgroup_default
941 	 * control group, which was set up earlier in rdtgroup_setup_default().
942 	 */
943 	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
944 					   RESCTRL_RESERVED_RMID);
945 	entry = __rmid_entry(idx);
946 	list_del(&entry->list);
947 
948 	return 0;
949 }
950 
951 void free_rmid_lru_list(void)
952 {
953 	if (!resctrl_arch_mon_capable())
954 		return;
955 
956 	mutex_lock(&rdtgroup_mutex);
957 	kfree(rmid_ptrs);
958 	rmid_ptrs = NULL;
959 	mutex_unlock(&rdtgroup_mutex);
960 }
961 
962 #define MON_EVENT(_eventid, _name, _res, _fp)	\
963 	[_eventid] = {				\
964 	.name			= _name,	\
965 	.evtid			= _eventid,	\
966 	.rid			= _res,		\
967 	.is_floating_point	= _fp,		\
968 }
969 
970 /*
971  * All available events. Architecture code marks the ones that
972  * are supported by a system using resctrl_enable_mon_event()
973  * to set .enabled.
974  */
975 struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
976 	MON_EVENT(QOS_L3_OCCUP_EVENT_ID,		"llc_occupancy",	RDT_RESOURCE_L3,	false),
977 	MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID,		"mbm_total_bytes",	RDT_RESOURCE_L3,	false),
978 	MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID,		"mbm_local_bytes",	RDT_RESOURCE_L3,	false),
979 	MON_EVENT(PMT_EVENT_ENERGY,			"core_energy",		RDT_RESOURCE_PERF_PKG,	true),
980 	MON_EVENT(PMT_EVENT_ACTIVITY,			"activity",		RDT_RESOURCE_PERF_PKG,	true),
981 	MON_EVENT(PMT_EVENT_STALLS_LLC_HIT,		"stalls_llc_hit",	RDT_RESOURCE_PERF_PKG,	false),
982 	MON_EVENT(PMT_EVENT_C1_RES,			"c1_res",		RDT_RESOURCE_PERF_PKG,	false),
983 	MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES,	"unhalted_core_cycles",	RDT_RESOURCE_PERF_PKG,	false),
984 	MON_EVENT(PMT_EVENT_STALLS_LLC_MISS,		"stalls_llc_miss",	RDT_RESOURCE_PERF_PKG,	false),
985 	MON_EVENT(PMT_EVENT_AUTO_C6_RES,		"c6_res",		RDT_RESOURCE_PERF_PKG,	false),
986 	MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES,	"unhalted_ref_cycles",	RDT_RESOURCE_PERF_PKG,	false),
987 	MON_EVENT(PMT_EVENT_UOPS_RETIRED,		"uops_retired",		RDT_RESOURCE_PERF_PKG,	false),
988 };
989 
990 bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
991 			      unsigned int binary_bits, void *arch_priv)
992 {
993 	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
994 			 binary_bits > MAX_BINARY_BITS))
995 		return false;
996 	if (mon_event_all[eventid].enabled) {
997 		pr_warn("Duplicate enable for event %d\n", eventid);
998 		return false;
999 	}
1000 	if (binary_bits && !mon_event_all[eventid].is_floating_point) {
1001 		pr_warn("Event %d may not be floating point\n", eventid);
1002 		return false;
1003 	}
1004 
1005 	mon_event_all[eventid].any_cpu = any_cpu;
1006 	mon_event_all[eventid].binary_bits = binary_bits;
1007 	mon_event_all[eventid].arch_priv = arch_priv;
1008 	mon_event_all[eventid].enabled = true;
1009 
1010 	return true;
1011 }
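
/*
 * Illustrative call only (architecture code does this; the exact arguments
 * here are assumptions): enabling the LLC occupancy event with no floating
 * point binary bits and no arch private data might look like:
 *
 *	resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0, NULL);
 */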
1012 
1013 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
1014 {
1015 	return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
1016 	       mon_event_all[eventid].enabled;
1017 }
1018 
1019 u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
1020 {
1021 	return mon_event_all[evtid].evt_cfg;
1022 }
1023 
1024 /**
1025  * struct mbm_transaction - Memory transaction an MBM event can be configured with.
1026  * @name:	Name of memory transaction (read, write ...).
1027  * @val:	The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
1028  *		represent the memory transaction within an event's configuration.
1029  */
1030 struct mbm_transaction {
1031 	char	name[32];
1032 	u32	val;
1033 };
1034 
1035 /* Decoded values for each type of memory transaction. */
1036 static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
1037 	{"local_reads", READS_TO_LOCAL_MEM},
1038 	{"remote_reads", READS_TO_REMOTE_MEM},
1039 	{"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
1040 	{"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
1041 	{"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
1042 	{"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
1043 	{"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
1044 };
1045 
1046 int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
1047 {
1048 	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1049 	struct rdt_resource *r;
1050 	bool sep = false;
1051 	int ret = 0, i;
1052 
1053 	mutex_lock(&rdtgroup_mutex);
1054 	rdt_last_cmd_clear();
1055 
1056 	r = resctrl_arch_get_resource(mevt->rid);
1057 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1058 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1059 		ret = -EINVAL;
1060 		goto out_unlock;
1061 	}
1062 
1063 	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1064 		if (mevt->evt_cfg & mbm_transactions[i].val) {
1065 			if (sep)
1066 				seq_putc(seq, ',');
1067 			seq_printf(seq, "%s", mbm_transactions[i].name);
1068 			sep = true;
1069 		}
1070 	}
1071 	seq_putc(seq, '\n');
1072 
1073 out_unlock:
1074 	mutex_unlock(&rdtgroup_mutex);
1075 
1076 	return ret;
1077 }
1078 
1079 int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
1080 				     void *v)
1081 {
1082 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1083 	int ret = 0;
1084 
1085 	mutex_lock(&rdtgroup_mutex);
1086 	rdt_last_cmd_clear();
1087 
1088 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1089 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1090 		ret = -EINVAL;
1091 		goto out_unlock;
1092 	}
1093 
1094 	seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
1095 
1096 out_unlock:
1097 	mutex_unlock(&rdtgroup_mutex);
1098 
1099 	return ret;
1100 }
1101 
1102 ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
1103 					  size_t nbytes, loff_t off)
1104 {
1105 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1106 	bool value;
1107 	int ret;
1108 
1109 	ret = kstrtobool(buf, &value);
1110 	if (ret)
1111 		return ret;
1112 
1113 	mutex_lock(&rdtgroup_mutex);
1114 	rdt_last_cmd_clear();
1115 
1116 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1117 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1118 		ret = -EINVAL;
1119 		goto out_unlock;
1120 	}
1121 
1122 	r->mon.mbm_assign_on_mkdir = value;
1123 
1124 out_unlock:
1125 	mutex_unlock(&rdtgroup_mutex);
1126 
1127 	return ret ?: nbytes;
1128 }
1129 
1130 /*
1131  * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
1132  *			 domain @d. Called when mbm_assign_mode is changed.
1133  */
1134 static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1135 {
1136 	memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
1137 }
1138 
1139 /*
1140  * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
1141  *			      supported RMIDs.
1142  */
1143 static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1144 {
1145 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
1146 	enum resctrl_event_id evt;
1147 	int idx;
1148 
1149 	for_each_mbm_event_id(evt) {
1150 		if (!resctrl_is_mon_event_enabled(evt))
1151 			continue;
1152 		idx = MBM_STATE_IDX(evt);
1153 		memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
1154 	}
1155 }
1156 
1157 /*
1158  * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
1159  * pair in the domain.
1160  *
1161  * Assign the counter if @assign is true else unassign the counter. Reset the
1162  * associated non-architectural state.
1163  */
1164 static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1165 				 enum resctrl_event_id evtid, u32 rmid, u32 closid,
1166 				 u32 cntr_id, bool assign)
1167 {
1168 	struct mbm_state *m;
1169 
1170 	resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
1171 
1172 	m = get_mbm_state(d, closid, rmid, evtid);
1173 	if (m)
1174 		memset(m, 0, sizeof(*m));
1175 }
1176 
1177 /*
1178  * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
1179  * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
1180  *
1181  * Return:
1182  * 0 on success, < 0 on failure.
1183  */
1184 static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1185 				      struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1186 {
1187 	int cntr_id;
1188 
1189 	/* No action required if the counter is assigned already. */
1190 	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1191 	if (cntr_id >= 0)
1192 		return 0;
1193 
1194 	cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
1195 	if (cntr_id < 0) {
1196 		rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
1197 				    mevt->name, d->hdr.id);
1198 		return cntr_id;
1199 	}
1200 
1201 	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
1202 
1203 	return 0;
1204 }
1205 
1206 /*
1207  * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
1208  * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
1209  * NULL; otherwise, assign the counter to the specified domain @d.
1210  *
1211  * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
1212  * will fail. The assignment process will abort at the first failure encountered
1213  * during domain traversal, which may result in the event being only partially
1214  * assigned.
1215  *
1216  * Return:
1217  * 0 on success, < 0 on failure.
1218  */
1219 static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1220 				      struct mon_evt *mevt)
1221 {
1222 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1223 	int ret = 0;
1224 
1225 	if (!d) {
1226 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1227 			ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1228 			if (ret)
1229 				return ret;
1230 		}
1231 	} else {
1232 		ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1233 	}
1234 
1235 	return ret;
1236 }
1237 
1238 /*
1239  * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
1240  *			     a new group is created.
1241  *
1242  * Each group can accommodate two counters per domain: one for the total
1243  * event and one for the local event. Assignments may fail due to the limited
1244  * number of counters. However, it is not necessary to fail the group creation
1245  * and thus no failure is returned. Users have the option to modify the
1246  * counter assignments after the group has been created.
1247  */
1248 void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
1249 {
1250 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1251 
1252 	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
1253 	    !r->mon.mbm_assign_on_mkdir)
1254 		return;
1255 
1256 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1257 		rdtgroup_assign_cntr_event(NULL, rdtgrp,
1258 					   &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1259 
1260 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1261 		rdtgroup_assign_cntr_event(NULL, rdtgrp,
1262 					   &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1263 }
1264 
1265 /*
1266  * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
1267  * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
1268  */
1269 static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1270 					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1271 {
1272 	int cntr_id;
1273 
1274 	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1275 
1276 	/* If there is no cntr_id assigned, nothing to do */
1277 	if (cntr_id < 0)
1278 		return;
1279 
1280 	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
1281 
1282 	mbm_cntr_free(d, cntr_id);
1283 }
1284 
1285 /*
1286  * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
1287  * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
1288  * the counters from all the domains if @d is NULL else unassign from @d.
1289  */
1290 static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1291 					 struct mon_evt *mevt)
1292 {
1293 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1294 
1295 	if (!d) {
1296 		list_for_each_entry(d, &r->mon_domains, hdr.list)
1297 			rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1298 	} else {
1299 		rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1300 	}
1301 }
1302 
1303 /*
1304  * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
1305  *			       Called when a group is deleted.
1306  */
1307 void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
1308 {
1309 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1310 
1311 	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
1312 		return;
1313 
1314 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1315 		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1316 					     &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1317 
1318 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1319 		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1320 					     &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1321 }
1322 
1323 static int resctrl_parse_mem_transactions(char *tok, u32 *val)
1324 {
1325 	u32 temp_val = 0;
1326 	char *evt_str;
1327 	bool found;
1328 	int i;
1329 
1330 next_config:
1331 	if (!tok || tok[0] == '\0') {
1332 		*val = temp_val;
1333 		return 0;
1334 	}
1335 
1336 	/* Start processing the strings for each memory transaction type */
1337 	evt_str = strim(strsep(&tok, ","));
1338 	found = false;
1339 	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1340 		if (!strcmp(mbm_transactions[i].name, evt_str)) {
1341 			temp_val |= mbm_transactions[i].val;
1342 			found = true;
1343 			break;
1344 		}
1345 	}
1346 
1347 	if (!found) {
1348 		rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
1349 		return -EINVAL;
1350 	}
1351 
1352 	goto next_config;
1353 }
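
/*
 * Example input for the parser above (illustrative): the string
 * "local_reads,remote_reads" yields
 * *val == (READS_TO_LOCAL_MEM | READS_TO_REMOTE_MEM), while an empty string
 * clears the configuration by returning 0 in *val.
 */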
1354 
1355 /*
1356  * rdtgroup_update_cntr_event - Update the counter assignments for the event
1357  *				in a group.
1358  * @r:		Resource whose domains hold the counters to update.
1359  * @rdtgrp:	Resctrl group.
1360  * @evtid:	MBM monitor event.
1361  */
1362 static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1363 				       enum resctrl_event_id evtid)
1364 {
1365 	struct rdt_l3_mon_domain *d;
1366 	int cntr_id;
1367 
1368 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1369 		cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
1370 		if (cntr_id >= 0)
1371 			rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
1372 					     rdtgrp->closid, cntr_id, true);
1373 	}
1374 }
1375 
1376 /*
1377  * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
1378  *				   for all the groups.
1379  * @mevt:	MBM monitor event.
1380  */
1381 static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
1382 {
1383 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1384 	struct rdtgroup *prgrp, *crgrp;
1385 
1386 	/*
1387 	 * Find all the groups where the event is assigned and update the
1388 	 * configuration of existing assignments.
1389 	 */
1390 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1391 		rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
1392 
1393 		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
1394 			rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
1395 	}
1396 }
1397 
1398 ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
1399 			   loff_t off)
1400 {
1401 	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1402 	struct rdt_resource *r;
1403 	u32 evt_cfg = 0;
1404 	int ret = 0;
1405 
1406 	/* Valid input requires a trailing newline */
1407 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1408 		return -EINVAL;
1409 
1410 	buf[nbytes - 1] = '\0';
1411 
1412 	cpus_read_lock();
1413 	mutex_lock(&rdtgroup_mutex);
1414 
1415 	rdt_last_cmd_clear();
1416 
1417 	r = resctrl_arch_get_resource(mevt->rid);
1418 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1419 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1420 		ret = -EINVAL;
1421 		goto out_unlock;
1422 	}
1423 
1424 	ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
1425 	if (!ret && mevt->evt_cfg != evt_cfg) {
1426 		mevt->evt_cfg = evt_cfg;
1427 		resctrl_update_cntr_allrdtgrp(mevt);
1428 	}
1429 
1430 out_unlock:
1431 	mutex_unlock(&rdtgroup_mutex);
1432 	cpus_read_unlock();
1433 
1434 	return ret ?: nbytes;
1435 }
1436 
1437 int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
1438 				 struct seq_file *s, void *v)
1439 {
1440 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1441 	bool enabled;
1442 
1443 	mutex_lock(&rdtgroup_mutex);
1444 	enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
1445 
1446 	if (r->mon.mbm_cntr_assignable) {
1447 		if (enabled)
1448 			seq_puts(s, "[mbm_event]\n");
1449 		else
1450 			seq_puts(s, "[default]\n");
1451 
1452 		if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
1453 			if (enabled)
1454 				seq_puts(s, "default\n");
1455 			else
1456 				seq_puts(s, "mbm_event\n");
1457 		}
1458 	} else {
1459 		seq_puts(s, "[default]\n");
1460 	}
1461 
1462 	mutex_unlock(&rdtgroup_mutex);
1463 
1464 	return 0;
1465 }
1466 
1467 ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
1468 				      size_t nbytes, loff_t off)
1469 {
1470 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1471 	struct rdt_l3_mon_domain *d;
1472 	int ret = 0;
1473 	bool enable;
1474 
1475 	/* Valid input requires a trailing newline */
1476 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1477 		return -EINVAL;
1478 
1479 	buf[nbytes - 1] = '\0';
1480 
1481 	cpus_read_lock();
1482 	mutex_lock(&rdtgroup_mutex);
1483 
1484 	rdt_last_cmd_clear();
1485 
1486 	if (!strcmp(buf, "default")) {
1487 		enable = false;
1488 	} else if (!strcmp(buf, "mbm_event")) {
1489 		if (r->mon.mbm_cntr_assignable) {
1490 			enable = true;
1491 		} else {
1492 			ret = -EINVAL;
1493 			rdt_last_cmd_puts("mbm_event mode is not supported\n");
1494 			goto out_unlock;
1495 		}
1496 	} else {
1497 		ret = -EINVAL;
1498 		rdt_last_cmd_puts("Unsupported assign mode\n");
1499 		goto out_unlock;
1500 	}
1501 
1502 	if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
1503 		ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
1504 		if (ret)
1505 			goto out_unlock;
1506 
1507 		/* Update the visibility of BMEC related files */
1508 		resctrl_bmec_files_show(r, NULL, !enable);
1509 
1510 		/*
1511 		 * Initialize the default memory transaction values for
1512 		 * total and local events.
1513 		 */
1514 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1515 			mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1516 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1517 			mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1518 									   (READS_TO_LOCAL_MEM |
1519 									    READS_TO_LOCAL_S_MEM |
1520 									    NON_TEMP_WRITE_TO_LOCAL_MEM);
1521 		/* Enable auto assignment when switching to "mbm_event" mode */
1522 		if (enable)
1523 			r->mon.mbm_assign_on_mkdir = true;
1524 		/*
1525 		 * Reset all the non-architectural RMID state and assignable counters.
1526 		 */
1527 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1528 			mbm_cntr_free_all(r, d);
1529 			resctrl_reset_rmid_all(r, d);
1530 		}
1531 	}
1532 
1533 out_unlock:
1534 	mutex_unlock(&rdtgroup_mutex);
1535 	cpus_read_unlock();
1536 
1537 	return ret ?: nbytes;
1538 }
1539 
1540 int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
1541 			       struct seq_file *s, void *v)
1542 {
1543 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1544 	struct rdt_l3_mon_domain *dom;
1545 	bool sep = false;
1546 
1547 	cpus_read_lock();
1548 	mutex_lock(&rdtgroup_mutex);
1549 
1550 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1551 		if (sep)
1552 			seq_putc(s, ';');
1553 
1554 		seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
1555 		sep = true;
1556 	}
1557 	seq_putc(s, '\n');
1558 
1559 	mutex_unlock(&rdtgroup_mutex);
1560 	cpus_read_unlock();
1561 	return 0;
1562 }
1563 
1564 int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
1565 				     struct seq_file *s, void *v)
1566 {
1567 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1568 	struct rdt_l3_mon_domain *dom;
1569 	bool sep = false;
1570 	u32 cntrs, i;
1571 	int ret = 0;
1572 
1573 	cpus_read_lock();
1574 	mutex_lock(&rdtgroup_mutex);
1575 
1576 	rdt_last_cmd_clear();
1577 
1578 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1579 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1580 		ret = -EINVAL;
1581 		goto out_unlock;
1582 	}
1583 
1584 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1585 		if (sep)
1586 			seq_putc(s, ';');
1587 
1588 		cntrs = 0;
1589 		for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
1590 			if (!dom->cntr_cfg[i].rdtgrp)
1591 				cntrs++;
1592 		}
1593 
1594 		seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
1595 		sep = true;
1596 	}
1597 	seq_putc(s, '\n');
1598 
1599 out_unlock:
1600 	mutex_unlock(&rdtgroup_mutex);
1601 	cpus_read_unlock();
1602 
1603 	return ret;
1604 }
1605 
1606 int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
1607 {
1608 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1609 	struct rdt_l3_mon_domain *d;
1610 	struct rdtgroup *rdtgrp;
1611 	struct mon_evt *mevt;
1612 	int ret = 0;
1613 	bool sep;
1614 
1615 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1616 	if (!rdtgrp) {
1617 		ret = -ENOENT;
1618 		goto out_unlock;
1619 	}
1620 
1621 	rdt_last_cmd_clear();
1622 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1623 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1624 		ret = -EINVAL;
1625 		goto out_unlock;
1626 	}
1627 
1628 	for_each_mon_event(mevt) {
1629 		if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
1630 			continue;
1631 
1632 		sep = false;
1633 		seq_printf(s, "%s:", mevt->name);
1634 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1635 			if (sep)
1636 				seq_putc(s, ';');
1637 
1638 			if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
1639 				seq_printf(s, "%d=_", d->hdr.id);
1640 			else
1641 				seq_printf(s, "%d=e", d->hdr.id);
1642 
1643 			sep = true;
1644 		}
1645 		seq_putc(s, '\n');
1646 	}
1647 
1648 out_unlock:
1649 	rdtgroup_kn_unlock(of->kn);
1650 
1651 	return ret;
1652 }
1653 
1654 /*
1655  * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
1656  * event name.
1657  */
1658 static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
1659 {
1660 	struct mon_evt *mevt;
1661 
1662 	for_each_mon_event(mevt) {
1663 		if (mevt->rid == r->rid && mevt->enabled &&
1664 		    resctrl_is_mbm_event(mevt->evtid) &&
1665 		    !strcmp(mevt->name, name))
1666 			return mevt;
1667 	}
1668 
1669 	return NULL;
1670 }
1671 
1672 static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d,
1673 					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1674 {
1675 	int ret = 0;
1676 
1677 	if (!assign || strlen(assign) != 1)
1678 		return -EINVAL;
1679 
1680 	switch (*assign) {
1681 	case 'e':
1682 		ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
1683 		break;
1684 	case '_':
1685 		rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
1686 		break;
1687 	default:
1688 		ret = -EINVAL;
1689 		break;
1690 	}
1691 
1692 	return ret;
1693 }
1694 
1695 static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1696 					char *event, char *tok)
1697 {
1698 	struct rdt_l3_mon_domain *d;
1699 	unsigned long dom_id = 0;
1700 	char *dom_str, *id_str;
1701 	struct mon_evt *mevt;
1702 	int ret;
1703 
1704 	mevt = mbm_get_mon_event_by_name(r, event);
1705 	if (!mevt) {
1706 		rdt_last_cmd_printf("Invalid event %s\n", event);
1707 		return -ENOENT;
1708 	}
1709 
1710 next:
1711 	if (!tok || tok[0] == '\0')
1712 		return 0;
1713 
1714 	/* Start processing the strings for each domain */
1715 	dom_str = strim(strsep(&tok, ";"));
1716 
1717 	id_str = strsep(&dom_str, "=");
1718 
1719 	/* Check for domain id '*' which means all domains */
1720 	if (id_str && *id_str == '*') {
1721 		ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
1722 		if (ret)
1723 			rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
1724 					    event, dom_str);
1725 		return ret;
1726 	} else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1727 		rdt_last_cmd_puts("Missing domain id\n");
1728 		return -EINVAL;
1729 	}
1730 
1731 	/* Verify if the dom_id is valid */
1732 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1733 		if (d->hdr.id == dom_id) {
1734 			ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
1735 			if (ret) {
1736 				rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
1737 						    event, dom_id, dom_str);
1738 				return ret;
1739 			}
1740 			goto next;
1741 		}
1742 	}
1743 
1744 	rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
1745 	return -EINVAL;
1746 }
1747 
1748 ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
1749 				 size_t nbytes, loff_t off)
1750 {
1751 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1752 	struct rdtgroup *rdtgrp;
1753 	char *token, *event;
1754 	int ret = 0;
1755 
1756 	/* Valid input requires a trailing newline */
1757 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1758 		return -EINVAL;
1759 
1760 	buf[nbytes - 1] = '\0';
1761 
1762 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1763 	if (!rdtgrp) {
1764 		rdtgroup_kn_unlock(of->kn);
1765 		return -ENOENT;
1766 	}
1767 	rdt_last_cmd_clear();
1768 
1769 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1770 		rdt_last_cmd_puts("mbm_event mode is not enabled\n");
1771 		rdtgroup_kn_unlock(of->kn);
1772 		return -EINVAL;
1773 	}
1774 
1775 	while ((token = strsep(&buf, "\n")) != NULL) {
1776 		/*
1777 		 * The write command has the following format:
1778 		 * "<Event>:<Domain ID>=<Assignment state>"
1779 		 * Extract the event name first.
1780 		 */
1781 		event = strsep(&token, ":");
1782 
1783 		ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
1784 		if (ret)
1785 			break;
1786 	}
1787 
1788 	rdtgroup_kn_unlock(of->kn);
1789 
1790 	return ret ?: nbytes;
1791 }
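
/*
 * Example writes handled above (illustrative): "mbm_total_bytes:0=e" assigns
 * a counter for mbm_total_bytes in domain 0, "mbm_local_bytes:*=_" unassigns
 * mbm_local_bytes in every domain, and multiple such lines can be written in
 * a single buffer, one assignment per line.
 */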
1792 
1793 static int closid_num_dirty_rmid_alloc(struct rdt_resource *r)
1794 {
1795 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1796 		u32 num_closid = resctrl_arch_get_num_closid(r);
1797 		u32 *tmp;
1798 
1799 		/* The mutex orders accesses to closid_num_dirty_rmid (matters on weakly ordered arm64) */
1800 		mutex_lock(&rdtgroup_mutex);
1801 
1802 		/*
1803 		 * If the architecture hasn't provided a sanitised value here,
1804 		 * this may result in larger arrays than necessary. Resctrl will
1805 		 * use a smaller system wide value based on the resources in
1806 		 * use.
1807 		 */
1808 		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
1809 		if (!tmp) {
1810 			mutex_unlock(&rdtgroup_mutex);
1811 			return -ENOMEM;
1812 		}
1813 
1814 		closid_num_dirty_rmid = tmp;
1815 
1816 		mutex_unlock(&rdtgroup_mutex);
1817 	}
1818 
1819 	return 0;
1820 }
1821 
1822 static void closid_num_dirty_rmid_free(void)
1823 {
1824 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1825 		mutex_lock(&rdtgroup_mutex);
1826 		kfree(closid_num_dirty_rmid);
1827 		closid_num_dirty_rmid = NULL;
1828 		mutex_unlock(&rdtgroup_mutex);
1829 	}
1830 }
1831 
1832 /**
1833  * resctrl_l3_mon_resource_init() - Initialise global monitoring structures.
1834  *
1835  * Allocate and initialise global monitor resources that do not belong to a
1836  * specific domain, i.e. the closid_num_dirty_rmid[] array used to find the CLOSID
1837  * with the cleanest set of RMIDs.
1838  * Called once during boot after the struct rdt_resource's have been configured
1839  * but before the filesystem is mounted.
1840  * Resctrl's cpuhp callbacks may be called before this point to bring a domain
1841  * online.
1842  *
1843  * Return: 0 for success, or -ENOMEM.
1844  */
1845 int resctrl_l3_mon_resource_init(void)
1846 {
1847 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1848 	int ret;
1849 
1850 	if (!r->mon_capable)
1851 		return 0;
1852 
1853 	ret = closid_num_dirty_rmid_alloc(r);
1854 	if (ret)
1855 		return ret;
1856 
1857 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
1858 		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
1859 		resctrl_file_fflags_init("mbm_total_bytes_config",
1860 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1861 	}
1862 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
1863 		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
1864 		resctrl_file_fflags_init("mbm_local_bytes_config",
1865 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1866 	}
1867 
1868 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1869 		mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
1870 	else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1871 		mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
1872 
1873 	if (r->mon.mbm_cntr_assignable) {
1874 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1875 			mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1876 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1877 			mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1878 									   (READS_TO_LOCAL_MEM |
1879 									    READS_TO_LOCAL_S_MEM |
1880 									    NON_TEMP_WRITE_TO_LOCAL_MEM);
1881 		r->mon.mbm_assign_on_mkdir = true;
1882 		resctrl_file_fflags_init("num_mbm_cntrs",
1883 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1884 		resctrl_file_fflags_init("available_mbm_cntrs",
1885 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1886 		resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
1887 		resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
1888 					 RFTYPE_RES_CACHE);
1889 		resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
1890 	}
1891 
1892 	return 0;
1893 }
1894 
1895 void resctrl_l3_mon_resource_exit(void)
1896 {
1897 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1898 
1899 	if (!r->mon_capable)
1900 		return;
1901 
1902 	closid_num_dirty_rmid_free();
1903 }
1904