xref: /linux/fs/resctrl/monitor.c (revision 2cb8eeaf00efc037988910de17ffe592b23941a6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology(RDT)
4  * - Monitoring code
5  *
6  * Copyright (C) 2017 Intel Corporation
7  *
8  * Author:
9  *    Vikas Shivappa <vikas.shivappa@intel.com>
10  *
11  * This replaces the perf-based cqm.c, but we reuse a lot of
12  * code and data structures originally from Peter Zijlstra and Matt Fleming.
13  *
14  * More information about RDT can be found in the Intel (R) x86 Architecture
15  * Software Developer Manual June 2016, volume 3, section 17.17.
16  */
17 
18 #define pr_fmt(fmt)	"resctrl: " fmt
19 
20 #include <linux/cpu.h>
21 #include <linux/resctrl.h>
22 #include <linux/sizes.h>
23 #include <linux/slab.h>
24 
25 #include "internal.h"
26 
27 #define CREATE_TRACE_POINTS
28 
29 #include "monitor_trace.h"
30 
31 /**
32  * struct rmid_entry - dirty tracking for all RMID.
33  * @closid:	The CLOSID for this entry.
34  * @rmid:	The RMID for this entry.
35  * @busy:	The number of domains with cached data using this RMID.
36  * @list:	Member of the rmid_free_lru list when busy == 0.
37  *
38  * Depending on the architecture the correct monitor is accessed using
39  * both @closid and @rmid, or @rmid only.
40  *
41  * Take the rdtgroup_mutex when accessing.
42  */
43 struct rmid_entry {
44 	u32				closid;
45 	u32				rmid;
46 	int				busy;
47 	struct list_head		list;
48 };
49 
50 /*
51  * @rmid_free_lru - A least recently used list of free RMIDs
52  *     These RMIDs are guaranteed to have an occupancy less than the
53  *     threshold occupancy
54  */
55 static LIST_HEAD(rmid_free_lru);
56 
57 /*
58  * @closid_num_dirty_rmid - The number of dirty RMID each CLOSID has.
59  *     Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
60  *     Indexed by CLOSID. Protected by rdtgroup_mutex.
61  */
62 static u32 *closid_num_dirty_rmid;
63 
64 /*
65  * @rmid_limbo_count - count of currently unused but (potentially)
66  *     dirty RMIDs.
67  *     This counts RMIDs that no one is currently using but that
68  *     may have an occupancy value > resctrl_rmid_realloc_threshold. User can
69  *     change the threshold occupancy value.
70  */
71 static unsigned int rmid_limbo_count;
72 
73 /*
74  * @rmid_ptrs - Array of struct rmid_entry used by the limbo and free lists.
75  */
76 static struct rmid_entry	*rmid_ptrs;
77 
78 /*
79  * This is the threshold cache occupancy in bytes at which we will consider an
80  * RMID available for re-allocation.
81  */
82 unsigned int resctrl_rmid_realloc_threshold;
83 
84 /*
85  * This is the maximum value for the reallocation threshold, in bytes.
86  */
87 unsigned int resctrl_rmid_realloc_limit;
88 
89 /*
90  * x86 and arm64 differ in their handling of monitoring.
91  * x86's RMID are independent numbers, there is only one source of traffic
92  * with an RMID value of '1'.
93  * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
94  * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
95  * value is no longer unique.
96  * To account for this, resctrl uses an index. On x86 this is just the RMID,
97  * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
98  *
99  * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
100  * must accept an attempt to read every index.
101  */
102 static inline struct rmid_entry *__rmid_entry(u32 idx)
103 {
104 	struct rmid_entry *entry;
105 	u32 closid, rmid;
106 
107 	entry = &rmid_ptrs[idx];
108 	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
109 
110 	WARN_ON_ONCE(entry->closid != closid);
111 	WARN_ON_ONCE(entry->rmid != rmid);
112 
113 	return entry;
114 }
115 
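/*
 * limbo_release_entry() - Move @entry from limbo back onto the free list and
 * update the limbo accounting. Caller must hold rdtgroup_mutex.
 */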
116 static void limbo_release_entry(struct rmid_entry *entry)
117 {
118 	lockdep_assert_held(&rdtgroup_mutex);
119 
120 	rmid_limbo_count--;
121 	list_add_tail(&entry->list, &rmid_free_lru);
122 
123 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
124 		closid_num_dirty_rmid[entry->closid]--;
125 }
126 
127 /*
128  * Check the RMIDs that are marked as busy for this domain. If the
129  * reported LLC occupancy is below the threshold, clear the busy bit and
130  * decrement the count. If the busy count gets to zero on an RMID, we
131  * free the RMID.
132  */
133 void __check_limbo(struct rdt_mon_domain *d, bool force_free)
134 {
135 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
136 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
137 	struct rmid_entry *entry;
138 	u32 idx, cur_idx = 1;
139 	void *arch_mon_ctx;
140 	bool rmid_dirty;
141 	u64 val = 0;
142 
143 	arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
144 	if (IS_ERR(arch_mon_ctx)) {
145 		pr_warn_ratelimited("Failed to allocate monitor context: %ld",
146 				    PTR_ERR(arch_mon_ctx));
147 		return;
148 	}
149 
150 	/*
151 	 * Skip RMID 0, start from RMID 1 and check all the RMIDs that
152 	 * are marked as busy for occupancy < threshold. If the occupancy
153 	 * is less than the threshold, decrement the busy counter of the
154 	 * RMID and move it to the free list when the counter reaches 0.
155 	 */
156 	for (;;) {
157 		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
158 		if (idx >= idx_limit)
159 			break;
160 
161 		entry = __rmid_entry(idx);
162 		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
163 					   QOS_L3_OCCUP_EVENT_ID, &val,
164 					   arch_mon_ctx)) {
165 			rmid_dirty = true;
166 		} else {
167 			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
168 
169 			/*
170 			 * x86's CLOSID and RMID are independent numbers, so the entry's
171 			 * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
172 			 * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
173 			 * used to select the configuration. It is thus necessary to track both
174 			 * CLOSID and RMID because there may be dependencies between them
175 			 * on some architectures.
176 			 */
177 			trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
178 		}
179 
180 		if (force_free || !rmid_dirty) {
181 			clear_bit(idx, d->rmid_busy_llc);
182 			if (!--entry->busy)
183 				limbo_release_entry(entry);
184 		}
185 		cur_idx = idx + 1;
186 	}
187 
188 	resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
189 }
190 
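/*
 * has_busy_rmid() - Return true if any RMID in domain @d is still marked
 * busy in rmid_busy_llc, i.e. the limbo handler still has work to do.
 */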
191 bool has_busy_rmid(struct rdt_mon_domain *d)
192 {
193 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
194 
195 	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
196 }
197 
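/*
 * resctrl_find_free_rmid() - Find a free RMID that can be used with @closid.
 * Returns the free entry, ERR_PTR(-EBUSY) if the free list is empty but RMIDs
 * are waiting in limbo, or ERR_PTR(-ENOSPC) if no suitable RMID exists.
 */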
198 static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
199 {
200 	struct rmid_entry *itr;
201 	u32 itr_idx, cmp_idx;
202 
203 	if (list_empty(&rmid_free_lru))
204 		return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
205 
206 	list_for_each_entry(itr, &rmid_free_lru, list) {
207 		/*
208 		 * Get the index of this free RMID, and the index it would need
209 		 * to be if it were used with this CLOSID.
210 		 * If the CLOSID is irrelevant on this architecture, the two
211 		 * index values are always the same on every entry and thus the
212 		 * very first entry will be returned.
213 		 */
214 		itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
215 		cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
216 
217 		if (itr_idx == cmp_idx)
218 			return itr;
219 	}
220 
221 	return ERR_PTR(-ENOSPC);
222 }
223 
224 /**
225  * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
226  *                                  RMID are clean, or the CLOSID that has
227  *                                  the most clean RMID.
228  *
229  * MPAM's equivalent of RMID is per-CLOSID, meaning a freshly allocated CLOSID
230  * may not be able to allocate clean RMID. To avoid this, the allocator will
231  * choose the CLOSID with the most clean RMID.
232  *
233  * When the CLOSID and RMID are independent numbers, the first free CLOSID will
234  * be returned.
235  */
236 int resctrl_find_cleanest_closid(void)
237 {
238 	u32 cleanest_closid = ~0;
239 	int i = 0;
240 
241 	lockdep_assert_held(&rdtgroup_mutex);
242 
243 	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
244 		return -EIO;
245 
246 	for (i = 0; i < closids_supported(); i++) {
247 		int num_dirty;
248 
249 		if (closid_allocated(i))
250 			continue;
251 
252 		num_dirty = closid_num_dirty_rmid[i];
253 		if (num_dirty == 0)
254 			return i;
255 
256 		if (cleanest_closid == ~0)
257 			cleanest_closid = i;
258 
259 		if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
260 			cleanest_closid = i;
261 	}
262 
263 	if (cleanest_closid == ~0)
264 		return -ENOSPC;
265 
266 	return cleanest_closid;
267 }
268 
269 /*
270  * For MPAM the RMID value is not unique, and has to be considered with
271  * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
272  * allows all domains to be managed by a single free list.
273  * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
274  */
275 int alloc_rmid(u32 closid)
276 {
277 	struct rmid_entry *entry;
278 
279 	lockdep_assert_held(&rdtgroup_mutex);
280 
281 	entry = resctrl_find_free_rmid(closid);
282 	if (IS_ERR(entry))
283 		return PTR_ERR(entry);
284 
285 	list_del(&entry->list);
286 	return entry->rmid;
287 }
288 
289 static void add_rmid_to_limbo(struct rmid_entry *entry)
290 {
291 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
292 	struct rdt_mon_domain *d;
293 	u32 idx;
294 
295 	lockdep_assert_held(&rdtgroup_mutex);
296 
297 	/* Walking r->domains, ensure it can't race with cpuhp */
298 	lockdep_assert_cpus_held();
299 
300 	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
301 
302 	entry->busy = 0;
303 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
304 		/*
305 		 * For the first limbo RMID in the domain,
306 		 * set up the limbo worker.
307 		 */
308 		if (!has_busy_rmid(d))
309 			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
310 						RESCTRL_PICK_ANY_CPU);
311 		set_bit(idx, d->rmid_busy_llc);
312 		entry->busy++;
313 	}
314 
315 	rmid_limbo_count++;
316 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
317 		closid_num_dirty_rmid[entry->closid]++;
318 }
319 
320 void free_rmid(u32 closid, u32 rmid)
321 {
322 	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
323 	struct rmid_entry *entry;
324 
325 	lockdep_assert_held(&rdtgroup_mutex);
326 
327 	/*
328 	 * Do not allow the default rmid to be freed. Comparing by index
329 	 * allows architectures that ignore the closid parameter to avoid an
330 	 * unnecessary check.
331 	 */
332 	if (!resctrl_arch_mon_capable() ||
333 	    idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
334 						RESCTRL_RESERVED_RMID))
335 		return;
336 
337 	entry = __rmid_entry(idx);
338 
339 	if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
340 		add_rmid_to_limbo(entry);
341 	else
342 		list_add_tail(&entry->list, &rmid_free_lru);
343 }
344 
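/*
 * get_mbm_state() - Return the mbm_state of the (@closid, @rmid) pair for the
 * MBM event @evtid in domain @d, or NULL if @evtid is not an MBM event or no
 * state has been allocated for it.
 */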
345 static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
346 				       u32 rmid, enum resctrl_event_id evtid)
347 {
348 	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
349 	struct mbm_state *state;
350 
351 	if (!resctrl_is_mbm_event(evtid))
352 		return NULL;
353 
354 	state = d->mbm_states[MBM_STATE_IDX(evtid)];
355 
356 	return state ? &state[idx] : NULL;
357 }
358 
359 /*
360  * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
361  *
362  * Return:
363  * Valid counter ID on success, or -ENOENT on failure.
364  */
365 static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d,
366 			struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
367 {
368 	int cntr_id;
369 
370 	if (!r->mon.mbm_cntr_assignable)
371 		return -ENOENT;
372 
373 	if (!resctrl_is_mbm_event(evtid))
374 		return -ENOENT;
375 
376 	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
377 		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
378 		    d->cntr_cfg[cntr_id].evtid == evtid)
379 			return cntr_id;
380 	}
381 
382 	return -ENOENT;
383 }
384 
385 /*
386  * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
387  * Caller must ensure that the specified event is not assigned already.
388  *
389  * Return:
390  * Valid counter ID on success, or -ENOSPC on failure.
391  */
392 static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
393 			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
394 {
395 	int cntr_id;
396 
397 	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
398 		if (!d->cntr_cfg[cntr_id].rdtgrp) {
399 			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
400 			d->cntr_cfg[cntr_id].evtid = evtid;
401 			return cntr_id;
402 		}
403 	}
404 
405 	return -ENOSPC;
406 }
407 
408 /*
409  * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
410  */
411 static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id)
412 {
413 	memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
414 }
415 
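/*
 * __mon_event_count() - Read the event described by @rr for group @rdtgrp and
 * accumulate the result in @rr->val. A "first" read only resets the counter
 * and the cached state. Reading a single domain must happen on a CPU in that
 * domain; otherwise all domains sharing the L3 cache instance in @rr->ci are
 * summed. Returns 0 on success, a negative error code otherwise.
 */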
416 static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
417 {
418 	int cpu = smp_processor_id();
419 	u32 closid = rdtgrp->closid;
420 	u32 rmid = rdtgrp->mon.rmid;
421 	struct rdt_mon_domain *d;
422 	int cntr_id = -ENOENT;
423 	struct mbm_state *m;
424 	int err, ret;
425 	u64 tval = 0;
426 
427 	if (rr->is_mbm_cntr) {
428 		cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
429 		if (cntr_id < 0) {
430 			rr->err = -ENOENT;
431 			return -EINVAL;
432 		}
433 	}
434 
435 	if (rr->first) {
436 		if (rr->is_mbm_cntr)
437 			resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
438 		else
439 			resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
440 		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
441 		if (m)
442 			memset(m, 0, sizeof(struct mbm_state));
443 		return 0;
444 	}
445 
446 	if (rr->d) {
447 		/* Reading a single domain, must be on a CPU in that domain. */
448 		if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
449 			return -EINVAL;
450 		if (rr->is_mbm_cntr)
451 			rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id,
452 							 rr->evtid, &tval);
453 		else
454 			rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
455 							 rr->evtid, &tval, rr->arch_mon_ctx);
456 		if (rr->err)
457 			return rr->err;
458 
459 		rr->val += tval;
460 
461 		return 0;
462 	}
463 
464 	/* Summing domains that share a cache, must be on a CPU for that cache. */
465 	if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
466 		return -EINVAL;
467 
468 	/*
469 	 * Legacy files must report the sum of an event across all
470 	 * domains that share the same L3 cache instance.
471 	 * Report success if a read from any domain succeeds, -EINVAL
472 	 * (translated to "Unavailable" for user space) if reading from
473 	 * all domains fails for any reason.
474 	 */
475 	ret = -EINVAL;
476 	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
477 		if (d->ci_id != rr->ci->id)
478 			continue;
479 		if (rr->is_mbm_cntr)
480 			err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
481 						     rr->evtid, &tval);
482 		else
483 			err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
484 						     rr->evtid, &tval, rr->arch_mon_ctx);
485 		if (!err) {
486 			rr->val += tval;
487 			ret = 0;
488 		}
489 	}
490 
491 	if (ret)
492 		rr->err = ret;
493 
494 	return ret;
495 }
496 
497 /*
498  * mbm_bw_count() - Update bw count from values previously read by
499  *		    __mon_event_count().
500  * @rdtgrp:	resctrl group associated with the CLOSID and RMID to identify
501  *		the cached mbm_state.
502  * @rr:		The struct rmid_read populated by __mon_event_count().
503  *
504  * Supporting function to calculate the memory bandwidth in MBps. The
505  * chunks value previously read by
506  * __mon_event_count() is compared with the chunks value from the previous
507  * invocation. This must be called once per second to maintain values in MBps.
508  */
509 static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
510 {
511 	u64 cur_bw, bytes, cur_bytes;
512 	u32 closid = rdtgrp->closid;
513 	u32 rmid = rdtgrp->mon.rmid;
514 	struct mbm_state *m;
515 
516 	m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
517 	if (WARN_ON_ONCE(!m))
518 		return;
519 
520 	cur_bytes = rr->val;
521 	bytes = cur_bytes - m->prev_bw_bytes;
522 	m->prev_bw_bytes = cur_bytes;
523 
524 	cur_bw = bytes / SZ_1M;
525 
526 	m->prev_bw = cur_bw;
527 }
528 
529 /*
530  * This is scheduled by mon_event_read() to read the CQM/MBM counters
531  * on a domain.
532  */
533 void mon_event_count(void *info)
534 {
535 	struct rdtgroup *rdtgrp, *entry;
536 	struct rmid_read *rr = info;
537 	struct list_head *head;
538 	int ret;
539 
540 	rdtgrp = rr->rgrp;
541 
542 	ret = __mon_event_count(rdtgrp, rr);
543 
544 	/*
545 	 * For Ctrl groups read data from child monitor groups and
546 	 * add them together. Count events which are read successfully.
547 	 * Discard the rmid_read's reporting errors.
548 	 */
549 	head = &rdtgrp->mon.crdtgrp_list;
550 
551 	if (rdtgrp->type == RDTCTRL_GROUP) {
552 		list_for_each_entry(entry, head, mon.crdtgrp_list) {
553 			if (__mon_event_count(entry, rr) == 0)
554 				ret = 0;
555 		}
556 	}
557 
558 	/*
559 	 * __mon_event_count() calls for newly created monitor groups may
560 	 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
561 	 * Discard error if any of the monitor event reads succeeded.
562 	 */
563 	if (ret == 0)
564 		rr->err = 0;
565 }
566 
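/*
 * get_ctrl_domain_from_cpu() - Return the control domain of resource @r that
 * contains @cpu, or NULL if the CPU is in none of the resource's domains.
 */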
567 static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
568 							struct rdt_resource *r)
569 {
570 	struct rdt_ctrl_domain *d;
571 
572 	lockdep_assert_cpus_held();
573 
574 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
575 		/* Find the domain that contains this CPU */
576 		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
577 			return d;
578 	}
579 
580 	return NULL;
581 }
582 
583 /*
584  * Feedback loop for MBA software controller (mba_sc)
585  *
586  * mba_sc is a feedback loop where we periodically read MBM counters and
587  * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
588  * that:
589  *
590  *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
591  *
592  * This uses the MBM counters to measure the bandwidth and MBA throttle
593  * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
594  * fact that resctrl rdtgroups have both monitoring and control.
595  *
596  * The frequency of the checks is 1s and we just tag along the MBM overflow
597  * timer. Having a 1s interval makes the calculation of bandwidth simpler.
598  *
599  * Although MBA's goal is to restrict the bandwidth to a maximum, there may
600  * be a need to increase the bandwidth to avoid unnecessarily restricting
601  * the L2 <-> L3 traffic.
602  *
603  * Since MBA controls the L2 external bandwidth whereas MBM measures the
604  * L3 external bandwidth the following sequence could lead to such a
605  * situation.
606  *
607  * Consider an rdtgroup which had high L3 <-> memory traffic in initial
608  * phases -> mba_sc kicks in and reduces bandwidth percentage values -> but
609  * after some time the rdtgroup has mostly L2 <-> L3 traffic.
610  *
611  * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
612  * throttle MSRs already have low percentage values.  To avoid
613  * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
614  */
615 static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
616 {
617 	u32 closid, rmid, cur_msr_val, new_msr_val;
618 	struct mbm_state *pmbm_data, *cmbm_data;
619 	struct rdt_ctrl_domain *dom_mba;
620 	enum resctrl_event_id evt_id;
621 	struct rdt_resource *r_mba;
622 	struct list_head *head;
623 	struct rdtgroup *entry;
624 	u32 cur_bw, user_bw;
625 
626 	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
627 	evt_id = rgrp->mba_mbps_event;
628 
629 	closid = rgrp->closid;
630 	rmid = rgrp->mon.rmid;
631 	pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
632 	if (WARN_ON_ONCE(!pmbm_data))
633 		return;
634 
635 	dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
636 	if (!dom_mba) {
637 		pr_warn_once("Failure to get domain for MBA update\n");
638 		return;
639 	}
640 
641 	cur_bw = pmbm_data->prev_bw;
642 	user_bw = dom_mba->mbps_val[closid];
643 
644 	/* MBA resource doesn't support CDP */
645 	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
646 
647 	/*
648 	 * For Ctrl groups read data from child monitor groups.
649 	 */
650 	head = &rgrp->mon.crdtgrp_list;
651 	list_for_each_entry(entry, head, mon.crdtgrp_list) {
652 		cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
653 		if (WARN_ON_ONCE(!cmbm_data))
654 			return;
655 		cur_bw += cmbm_data->prev_bw;
656 	}
657 
658 	/*
659 	 * Scale up/down the bandwidth linearly for the ctrl group.  The
660 	 * bandwidth step is the bandwidth granularity specified by the
661 	 * hardware.
662 	 * Always increase throttling if current bandwidth is above the
663 	 * target set by user.
664 	 * But avoid thrashing up and down on every poll by checking
665 	 * whether a decrease in throttling is likely to push the group
666 	 * back over target. E.g. if currently throttling to 30% of bandwidth
667 	 * on a system with 10% granularity steps, check whether moving to
668 	 * 40% would go past the limit by multiplying current bandwidth by
669 	 * "(30 + 10) / 30".
670 	 */
671 	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
672 		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
673 	} else if (cur_msr_val < MAX_MBA_BW &&
674 		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
675 		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
676 	} else {
677 		return;
678 	}
679 
680 	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
681 }
682 
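/*
 * mbm_update_one_event() - Read one MBM event for @rdtgrp in domain @d, using
 * an assigned counter when "mbm_event" counter assignment mode is enabled.
 * When the MBA software controller is active, the value read also feeds the
 * bandwidth calculation in mbm_bw_count().
 */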
683 static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
684 				 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
685 {
686 	struct rmid_read rr = {0};
687 
688 	rr.r = r;
689 	rr.d = d;
690 	rr.evtid = evtid;
691 	if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
692 		rr.is_mbm_cntr = true;
693 	} else {
694 		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
695 		if (IS_ERR(rr.arch_mon_ctx)) {
696 			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
697 					    PTR_ERR(rr.arch_mon_ctx));
698 			return;
699 		}
700 	}
701 
702 	__mon_event_count(rdtgrp, &rr);
703 
704 	/*
705 	 * If the software controller is enabled, compute the
706 	 * bandwidth for this event id.
707 	 */
708 	if (is_mba_sc(NULL))
709 		mbm_bw_count(rdtgrp, &rr);
710 
711 	if (rr.arch_mon_ctx)
712 		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
713 }
714 
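/*
 * mbm_update() - Read all enabled MBM events for @rdtgrp in domain @d.
 */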
715 static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
716 		       struct rdtgroup *rdtgrp)
717 {
718 	/*
719 	 * This is protected from concurrent reads from user as both
720 	 * the user and overflow handler hold the global mutex.
721 	 */
722 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
723 		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
724 
725 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
726 		mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
727 }
728 
729 /*
730  * Handler to scan the limbo list and move RMIDs whose occupancy is
731  * below the threshold occupancy to the free list.
732  */
733 void cqm_handle_limbo(struct work_struct *work)
734 {
735 	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
736 	struct rdt_mon_domain *d;
737 
738 	cpus_read_lock();
739 	mutex_lock(&rdtgroup_mutex);
740 
741 	d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
742 
743 	__check_limbo(d, false);
744 
745 	if (has_busy_rmid(d)) {
746 		d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
747 							   RESCTRL_PICK_ANY_CPU);
748 		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
749 					 delay);
750 	}
751 
752 	mutex_unlock(&rdtgroup_mutex);
753 	cpus_read_unlock();
754 }
755 
756 /**
757  * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
758  *                             domain.
759  * @dom:           The domain the limbo handler should run for.
760  * @delay_ms:      How far in the future the handler should run.
761  * @exclude_cpu:   Which CPU the handler should not run on,
762  *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
763  */
764 void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
765 			     int exclude_cpu)
766 {
767 	unsigned long delay = msecs_to_jiffies(delay_ms);
768 	int cpu;
769 
770 	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
771 	dom->cqm_work_cpu = cpu;
772 
773 	if (cpu < nr_cpu_ids)
774 		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
775 }
776 
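/*
 * mbm_handle_overflow() - Periodic worker that reads the MBM events of every
 * group in the domain so that counter overflow is not missed, and runs the
 * MBA software controller feedback loop when it is enabled. Reschedules
 * itself every MBM_OVERFLOW_INTERVAL while the filesystem is mounted.
 */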
777 void mbm_handle_overflow(struct work_struct *work)
778 {
779 	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
780 	struct rdtgroup *prgrp, *crgrp;
781 	struct rdt_mon_domain *d;
782 	struct list_head *head;
783 	struct rdt_resource *r;
784 
785 	cpus_read_lock();
786 	mutex_lock(&rdtgroup_mutex);
787 
788 	/*
789 	 * If the filesystem has been unmounted this work no longer needs to
790 	 * run.
791 	 */
792 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
793 		goto out_unlock;
794 
795 	r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
796 	d = container_of(work, struct rdt_mon_domain, mbm_over.work);
797 
798 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
799 		mbm_update(r, d, prgrp);
800 
801 		head = &prgrp->mon.crdtgrp_list;
802 		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
803 			mbm_update(r, d, crgrp);
804 
805 		if (is_mba_sc(NULL))
806 			update_mba_bw(prgrp, d);
807 	}
808 
809 	/*
810 	 * Re-check for housekeeping CPUs. This allows the overflow handler to
811 	 * move off a nohz_full CPU quickly.
812 	 */
813 	d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
814 						   RESCTRL_PICK_ANY_CPU);
815 	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
816 
817 out_unlock:
818 	mutex_unlock(&rdtgroup_mutex);
819 	cpus_read_unlock();
820 }
821 
822 /**
823  * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
824  *                                domain.
825  * @dom:           The domain the overflow handler should run for.
826  * @delay_ms:      How far in the future the handler should run.
827  * @exclude_cpu:   Which CPU the handler should not run on,
828  *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
829  */
830 void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
831 				int exclude_cpu)
832 {
833 	unsigned long delay = msecs_to_jiffies(delay_ms);
834 	int cpu;
835 
836 	/*
837 	 * When a domain comes online there is no guarantee the filesystem is
838 	 * mounted. If not, there is no need to catch counter overflow.
839 	 */
840 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
841 		return;
842 	cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
843 	dom->mbm_work_cpu = cpu;
844 
845 	if (cpu < nr_cpu_ids)
846 		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
847 }
848 
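/*
 * dom_data_init() - Allocate rmid_ptrs[] (and closid_num_dirty_rmid when
 * RMIDs depend on the CLOSID), place every entry on the free list, then
 * remove the reserved entry used by the default resource group.
 */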
849 static int dom_data_init(struct rdt_resource *r)
850 {
851 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
852 	u32 num_closid = resctrl_arch_get_num_closid(r);
853 	struct rmid_entry *entry = NULL;
854 	int err = 0, i;
855 	u32 idx;
856 
857 	mutex_lock(&rdtgroup_mutex);
858 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
859 		u32 *tmp;
860 
861 		/*
862 		 * If the architecture hasn't provided a sanitised value here,
863 		 * this may result in larger arrays than necessary. Resctrl will
864 		 * use a smaller system wide value based on the resources in
865 		 * use.
866 		 */
867 		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
868 		if (!tmp) {
869 			err = -ENOMEM;
870 			goto out_unlock;
871 		}
872 
873 		closid_num_dirty_rmid = tmp;
874 	}
875 
876 	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
877 	if (!rmid_ptrs) {
878 		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
879 			kfree(closid_num_dirty_rmid);
880 			closid_num_dirty_rmid = NULL;
881 		}
882 		err = -ENOMEM;
883 		goto out_unlock;
884 	}
885 
886 	for (i = 0; i < idx_limit; i++) {
887 		entry = &rmid_ptrs[i];
888 		INIT_LIST_HEAD(&entry->list);
889 
890 		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
891 		list_add_tail(&entry->list, &rmid_free_lru);
892 	}
893 
894 	/*
895 	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
896 	 * are always allocated. These are used for the rdtgroup_default
897 	 * control group, which will be setup later in resctrl_init().
898 	 */
899 	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
900 					   RESCTRL_RESERVED_RMID);
901 	entry = __rmid_entry(idx);
902 	list_del(&entry->list);
903 
904 out_unlock:
905 	mutex_unlock(&rdtgroup_mutex);
906 
907 	return err;
908 }
909 
910 static void dom_data_exit(struct rdt_resource *r)
911 {
912 	mutex_lock(&rdtgroup_mutex);
913 
914 	if (!r->mon_capable)
915 		goto out_unlock;
916 
917 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
918 		kfree(closid_num_dirty_rmid);
919 		closid_num_dirty_rmid = NULL;
920 	}
921 
922 	kfree(rmid_ptrs);
923 	rmid_ptrs = NULL;
924 
925 out_unlock:
926 	mutex_unlock(&rdtgroup_mutex);
927 }
928 
929 /*
930  * All available events. Architecture code marks the ones that
931  * are supported by a system using resctrl_enable_mon_event()
932  * to set .enabled.
933  */
934 struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
935 	[QOS_L3_OCCUP_EVENT_ID] = {
936 		.name	= "llc_occupancy",
937 		.evtid	= QOS_L3_OCCUP_EVENT_ID,
938 		.rid	= RDT_RESOURCE_L3,
939 	},
940 	[QOS_L3_MBM_TOTAL_EVENT_ID] = {
941 		.name	= "mbm_total_bytes",
942 		.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
943 		.rid	= RDT_RESOURCE_L3,
944 	},
945 	[QOS_L3_MBM_LOCAL_EVENT_ID] = {
946 		.name	= "mbm_local_bytes",
947 		.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
948 		.rid	= RDT_RESOURCE_L3,
949 	},
950 };
951 
952 void resctrl_enable_mon_event(enum resctrl_event_id eventid)
953 {
954 	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
955 		return;
956 	if (mon_event_all[eventid].enabled) {
957 		pr_warn("Duplicate enable for event %d\n", eventid);
958 		return;
959 	}
960 
961 	mon_event_all[eventid].enabled = true;
962 }
963 
964 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
965 {
966 	return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
967 	       mon_event_all[eventid].enabled;
968 }
969 
970 u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
971 {
972 	return mon_event_all[evtid].evt_cfg;
973 }
974 
975 /**
976  * struct mbm_transaction - Memory transaction an MBM event can be configured with.
977  * @name:	Name of memory transaction (read, write ...).
978  * @val:	The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
979  *		represent the memory transaction within an event's configuration.
980  */
981 struct mbm_transaction {
982 	char	name[32];
983 	u32	val;
984 };
985 
986 /* Decoded values for each type of memory transaction. */
987 static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
988 	{"local_reads", READS_TO_LOCAL_MEM},
989 	{"remote_reads", READS_TO_REMOTE_MEM},
990 	{"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
991 	{"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
992 	{"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
993 	{"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
994 	{"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
995 };
996 
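/*
 * event_filter_show() - Show the memory transaction types included in this
 * MBM event's configuration as a comma separated list. Only valid while
 * "mbm_event" counter assignment mode is enabled.
 */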
997 int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
998 {
999 	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1000 	struct rdt_resource *r;
1001 	bool sep = false;
1002 	int ret = 0, i;
1003 
1004 	mutex_lock(&rdtgroup_mutex);
1005 	rdt_last_cmd_clear();
1006 
1007 	r = resctrl_arch_get_resource(mevt->rid);
1008 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1009 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1010 		ret = -EINVAL;
1011 		goto out_unlock;
1012 	}
1013 
1014 	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1015 		if (mevt->evt_cfg & mbm_transactions[i].val) {
1016 			if (sep)
1017 				seq_putc(seq, ',');
1018 			seq_printf(seq, "%s", mbm_transactions[i].name);
1019 			sep = true;
1020 		}
1021 	}
1022 	seq_putc(seq, '\n');
1023 
1024 out_unlock:
1025 	mutex_unlock(&rdtgroup_mutex);
1026 
1027 	return ret;
1028 }
1029 
1030 int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
1031 				     void *v)
1032 {
1033 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1034 	int ret = 0;
1035 
1036 	mutex_lock(&rdtgroup_mutex);
1037 	rdt_last_cmd_clear();
1038 
1039 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1040 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1041 		ret = -EINVAL;
1042 		goto out_unlock;
1043 	}
1044 
1045 	seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
1046 
1047 out_unlock:
1048 	mutex_unlock(&rdtgroup_mutex);
1049 
1050 	return ret;
1051 }
1052 
1053 ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
1054 					  size_t nbytes, loff_t off)
1055 {
1056 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1057 	bool value;
1058 	int ret;
1059 
1060 	ret = kstrtobool(buf, &value);
1061 	if (ret)
1062 		return ret;
1063 
1064 	mutex_lock(&rdtgroup_mutex);
1065 	rdt_last_cmd_clear();
1066 
1067 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1068 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1069 		ret = -EINVAL;
1070 		goto out_unlock;
1071 	}
1072 
1073 	r->mon.mbm_assign_on_mkdir = value;
1074 
1075 out_unlock:
1076 	mutex_unlock(&rdtgroup_mutex);
1077 
1078 	return ret ?: nbytes;
1079 }
1080 
1081 /*
1082  * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
1083  *			 domain @d. Called when mbm_assign_mode is changed.
1084  */
1085 static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d)
1086 {
1087 	memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
1088 }
1089 
1090 /*
1091  * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
1092  *			      supported RMIDs.
1093  */
1094 static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
1095 {
1096 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
1097 	enum resctrl_event_id evt;
1098 	int idx;
1099 
1100 	for_each_mbm_event_id(evt) {
1101 		if (!resctrl_is_mon_event_enabled(evt))
1102 			continue;
1103 		idx = MBM_STATE_IDX(evt);
1104 		memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
1105 	}
1106 }
1107 
1108 /*
1109  * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
1110  * pair in the domain.
1111  *
1112  * Assign the counter if @assign is true else unassign the counter. Reset the
1113  * associated non-architectural state.
1114  */
1115 static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
1116 				 enum resctrl_event_id evtid, u32 rmid, u32 closid,
1117 				 u32 cntr_id, bool assign)
1118 {
1119 	struct mbm_state *m;
1120 
1121 	resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
1122 
1123 	m = get_mbm_state(d, closid, rmid, evtid);
1124 	if (m)
1125 		memset(m, 0, sizeof(*m));
1126 }
1127 
1128 /*
1129  * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
1130  * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
1131  *
1132  * Return:
1133  * 0 on success, < 0 on failure.
1134  */
1135 static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
1136 				      struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1137 {
1138 	int cntr_id;
1139 
1140 	/* No action required if the counter is assigned already. */
1141 	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1142 	if (cntr_id >= 0)
1143 		return 0;
1144 
1145 	cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
1146 	if (cntr_id < 0) {
1147 		rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
1148 				    mevt->name, d->hdr.id);
1149 		return cntr_id;
1150 	}
1151 
1152 	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
1153 
1154 	return 0;
1155 }
1156 
1157 /*
1158  * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
1159  * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
1160  * NULL; otherwise, assign the counter to the specified domain @d.
1161  *
1162  * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
1163  * will fail. The assignment process will abort at the first failure encountered
1164  * during domain traversal, which may result in the event being only partially
1165  * assigned.
1166  *
1167  * Return:
1168  * 0 on success, < 0 on failure.
1169  */
1170 static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
1171 				      struct mon_evt *mevt)
1172 {
1173 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1174 	int ret = 0;
1175 
1176 	if (!d) {
1177 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1178 			ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1179 			if (ret)
1180 				return ret;
1181 		}
1182 	} else {
1183 		ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1184 	}
1185 
1186 	return ret;
1187 }
1188 
1189 /*
1190  * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
1191  *			     a new group is created.
1192  *
1193  * Each group can accommodate two counters per domain: one for the total
1194  * event and one for the local event. Assignments may fail due to the limited
1195  * number of counters. However, it is not necessary to fail the group creation
1196  * and thus no failure is returned. Users have the option to modify the
1197  * counter assignments after the group has been created.
1198  */
1199 void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
1200 {
1201 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1202 
1203 	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
1204 	    !r->mon.mbm_assign_on_mkdir)
1205 		return;
1206 
1207 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1208 		rdtgroup_assign_cntr_event(NULL, rdtgrp,
1209 					   &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1210 
1211 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1212 		rdtgroup_assign_cntr_event(NULL, rdtgrp,
1213 					   &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1214 }
1215 
1216 /*
1217  * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
1218  * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
1219  */
1220 static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
1221 					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1222 {
1223 	int cntr_id;
1224 
1225 	cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1226 
1227 	/* If there is no cntr_id assigned, nothing to do */
1228 	if (cntr_id < 0)
1229 		return;
1230 
1231 	rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
1232 
1233 	mbm_cntr_free(d, cntr_id);
1234 }
1235 
1236 /*
1237  * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
1238  * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
1239  * the counters from all the domains if @d is NULL else unassign from @d.
1240  */
1241 static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
1242 					 struct mon_evt *mevt)
1243 {
1244 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1245 
1246 	if (!d) {
1247 		list_for_each_entry(d, &r->mon_domains, hdr.list)
1248 			rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1249 	} else {
1250 		rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1251 	}
1252 }
1253 
1254 /*
1255  * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
1256  *			       Called when a group is deleted.
1257  */
1258 void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
1259 {
1260 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1261 
1262 	if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
1263 		return;
1264 
1265 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1266 		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1267 					     &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1268 
1269 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1270 		rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1271 					     &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1272 }
1273 
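/*
 * resctrl_parse_mem_transactions() - Parse a comma separated list of memory
 * transaction names in @tok and OR the corresponding configuration bits into
 * @val. Returns 0 on success, -EINVAL for an unknown transaction type.
 */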
1274 static int resctrl_parse_mem_transactions(char *tok, u32 *val)
1275 {
1276 	u32 temp_val = 0;
1277 	char *evt_str;
1278 	bool found;
1279 	int i;
1280 
1281 next_config:
1282 	if (!tok || tok[0] == '\0') {
1283 		*val = temp_val;
1284 		return 0;
1285 	}
1286 
1287 	/* Start processing the strings for each memory transaction type */
1288 	evt_str = strim(strsep(&tok, ","));
1289 	found = false;
1290 	for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1291 		if (!strcmp(mbm_transactions[i].name, evt_str)) {
1292 			temp_val |= mbm_transactions[i].val;
1293 			found = true;
1294 			break;
1295 		}
1296 	}
1297 
1298 	if (!found) {
1299 		rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
1300 		return -EINVAL;
1301 	}
1302 
1303 	goto next_config;
1304 }
1305 
1306 /*
1307  * rdtgroup_update_cntr_event - Update the counter assignments for the event
1308  *				in a group.
1309  * @r:		Resource to which the update applies.
1310  * @rdtgrp:	Resctrl group.
1311  * @evtid:	MBM monitor event.
1312  */
1313 static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1314 				       enum resctrl_event_id evtid)
1315 {
1316 	struct rdt_mon_domain *d;
1317 	int cntr_id;
1318 
1319 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1320 		cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
1321 		if (cntr_id >= 0)
1322 			rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
1323 					     rdtgrp->closid, cntr_id, true);
1324 	}
1325 }
1326 
1327 /*
1328  * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
1329  *				   for all the groups.
1330  * @mevt	MBM Monitor event.
1331  * @mevt:	MBM monitor event.
1332 static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
1333 {
1334 	struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1335 	struct rdtgroup *prgrp, *crgrp;
1336 
1337 	/*
1338 	 * Find all the groups where the event is assigned and update the
1339 	 * configuration of existing assignments.
1340 	 */
1341 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1342 		rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
1343 
1344 		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
1345 			rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
1346 	}
1347 }
1348 
1349 ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
1350 			   loff_t off)
1351 {
1352 	struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1353 	struct rdt_resource *r;
1354 	u32 evt_cfg = 0;
1355 	int ret = 0;
1356 
1357 	/* Valid input requires a trailing newline */
1358 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1359 		return -EINVAL;
1360 
1361 	buf[nbytes - 1] = '\0';
1362 
1363 	cpus_read_lock();
1364 	mutex_lock(&rdtgroup_mutex);
1365 
1366 	rdt_last_cmd_clear();
1367 
1368 	r = resctrl_arch_get_resource(mevt->rid);
1369 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1370 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1371 		ret = -EINVAL;
1372 		goto out_unlock;
1373 	}
1374 
1375 	ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
1376 	if (!ret && mevt->evt_cfg != evt_cfg) {
1377 		mevt->evt_cfg = evt_cfg;
1378 		resctrl_update_cntr_allrdtgrp(mevt);
1379 	}
1380 
1381 out_unlock:
1382 	mutex_unlock(&rdtgroup_mutex);
1383 	cpus_read_unlock();
1384 
1385 	return ret ?: nbytes;
1386 }
1387 
1388 int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
1389 				 struct seq_file *s, void *v)
1390 {
1391 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1392 	bool enabled;
1393 
1394 	mutex_lock(&rdtgroup_mutex);
1395 	enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
1396 
1397 	if (r->mon.mbm_cntr_assignable) {
1398 		if (enabled)
1399 			seq_puts(s, "[mbm_event]\n");
1400 		else
1401 			seq_puts(s, "[default]\n");
1402 
1403 		if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
1404 			if (enabled)
1405 				seq_puts(s, "default\n");
1406 			else
1407 				seq_puts(s, "mbm_event\n");
1408 		}
1409 	} else {
1410 		seq_puts(s, "[default]\n");
1411 	}
1412 
1413 	mutex_unlock(&rdtgroup_mutex);
1414 
1415 	return 0;
1416 }
1417 
1418 ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
1419 				      size_t nbytes, loff_t off)
1420 {
1421 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1422 	struct rdt_mon_domain *d;
1423 	int ret = 0;
1424 	bool enable;
1425 
1426 	/* Valid input requires a trailing newline */
1427 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1428 		return -EINVAL;
1429 
1430 	buf[nbytes - 1] = '\0';
1431 
1432 	cpus_read_lock();
1433 	mutex_lock(&rdtgroup_mutex);
1434 
1435 	rdt_last_cmd_clear();
1436 
1437 	if (!strcmp(buf, "default")) {
1438 		enable = 0;
1439 	} else if (!strcmp(buf, "mbm_event")) {
1440 		if (r->mon.mbm_cntr_assignable) {
1441 			enable = 1;
1442 		} else {
1443 			ret = -EINVAL;
1444 			rdt_last_cmd_puts("mbm_event mode is not supported\n");
1445 			goto out_unlock;
1446 		}
1447 	} else {
1448 		ret = -EINVAL;
1449 		rdt_last_cmd_puts("Unsupported assign mode\n");
1450 		goto out_unlock;
1451 	}
1452 
1453 	if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
1454 		ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
1455 		if (ret)
1456 			goto out_unlock;
1457 
1458 		/* Update the visibility of BMEC related files */
1459 		resctrl_bmec_files_show(r, NULL, !enable);
1460 
1461 		/*
1462 		 * Initialize the default memory transaction values for
1463 		 * total and local events.
1464 		 */
1465 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1466 			mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1467 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1468 			mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1469 									   (READS_TO_LOCAL_MEM |
1470 									    READS_TO_LOCAL_S_MEM |
1471 									    NON_TEMP_WRITE_TO_LOCAL_MEM);
1472 		/* Enable auto assignment when switching to "mbm_event" mode */
1473 		if (enable)
1474 			r->mon.mbm_assign_on_mkdir = true;
1475 		/*
1476 		 * Reset all the non-architectural RMID state and assignable counters.
1477 		 */
1478 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1479 			mbm_cntr_free_all(r, d);
1480 			resctrl_reset_rmid_all(r, d);
1481 		}
1482 	}
1483 
1484 out_unlock:
1485 	mutex_unlock(&rdtgroup_mutex);
1486 	cpus_read_unlock();
1487 
1488 	return ret ?: nbytes;
1489 }
1490 
1491 int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
1492 			       struct seq_file *s, void *v)
1493 {
1494 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1495 	struct rdt_mon_domain *dom;
1496 	bool sep = false;
1497 
1498 	cpus_read_lock();
1499 	mutex_lock(&rdtgroup_mutex);
1500 
1501 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1502 		if (sep)
1503 			seq_putc(s, ';');
1504 
1505 		seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
1506 		sep = true;
1507 	}
1508 	seq_putc(s, '\n');
1509 
1510 	mutex_unlock(&rdtgroup_mutex);
1511 	cpus_read_unlock();
1512 	return 0;
1513 }
1514 
1515 int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
1516 				     struct seq_file *s, void *v)
1517 {
1518 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1519 	struct rdt_mon_domain *dom;
1520 	bool sep = false;
1521 	u32 cntrs, i;
1522 	int ret = 0;
1523 
1524 	cpus_read_lock();
1525 	mutex_lock(&rdtgroup_mutex);
1526 
1527 	rdt_last_cmd_clear();
1528 
1529 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1530 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1531 		ret = -EINVAL;
1532 		goto out_unlock;
1533 	}
1534 
1535 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1536 		if (sep)
1537 			seq_putc(s, ';');
1538 
1539 		cntrs = 0;
1540 		for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
1541 			if (!dom->cntr_cfg[i].rdtgrp)
1542 				cntrs++;
1543 		}
1544 
1545 		seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
1546 		sep = true;
1547 	}
1548 	seq_putc(s, '\n');
1549 
1550 out_unlock:
1551 	mutex_unlock(&rdtgroup_mutex);
1552 	cpus_read_unlock();
1553 
1554 	return ret;
1555 }
1556 
1557 int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
1558 {
1559 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1560 	struct rdt_mon_domain *d;
1561 	struct rdtgroup *rdtgrp;
1562 	struct mon_evt *mevt;
1563 	int ret = 0;
1564 	bool sep;
1565 
1566 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1567 	if (!rdtgrp) {
1568 		ret = -ENOENT;
1569 		goto out_unlock;
1570 	}
1571 
1572 	rdt_last_cmd_clear();
1573 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1574 		rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1575 		ret = -EINVAL;
1576 		goto out_unlock;
1577 	}
1578 
1579 	for_each_mon_event(mevt) {
1580 		if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
1581 			continue;
1582 
1583 		sep = false;
1584 		seq_printf(s, "%s:", mevt->name);
1585 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
1586 			if (sep)
1587 				seq_putc(s, ';');
1588 
1589 			if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
1590 				seq_printf(s, "%d=_", d->hdr.id);
1591 			else
1592 				seq_printf(s, "%d=e", d->hdr.id);
1593 
1594 			sep = true;
1595 		}
1596 		seq_putc(s, '\n');
1597 	}
1598 
1599 out_unlock:
1600 	rdtgroup_kn_unlock(of->kn);
1601 
1602 	return ret;
1603 }
1604 
1605 /*
1606  * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
1607  * event name.
1608  */
1609 static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
1610 {
1611 	struct mon_evt *mevt;
1612 
1613 	for_each_mon_event(mevt) {
1614 		if (mevt->rid == r->rid && mevt->enabled &&
1615 		    resctrl_is_mbm_event(mevt->evtid) &&
1616 		    !strcmp(mevt->name, name))
1617 			return mevt;
1618 	}
1619 
1620 	return NULL;
1621 }
1622 
1623 static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d,
1624 					struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1625 {
1626 	int ret = 0;
1627 
1628 	if (!assign || strlen(assign) != 1)
1629 		return -EINVAL;
1630 
1631 	switch (*assign) {
1632 	case 'e':
1633 		ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
1634 		break;
1635 	case '_':
1636 		rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
1637 		break;
1638 	default:
1639 		ret = -EINVAL;
1640 		break;
1641 	}
1642 
1643 	return ret;
1644 }
1645 
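/*
 * resctrl_parse_mbm_assignment() - Apply the "<domain id>=<assignment state>"
 * entries in @tok for the event named @event to group @rdtgrp. A domain id of
 * '*' applies the state to all monitor domains.
 */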
1646 static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1647 					char *event, char *tok)
1648 {
1649 	struct rdt_mon_domain *d;
1650 	unsigned long dom_id = 0;
1651 	char *dom_str, *id_str;
1652 	struct mon_evt *mevt;
1653 	int ret;
1654 
1655 	mevt = mbm_get_mon_event_by_name(r, event);
1656 	if (!mevt) {
1657 		rdt_last_cmd_printf("Invalid event %s\n", event);
1658 		return -ENOENT;
1659 	}
1660 
1661 next:
1662 	if (!tok || tok[0] == '\0')
1663 		return 0;
1664 
1665 	/* Start processing the strings for each domain */
1666 	dom_str = strim(strsep(&tok, ";"));
1667 
1668 	id_str = strsep(&dom_str, "=");
1669 
1670 	/* Check for domain id '*' which means all domains */
1671 	if (id_str && *id_str == '*') {
1672 		ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
1673 		if (ret)
1674 			rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
1675 					    event, dom_str);
1676 		return ret;
1677 	} else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1678 		rdt_last_cmd_puts("Missing domain id\n");
1679 		return -EINVAL;
1680 	}
1681 
1682 	/* Verify if the dom_id is valid */
1683 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1684 		if (d->hdr.id == dom_id) {
1685 			ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
1686 			if (ret) {
1687 				rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
1688 						    event, dom_id, dom_str);
1689 				return ret;
1690 			}
1691 			goto next;
1692 		}
1693 	}
1694 
1695 	rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
1696 	return -EINVAL;
1697 }
1698 
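/*
 * mbm_L3_assignments_write() - Parse lines of the form
 * "<event>:<domain id>=<assignment state>" and update the group's counter
 * assignments accordingly. Only valid while "mbm_event" counter assignment
 * mode is enabled.
 */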
1699 ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
1700 				 size_t nbytes, loff_t off)
1701 {
1702 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1703 	struct rdtgroup *rdtgrp;
1704 	char *token, *event;
1705 	int ret = 0;
1706 
1707 	/* Valid input requires a trailing newline */
1708 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1709 		return -EINVAL;
1710 
1711 	buf[nbytes - 1] = '\0';
1712 
1713 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1714 	if (!rdtgrp) {
1715 		rdtgroup_kn_unlock(of->kn);
1716 		return -ENOENT;
1717 	}
1718 	rdt_last_cmd_clear();
1719 
1720 	if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1721 		rdt_last_cmd_puts("mbm_event mode is not enabled\n");
1722 		rdtgroup_kn_unlock(of->kn);
1723 		return -EINVAL;
1724 	}
1725 
1726 	while ((token = strsep(&buf, "\n")) != NULL) {
1727 		/*
1728 		 * The write command follows the following format:
1729 		 * "<Event>:<Domain ID>=<Assignment state>"
1730 		 * Extract the event name first.
1731 		 */
1732 		event = strsep(&token, ":");
1733 
1734 		ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
1735 		if (ret)
1736 			break;
1737 	}
1738 
1739 	rdtgroup_kn_unlock(of->kn);
1740 
1741 	return ret ?: nbytes;
1742 }
1743 
1744 /**
1745  * resctrl_mon_resource_init() - Initialise global monitoring structures.
1746  *
1747  * Allocate and initialise global monitor resources that do not belong to a
1748  * specific domain, i.e. the rmid_ptrs[] used for the limbo and free lists.
1749  * Called once during boot after the struct rdt_resource's have been configured
1750  * but before the filesystem is mounted.
1751  * Resctrl's cpuhp callbacks may be called before this point to bring a domain
1752  * online.
1753  *
1754  * Returns 0 for success, or -ENOMEM.
1755  */
1756 int resctrl_mon_resource_init(void)
1757 {
1758 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1759 	int ret;
1760 
1761 	if (!r->mon_capable)
1762 		return 0;
1763 
1764 	ret = dom_data_init(r);
1765 	if (ret)
1766 		return ret;
1767 
1768 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
1769 		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
1770 		resctrl_file_fflags_init("mbm_total_bytes_config",
1771 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1772 	}
1773 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
1774 		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
1775 		resctrl_file_fflags_init("mbm_local_bytes_config",
1776 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1777 	}
1778 
1779 	if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1780 		mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
1781 	else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1782 		mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
1783 
1784 	if (r->mon.mbm_cntr_assignable) {
1785 		if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1786 			resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
1787 		if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1788 			resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
1789 		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1790 		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1791 								   (READS_TO_LOCAL_MEM |
1792 								    READS_TO_LOCAL_S_MEM |
1793 								    NON_TEMP_WRITE_TO_LOCAL_MEM);
1794 		r->mon.mbm_assign_on_mkdir = true;
1795 		resctrl_file_fflags_init("num_mbm_cntrs",
1796 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1797 		resctrl_file_fflags_init("available_mbm_cntrs",
1798 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1799 		resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
1800 		resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
1801 					 RFTYPE_RES_CACHE);
1802 		resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
1803 	}
1804 
1805 	return 0;
1806 }
1807 
1808 void resctrl_mon_resource_exit(void)
1809 {
1810 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1811 
1812 	dom_data_exit(r);
1813 }
1814