1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Resource Director Technology(RDT)
4 * - Monitoring code
5 *
6 * Copyright (C) 2017 Intel Corporation
7 *
8 * Author:
9 * Vikas Shivappa <vikas.shivappa@intel.com>
10 *
11 * This replaces the cqm.c based on perf but we reuse a lot of
12 * code and datastructures originally from Peter Zijlstra and Matt Fleming.
13 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
15 * Software Developer Manual June 2016, volume 3, section 17.17.
16 */
17
18 #define pr_fmt(fmt) "resctrl: " fmt
19
20 #include <linux/cpu.h>
21 #include <linux/resctrl.h>
22 #include <linux/sizes.h>
23 #include <linux/slab.h>
24
25 #include "internal.h"
26
27 #define CREATE_TRACE_POINTS
28
29 #include "monitor_trace.h"
30
/**
 * struct rmid_entry - dirty tracking for all RMID.
 * @closid:	The CLOSID for this entry.
 * @rmid:	The RMID for this entry.
 * @busy:	The number of domains with cached data using this RMID. It is
 *		counted up once per monitoring domain when the entry is put in
 *		limbo and counted down as each domain clears its busy bit.
 * @list:	Links the entry into rmid_free_lru while the RMID is free. The
 *		entry is unlinked when the RMID is allocated and is only added
 *		back once @busy has dropped to zero again.
 *
 * Depending on the architecture the correct monitor is accessed using
 * both @closid and @rmid, or @rmid only.
 *
 * Take the rdtgroup_mutex when accessing.
 */
struct rmid_entry {
	u32				closid;
	u32				rmid;
	int				busy;
	struct list_head		list;
};
49
/*
 * @rmid_free_lru - A least recently used list of free RMIDs
 *     These RMIDs are guaranteed to have an occupancy less than the
 *     threshold occupancy
 */
static LIST_HEAD(rmid_free_lru);

/*
 * @closid_num_dirty_rmid - The number of dirty RMID each CLOSID has.
 *     Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
 *     Indexed by CLOSID. Protected by rdtgroup_mutex.
 */
static u32 *closid_num_dirty_rmid;

/*
 * @rmid_limbo_count - count of currently unused but (potentially)
 *     dirty RMIDs.
 *     This counts RMIDs that no one is currently using but that
 *     may have an occupancy value > resctrl_rmid_realloc_threshold. User can
 *     change the threshold occupancy value.
 */
static unsigned int rmid_limbo_count;

/*
 * @rmid_ptrs - Array holding one struct rmid_entry per RMID index. These are
 *     the entries that get linked into the free list and tracked while in
 *     limbo.
 */
static struct rmid_entry	*rmid_ptrs;

/*
 * This is the threshold cache occupancy in bytes at which we will consider an
 * RMID available for re-allocation.
 */
unsigned int resctrl_rmid_realloc_threshold;

/*
 * This is the maximum value for the reallocation threshold, in bytes.
 */
unsigned int resctrl_rmid_realloc_limit;
88
89 /*
90 * x86 and arm64 differ in their handling of monitoring.
91 * x86's RMID are independent numbers, there is only one source of traffic
92 * with an RMID value of '1'.
93 * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
94 * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
95 * value is no longer unique.
96 * To account for this, resctrl uses an index. On x86 this is just the RMID,
97 * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
98 *
99 * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
100 * must accept an attempt to read every index.
101 */
/*
 * Return the rmid_entry stored at @idx in rmid_ptrs[].
 *
 * The index is decoded back into a (CLOSID, RMID) pair and compared with the
 * values recorded in the entry. A mismatch only triggers a one-time warning;
 * the entry is returned in every case.
 */
static inline struct rmid_entry *__rmid_entry(u32 idx)
{
	struct rmid_entry *found = &rmid_ptrs[idx];
	u32 want_closid, want_rmid;

	resctrl_arch_rmid_idx_decode(idx, &want_closid, &want_rmid);

	WARN_ON_ONCE(found->closid != want_closid);
	WARN_ON_ONCE(found->rmid != want_rmid);

	return found;
}
115
limbo_release_entry(struct rmid_entry * entry)116 static void limbo_release_entry(struct rmid_entry *entry)
117 {
118 lockdep_assert_held(&rdtgroup_mutex);
119
120 rmid_limbo_count--;
121 list_add_tail(&entry->list, &rmid_free_lru);
122
123 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
124 closid_num_dirty_rmid[entry->closid]--;
125 }
126
127 /*
128 * Check the RMIDs that are marked as busy for this domain. If the
129 * reported LLC occupancy is below the threshold clear the busy bit and
130 * decrement the count. If the busy count gets to zero on an RMID, we
131 * free the RMID
132 */
__check_limbo(struct rdt_l3_mon_domain * d,bool force_free)133 void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
134 {
135 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
136 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
137 struct rmid_entry *entry;
138 u32 idx, cur_idx = 1;
139 void *arch_mon_ctx;
140 void *arch_priv;
141 bool rmid_dirty;
142 u64 val = 0;
143
144 arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv;
145 arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
146 if (IS_ERR(arch_mon_ctx)) {
147 pr_warn_ratelimited("Failed to allocate monitor context: %ld",
148 PTR_ERR(arch_mon_ctx));
149 return;
150 }
151
152 /*
153 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
154 * are marked as busy for occupancy < threshold. If the occupancy
155 * is less than the threshold decrement the busy counter of the
156 * RMID and move it to the free list when the counter reaches 0.
157 */
158 for (;;) {
159 idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
160 if (idx >= idx_limit)
161 break;
162
163 entry = __rmid_entry(idx);
164 if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid,
165 QOS_L3_OCCUP_EVENT_ID, arch_priv, &val,
166 arch_mon_ctx)) {
167 rmid_dirty = true;
168 } else {
169 rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
170
171 /*
172 * x86's CLOSID and RMID are independent numbers, so the entry's
173 * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
174 * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
175 * used to select the configuration. It is thus necessary to track both
176 * CLOSID and RMID because there may be dependencies between them
177 * on some architectures.
178 */
179 trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
180 }
181
182 if (force_free || !rmid_dirty) {
183 clear_bit(idx, d->rmid_busy_llc);
184 if (!--entry->busy)
185 limbo_release_entry(entry);
186 }
187 cur_idx = idx + 1;
188 }
189
190 resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
191 }
192
has_busy_rmid(struct rdt_l3_mon_domain * d)193 bool has_busy_rmid(struct rdt_l3_mon_domain *d)
194 {
195 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
196
197 return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
198 }
199
/*
 * Find an entry on the free list that can be used together with @closid.
 *
 * Return: the first suitable entry (still linked into rmid_free_lru), or an
 * ERR_PTR(): -EBUSY when the free list is empty but RMIDs are waiting in
 * limbo, -ENOSPC when the list is empty with nothing in limbo or when no
 * free entry matches @closid.
 */
static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
{
	struct rmid_entry *candidate;
	u32 own_idx, wanted_idx;

	if (list_empty(&rmid_free_lru)) {
		if (rmid_limbo_count)
			return ERR_PTR(-EBUSY);
		return ERR_PTR(-ENOSPC);
	}

	list_for_each_entry(candidate, &rmid_free_lru, list) {
		/*
		 * Compare the index this free RMID has with the index it
		 * would have if it were paired with the requested CLOSID.
		 * On architectures where the CLOSID does not contribute to
		 * the index both values are always equal, so the head of the
		 * list is handed out.
		 */
		own_idx = resctrl_arch_rmid_idx_encode(candidate->closid,
						       candidate->rmid);
		wanted_idx = resctrl_arch_rmid_idx_encode(closid,
							  candidate->rmid);
		if (own_idx != wanted_idx)
			continue;

		return candidate;
	}

	return ERR_PTR(-ENOSPC);
}
225
226 /**
227 * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
228 * RMID are clean, or the CLOSID that has
229 * the most clean RMID.
230 *
231 * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
232 * may not be able to allocate clean RMID. To avoid this the allocator will
233 * choose the CLOSID with the most clean RMID.
234 *
235 * When the CLOSID and RMID are independent numbers, the first free CLOSID will
236 * be returned.
237 */
resctrl_find_cleanest_closid(void)238 int resctrl_find_cleanest_closid(void)
239 {
240 u32 cleanest_closid = ~0;
241 int i = 0;
242
243 lockdep_assert_held(&rdtgroup_mutex);
244
245 if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
246 return -EIO;
247
248 for (i = 0; i < closids_supported(); i++) {
249 int num_dirty;
250
251 if (closid_allocated(i))
252 continue;
253
254 num_dirty = closid_num_dirty_rmid[i];
255 if (num_dirty == 0)
256 return i;
257
258 if (cleanest_closid == ~0)
259 cleanest_closid = i;
260
261 if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
262 cleanest_closid = i;
263 }
264
265 if (cleanest_closid == ~0)
266 return -ENOSPC;
267
268 return cleanest_closid;
269 }
270
271 /*
272 * For MPAM the RMID value is not unique, and has to be considered with
273 * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
274 * allows all domains to be managed by a single free list.
275 * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
276 */
alloc_rmid(u32 closid)277 int alloc_rmid(u32 closid)
278 {
279 struct rmid_entry *entry;
280
281 lockdep_assert_held(&rdtgroup_mutex);
282
283 entry = resctrl_find_free_rmid(closid);
284 if (IS_ERR(entry))
285 return PTR_ERR(entry);
286
287 list_del(&entry->list);
288 return entry->rmid;
289 }
290
add_rmid_to_limbo(struct rmid_entry * entry)291 static void add_rmid_to_limbo(struct rmid_entry *entry)
292 {
293 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
294 struct rdt_l3_mon_domain *d;
295 u32 idx;
296
297 lockdep_assert_held(&rdtgroup_mutex);
298
299 /* Walking r->domains, ensure it can't race with cpuhp */
300 lockdep_assert_cpus_held();
301
302 idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
303
304 entry->busy = 0;
305 list_for_each_entry(d, &r->mon_domains, hdr.list) {
306 /*
307 * For the first limbo RMID in the domain,
308 * setup up the limbo worker.
309 */
310 if (!has_busy_rmid(d))
311 cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
312 RESCTRL_PICK_ANY_CPU);
313 set_bit(idx, d->rmid_busy_llc);
314 entry->busy++;
315 }
316
317 rmid_limbo_count++;
318 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
319 closid_num_dirty_rmid[entry->closid]++;
320 }
321
/*
 * free_rmid() - Give back the RMID identified by (@closid, @rmid).
 *
 * Nothing is done when monitoring is not supported or when the pair encodes
 * to the reserved default index. Otherwise the entry goes to limbo if the L3
 * occupancy event is enabled, and straight to the tail of the free list if
 * it is not. The caller must hold rdtgroup_mutex.
 */
void free_rmid(u32 closid, u32 rmid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct rmid_entry *entry;
	u32 reserved_idx;

	lockdep_assert_held(&rdtgroup_mutex);

	if (!resctrl_arch_mon_capable())
		return;

	/*
	 * The default RMID must never be freed. Comparing the encoded index
	 * rather than the raw values lets architectures that ignore the
	 * CLOSID skip a needless check.
	 */
	reserved_idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
						    RESCTRL_RESERVED_RMID);
	if (idx == reserved_idx)
		return;

	entry = __rmid_entry(idx);

	if (!resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
		list_add_tail(&entry->list, &rmid_free_lru);
		return;
	}

	add_rmid_to_limbo(entry);
}
346
/*
 * Return the cached MBM state of (@closid, @rmid) for event @evtid in domain
 * @d, or NULL if @evtid is not an MBM event or the domain has no state array
 * for that event.
 */
static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid,
				       u32 rmid, enum resctrl_event_id evtid)
{
	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
	struct mbm_state *states;

	if (!resctrl_is_mbm_event(evtid))
		return NULL;

	states = d->mbm_states[MBM_STATE_IDX(evtid)];
	if (!states)
		return NULL;

	return states + idx;
}
360
361 /*
362 * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
363 *
364 * Return:
365 * Valid counter ID on success, or -ENOENT on failure.
366 */
mbm_cntr_get(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)367 static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
368 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
369 {
370 int cntr_id;
371
372 if (!r->mon.mbm_cntr_assignable)
373 return -ENOENT;
374
375 if (!resctrl_is_mbm_event(evtid))
376 return -ENOENT;
377
378 for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
379 if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
380 d->cntr_cfg[cntr_id].evtid == evtid)
381 return cntr_id;
382 }
383
384 return -ENOENT;
385 }
386
387 /*
388 * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
389 * Caller must ensure that the specified event is not assigned already.
390 *
391 * Return:
392 * Valid counter ID on success, or -ENOSPC on failure.
393 */
mbm_cntr_alloc(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)394 static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
395 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
396 {
397 int cntr_id;
398
399 for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
400 if (!d->cntr_cfg[cntr_id].rdtgrp) {
401 d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
402 d->cntr_cfg[cntr_id].evtid = evtid;
403 return cntr_id;
404 }
405 }
406
407 return -ENOSPC;
408 }
409
410 /*
411 * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
412 */
mbm_cntr_free(struct rdt_l3_mon_domain * d,int cntr_id)413 static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id)
414 {
415 memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
416 }
417
__l3_mon_event_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)418 static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
419 {
420 int cpu = smp_processor_id();
421 u32 closid = rdtgrp->closid;
422 u32 rmid = rdtgrp->mon.rmid;
423 struct rdt_l3_mon_domain *d;
424 int cntr_id = -ENOENT;
425 struct mbm_state *m;
426 u64 tval = 0;
427
428 if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) {
429 rr->err = -EIO;
430 return -EINVAL;
431 }
432 d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
433
434 if (rr->is_mbm_cntr) {
435 cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid);
436 if (cntr_id < 0) {
437 rr->err = -ENOENT;
438 return -EINVAL;
439 }
440 }
441
442 if (rr->first) {
443 if (rr->is_mbm_cntr)
444 resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid);
445 else
446 resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid);
447 m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
448 if (m)
449 memset(m, 0, sizeof(struct mbm_state));
450 return 0;
451 }
452
453 /* Reading a single domain, must be on a CPU in that domain. */
454 if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
455 return -EINVAL;
456 if (rr->is_mbm_cntr)
457 rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
458 rr->evt->evtid, &tval);
459 else
460 rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid,
461 rr->evt->evtid, rr->evt->arch_priv,
462 &tval, rr->arch_mon_ctx);
463 if (rr->err)
464 return rr->err;
465
466 rr->val += tval;
467
468 return 0;
469 }
470
__l3_mon_event_count_sum(struct rdtgroup * rdtgrp,struct rmid_read * rr)471 static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr)
472 {
473 int cpu = smp_processor_id();
474 u32 closid = rdtgrp->closid;
475 u32 rmid = rdtgrp->mon.rmid;
476 struct rdt_l3_mon_domain *d;
477 u64 tval = 0;
478 int err, ret;
479
480 /*
481 * Summing across domains is only done for systems that implement
482 * Sub-NUMA Cluster. There is no overlap with systems that support
483 * assignable counters.
484 */
485 if (rr->is_mbm_cntr) {
486 pr_warn_once("Summing domains using assignable counters is not supported\n");
487 rr->err = -EINVAL;
488 return -EINVAL;
489 }
490
491 /* Summing domains that share a cache, must be on a CPU for that cache. */
492 if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
493 return -EINVAL;
494
495 /*
496 * Legacy files must report the sum of an event across all
497 * domains that share the same L3 cache instance.
498 * Report success if a read from any domain succeeds, -EINVAL
499 * (translated to "Unavailable" for user space) if reading from
500 * all domains fail for any reason.
501 */
502 ret = -EINVAL;
503 list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
504 if (d->ci_id != rr->ci->id)
505 continue;
506 err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid,
507 rr->evt->evtid, rr->evt->arch_priv,
508 &tval, rr->arch_mon_ctx);
509 if (!err) {
510 rr->val += tval;
511 ret = 0;
512 }
513 }
514
515 if (ret)
516 rr->err = ret;
517
518 return ret;
519 }
520
__mon_event_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)521 static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
522 {
523 switch (rr->r->rid) {
524 case RDT_RESOURCE_L3:
525 WARN_ON_ONCE(rr->evt->any_cpu);
526 if (rr->hdr)
527 return __l3_mon_event_count(rdtgrp, rr);
528 else
529 return __l3_mon_event_count_sum(rdtgrp, rr);
530 case RDT_RESOURCE_PERF_PKG: {
531 u64 tval = 0;
532
533 rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid,
534 rdtgrp->mon.rmid, rr->evt->evtid,
535 rr->evt->arch_priv,
536 &tval, rr->arch_mon_ctx);
537 if (rr->err)
538 return rr->err;
539
540 rr->val += tval;
541
542 return 0;
543 }
544 default:
545 rr->err = -EINVAL;
546 return -EINVAL;
547 }
548 }
549
550 /*
551 * mbm_bw_count() - Update bw count from values previously read by
552 * __mon_event_count().
553 * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify
554 * the cached mbm_state.
555 * @rr: The struct rmid_read populated by __mon_event_count().
556 *
557 * Supporting function to calculate the memory bandwidth
558 * and delta bandwidth in MBps. The chunks value previously read by
559 * __mon_event_count() is compared with the chunks value from the previous
560 * invocation. This must be called once per second to maintain values in MBps.
561 */
mbm_bw_count(struct rdtgroup * rdtgrp,struct rmid_read * rr)562 static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
563 {
564 u64 cur_bw, bytes, cur_bytes;
565 u32 closid = rdtgrp->closid;
566 u32 rmid = rdtgrp->mon.rmid;
567 struct rdt_l3_mon_domain *d;
568 struct mbm_state *m;
569
570 if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
571 return;
572 d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
573 m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
574 if (WARN_ON_ONCE(!m))
575 return;
576
577 cur_bytes = rr->val;
578 bytes = cur_bytes - m->prev_bw_bytes;
579 m->prev_bw_bytes = cur_bytes;
580
581 cur_bw = bytes / SZ_1M;
582
583 m->prev_bw = cur_bw;
584 }
585
586 /*
587 * This is scheduled by mon_event_read() to read the CQM/MBM counters
588 * on a domain.
589 */
mon_event_count(void * info)590 void mon_event_count(void *info)
591 {
592 struct rdtgroup *rdtgrp, *entry;
593 struct rmid_read *rr = info;
594 struct list_head *head;
595 int ret;
596
597 rdtgrp = rr->rgrp;
598
599 ret = __mon_event_count(rdtgrp, rr);
600
601 /*
602 * For Ctrl groups read data from child monitor groups and
603 * add them together. Count events which are read successfully.
604 * Discard the rmid_read's reporting errors.
605 */
606 head = &rdtgrp->mon.crdtgrp_list;
607
608 if (rdtgrp->type == RDTCTRL_GROUP) {
609 list_for_each_entry(entry, head, mon.crdtgrp_list) {
610 if (__mon_event_count(entry, rr) == 0)
611 ret = 0;
612 }
613 }
614
615 /*
616 * __mon_event_count() calls for newly created monitor groups may
617 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
618 * Discard error if any of the monitor event reads succeeded.
619 */
620 if (ret == 0)
621 rr->err = 0;
622 }
623
get_ctrl_domain_from_cpu(int cpu,struct rdt_resource * r)624 static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
625 struct rdt_resource *r)
626 {
627 struct rdt_ctrl_domain *d;
628
629 lockdep_assert_cpus_held();
630
631 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
632 /* Find the domain that contains this CPU */
633 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
634 return d;
635 }
636
637 return NULL;
638 }
639
640 /*
641 * Feedback loop for MBA software controller (mba_sc)
642 *
643 * mba_sc is a feedback loop where we periodically read MBM counters and
644 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
645 * that:
646 *
647 * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
648 *
649 * This uses the MBM counters to measure the bandwidth and MBA throttle
650 * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
651 * fact that resctrl rdtgroups have both monitoring and control.
652 *
653 * The frequency of the checks is 1s and we just tag along the MBM overflow
654 * timer. Having 1s interval makes the calculation of bandwidth simpler.
655 *
656 * Although MBA's goal is to restrict the bandwidth to a maximum, there may
657 * be a need to increase the bandwidth to avoid unnecessarily restricting
658 * the L2 <-> L3 traffic.
659 *
660 * Since MBA controls the L2 external bandwidth where as MBM measures the
661 * L3 external bandwidth the following sequence could lead to such a
662 * situation.
663 *
664 * Consider an rdtgroup which had high L3 <-> memory traffic in initial
665 * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
666 * after some time rdtgroup has mostly L2 <-> L3 traffic.
667 *
668 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
669 * throttle MSRs already have low percentage values. To avoid
670 * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
671 */
update_mba_bw(struct rdtgroup * rgrp,struct rdt_l3_mon_domain * dom_mbm)672 static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm)
673 {
674 u32 closid, rmid, cur_msr_val, new_msr_val;
675 struct mbm_state *pmbm_data, *cmbm_data;
676 struct rdt_ctrl_domain *dom_mba;
677 enum resctrl_event_id evt_id;
678 struct rdt_resource *r_mba;
679 struct list_head *head;
680 struct rdtgroup *entry;
681 u32 cur_bw, user_bw;
682
683 r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
684 evt_id = rgrp->mba_mbps_event;
685
686 closid = rgrp->closid;
687 rmid = rgrp->mon.rmid;
688 pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
689 if (WARN_ON_ONCE(!pmbm_data))
690 return;
691
692 dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
693 if (!dom_mba) {
694 pr_warn_once("Failure to get domain for MBA update\n");
695 return;
696 }
697
698 cur_bw = pmbm_data->prev_bw;
699 user_bw = dom_mba->mbps_val[closid];
700
701 /* MBA resource doesn't support CDP */
702 cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
703
704 /*
705 * For Ctrl groups read data from child monitor groups.
706 */
707 head = &rgrp->mon.crdtgrp_list;
708 list_for_each_entry(entry, head, mon.crdtgrp_list) {
709 cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
710 if (WARN_ON_ONCE(!cmbm_data))
711 return;
712 cur_bw += cmbm_data->prev_bw;
713 }
714
715 /*
716 * Scale up/down the bandwidth linearly for the ctrl group. The
717 * bandwidth step is the bandwidth granularity specified by the
718 * hardware.
719 * Always increase throttling if current bandwidth is above the
720 * target set by user.
721 * But avoid thrashing up and down on every poll by checking
722 * whether a decrease in throttling is likely to push the group
723 * back over target. E.g. if currently throttling to 30% of bandwidth
724 * on a system with 10% granularity steps, check whether moving to
725 * 40% would go past the limit by multiplying current bandwidth by
726 * "(30 + 10) / 30".
727 */
728 if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
729 new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
730 } else if (cur_msr_val < MAX_MBA_BW &&
731 (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
732 new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
733 } else {
734 return;
735 }
736
737 resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
738 }
739
mbm_update_one_event(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)740 static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
741 struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
742 {
743 struct rmid_read rr = {0};
744
745 rr.r = r;
746 rr.hdr = &d->hdr;
747 rr.evt = &mon_event_all[evtid];
748 if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
749 rr.is_mbm_cntr = true;
750 } else {
751 rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid);
752 if (IS_ERR(rr.arch_mon_ctx)) {
753 pr_warn_ratelimited("Failed to allocate monitor context: %ld",
754 PTR_ERR(rr.arch_mon_ctx));
755 return;
756 }
757 }
758
759 __mon_event_count(rdtgrp, &rr);
760
761 /*
762 * If the software controller is enabled, compute the
763 * bandwidth for this event id.
764 */
765 if (is_mba_sc(NULL))
766 mbm_bw_count(rdtgrp, &rr);
767
768 if (rr.arch_mon_ctx)
769 resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx);
770 }
771
mbm_update(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp)772 static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
773 struct rdtgroup *rdtgrp)
774 {
775 /*
776 * This is protected from concurrent reads from user as both
777 * the user and overflow handler hold the global mutex.
778 */
779 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
780 mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
781
782 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
783 mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
784 }
785
786 /*
787 * Handler to scan the limbo list and move the RMIDs
788 * to free list whose occupancy < threshold_occupancy.
789 */
cqm_handle_limbo(struct work_struct * work)790 void cqm_handle_limbo(struct work_struct *work)
791 {
792 unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
793 struct rdt_l3_mon_domain *d;
794
795 cpus_read_lock();
796 mutex_lock(&rdtgroup_mutex);
797
798 d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work);
799
800 __check_limbo(d, false);
801
802 if (has_busy_rmid(d)) {
803 d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
804 RESCTRL_PICK_ANY_CPU);
805 schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
806 delay);
807 }
808
809 mutex_unlock(&rdtgroup_mutex);
810 cpus_read_unlock();
811 }
812
813 /**
814 * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
815 * domain.
816 * @dom: The domain the limbo handler should run for.
817 * @delay_ms: How far in the future the handler should run.
818 * @exclude_cpu: Which CPU the handler should not run on,
819 * RESCTRL_PICK_ANY_CPU to pick any CPU.
820 */
cqm_setup_limbo_handler(struct rdt_l3_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)821 void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
822 int exclude_cpu)
823 {
824 unsigned long delay = msecs_to_jiffies(delay_ms);
825 int cpu;
826
827 cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
828 dom->cqm_work_cpu = cpu;
829
830 if (cpu < nr_cpu_ids)
831 schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
832 }
833
mbm_handle_overflow(struct work_struct * work)834 void mbm_handle_overflow(struct work_struct *work)
835 {
836 unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
837 struct rdtgroup *prgrp, *crgrp;
838 struct rdt_l3_mon_domain *d;
839 struct list_head *head;
840 struct rdt_resource *r;
841
842 cpus_read_lock();
843 mutex_lock(&rdtgroup_mutex);
844
845 /*
846 * If the filesystem has been unmounted this work no longer needs to
847 * run.
848 */
849 if (!resctrl_mounted || !resctrl_arch_mon_capable())
850 goto out_unlock;
851
852 r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
853 d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work);
854
855 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
856 mbm_update(r, d, prgrp);
857
858 head = &prgrp->mon.crdtgrp_list;
859 list_for_each_entry(crgrp, head, mon.crdtgrp_list)
860 mbm_update(r, d, crgrp);
861
862 if (is_mba_sc(NULL))
863 update_mba_bw(prgrp, d);
864 }
865
866 /*
867 * Re-check for housekeeping CPUs. This allows the overflow handler to
868 * move off a nohz_full CPU quickly.
869 */
870 d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
871 RESCTRL_PICK_ANY_CPU);
872 schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
873
874 out_unlock:
875 mutex_unlock(&rdtgroup_mutex);
876 cpus_read_unlock();
877 }
878
879 /**
880 * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
881 * domain.
882 * @dom: The domain the overflow handler should run for.
883 * @delay_ms: How far in the future the handler should run.
884 * @exclude_cpu: Which CPU the handler should not run on,
885 * RESCTRL_PICK_ANY_CPU to pick any CPU.
886 */
mbm_setup_overflow_handler(struct rdt_l3_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)887 void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
888 int exclude_cpu)
889 {
890 unsigned long delay = msecs_to_jiffies(delay_ms);
891 int cpu;
892
893 /*
894 * When a domain comes online there is no guarantee the filesystem is
895 * mounted. If not, there is no need to catch counter overflow.
896 */
897 if (!resctrl_mounted || !resctrl_arch_mon_capable())
898 return;
899 cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
900 dom->mbm_work_cpu = cpu;
901
902 if (cpu < nr_cpu_ids)
903 schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
904 }
905
setup_rmid_lru_list(void)906 int setup_rmid_lru_list(void)
907 {
908 struct rmid_entry *entry = NULL;
909 u32 idx_limit;
910 u32 idx;
911 int i;
912
913 if (!resctrl_arch_mon_capable())
914 return 0;
915
916 /*
917 * Called on every mount, but the number of RMIDs cannot change
918 * after the first mount, so keep using the same set of rmid_ptrs[]
919 * until resctrl_exit(). Note that the limbo handler continues to
920 * access rmid_ptrs[] after resctrl is unmounted.
921 */
922 if (rmid_ptrs)
923 return 0;
924
925 idx_limit = resctrl_arch_system_num_rmid_idx();
926 rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
927 if (!rmid_ptrs)
928 return -ENOMEM;
929
930 for (i = 0; i < idx_limit; i++) {
931 entry = &rmid_ptrs[i];
932 INIT_LIST_HEAD(&entry->list);
933
934 resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
935 list_add_tail(&entry->list, &rmid_free_lru);
936 }
937
938 /*
939 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
940 * are always allocated. These are used for the rdtgroup_default
941 * control group, which was setup earlier in rdtgroup_setup_default().
942 */
943 idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
944 RESCTRL_RESERVED_RMID);
945 entry = __rmid_entry(idx);
946 list_del(&entry->list);
947
948 return 0;
949 }
950
free_rmid_lru_list(void)951 void free_rmid_lru_list(void)
952 {
953 if (!resctrl_arch_mon_capable())
954 return;
955
956 mutex_lock(&rdtgroup_mutex);
957 kfree(rmid_ptrs);
958 rmid_ptrs = NULL;
959 mutex_unlock(&rdtgroup_mutex);
960 }
961
/*
 * MON_EVENT() - Build one designated initializer for mon_event_all[].
 * The entry is stored at index @_eventid and records the event's name, its
 * ID, the resource it belongs to and whether it may be reported as a
 * floating point value. All other members start out zeroed.
 */
#define MON_EVENT(_eventid, _name, _res, _fp)	\
	[_eventid] = {				\
	.name			= _name,	\
	.evtid			= _eventid,	\
	.rid			= _res,		\
	.is_floating_point	= _fp,		\
}

/*
 * All available events. Architecture code marks the ones that
 * are supported by a system using resctrl_enable_mon_event()
 * to set .enabled.
 */
struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
	MON_EVENT(QOS_L3_OCCUP_EVENT_ID,		"llc_occupancy",	RDT_RESOURCE_L3,	false),
	MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID,		"mbm_total_bytes",	RDT_RESOURCE_L3,	false),
	MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID,		"mbm_local_bytes",	RDT_RESOURCE_L3,	false),
	MON_EVENT(PMT_EVENT_ENERGY,			"core_energy",		RDT_RESOURCE_PERF_PKG,	true),
	MON_EVENT(PMT_EVENT_ACTIVITY,			"activity",		RDT_RESOURCE_PERF_PKG,	true),
	MON_EVENT(PMT_EVENT_STALLS_LLC_HIT,		"stalls_llc_hit",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_C1_RES,			"c1_res",		RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES,	"unhalted_core_cycles",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_STALLS_LLC_MISS,		"stalls_llc_miss",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_AUTO_C6_RES,		"c6_res",		RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES,	"unhalted_ref_cycles",	RDT_RESOURCE_PERF_PKG,	false),
	MON_EVENT(PMT_EVENT_UOPS_RETIRED,		"uops_retired",		RDT_RESOURCE_PERF_PKG,	false),
};
989
resctrl_enable_mon_event(enum resctrl_event_id eventid,bool any_cpu,unsigned int binary_bits,void * arch_priv)990 bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
991 unsigned int binary_bits, void *arch_priv)
992 {
993 if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
994 binary_bits > MAX_BINARY_BITS))
995 return false;
996 if (mon_event_all[eventid].enabled) {
997 pr_warn("Duplicate enable for event %d\n", eventid);
998 return false;
999 }
1000 if (binary_bits && !mon_event_all[eventid].is_floating_point) {
1001 pr_warn("Event %d may not be floating point\n", eventid);
1002 return false;
1003 }
1004
1005 mon_event_all[eventid].any_cpu = any_cpu;
1006 mon_event_all[eventid].binary_bits = binary_bits;
1007 mon_event_all[eventid].arch_priv = arch_priv;
1008 mon_event_all[eventid].enabled = true;
1009
1010 return true;
1011 }
1012
resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)1013 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
1014 {
1015 return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
1016 mon_event_all[eventid].enabled;
1017 }
1018
resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)1019 u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
1020 {
1021 return mon_event_all[evtid].evt_cfg;
1022 }
1023
1024 /**
1025 * struct mbm_transaction - Memory transaction an MBM event can be configured with.
1026 * @name: Name of memory transaction (read, write ...).
1027 * @val: The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
1028 * represent the memory transaction within an event's configuration.
1029 */
1030 struct mbm_transaction {
1031 char name[32];
1032 u32 val;
1033 };
1034
1035 /* Decoded values for each type of memory transaction. */
1036 static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
1037 {"local_reads", READS_TO_LOCAL_MEM},
1038 {"remote_reads", READS_TO_REMOTE_MEM},
1039 {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
1040 {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
1041 {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
1042 {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
1043 {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
1044 };
1045
event_filter_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)1046 int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
1047 {
1048 struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1049 struct rdt_resource *r;
1050 bool sep = false;
1051 int ret = 0, i;
1052
1053 mutex_lock(&rdtgroup_mutex);
1054 rdt_last_cmd_clear();
1055
1056 r = resctrl_arch_get_resource(mevt->rid);
1057 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1058 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1059 ret = -EINVAL;
1060 goto out_unlock;
1061 }
1062
1063 for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
1064 if (mevt->evt_cfg & mbm_transactions[i].val) {
1065 if (sep)
1066 seq_putc(seq, ',');
1067 seq_printf(seq, "%s", mbm_transactions[i].name);
1068 sep = true;
1069 }
1070 }
1071 seq_putc(seq, '\n');
1072
1073 out_unlock:
1074 mutex_unlock(&rdtgroup_mutex);
1075
1076 return ret;
1077 }
1078
resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1079 int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
1080 void *v)
1081 {
1082 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1083 int ret = 0;
1084
1085 mutex_lock(&rdtgroup_mutex);
1086 rdt_last_cmd_clear();
1087
1088 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1089 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1090 ret = -EINVAL;
1091 goto out_unlock;
1092 }
1093
1094 seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
1095
1096 out_unlock:
1097 mutex_unlock(&rdtgroup_mutex);
1098
1099 return ret;
1100 }
1101
resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1102 ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
1103 size_t nbytes, loff_t off)
1104 {
1105 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1106 bool value;
1107 int ret;
1108
1109 ret = kstrtobool(buf, &value);
1110 if (ret)
1111 return ret;
1112
1113 mutex_lock(&rdtgroup_mutex);
1114 rdt_last_cmd_clear();
1115
1116 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1117 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1118 ret = -EINVAL;
1119 goto out_unlock;
1120 }
1121
1122 r->mon.mbm_assign_on_mkdir = value;
1123
1124 out_unlock:
1125 mutex_unlock(&rdtgroup_mutex);
1126
1127 return ret ?: nbytes;
1128 }
1129
1130 /*
1131 * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
1132 * domain @d. Called when mbm_assign_mode is changed.
1133 */
mbm_cntr_free_all(struct rdt_resource * r,struct rdt_l3_mon_domain * d)1134 static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1135 {
1136 memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
1137 }
1138
1139 /*
1140 * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
1141 * supported RMIDs.
1142 */
resctrl_reset_rmid_all(struct rdt_resource * r,struct rdt_l3_mon_domain * d)1143 static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
1144 {
1145 u32 idx_limit = resctrl_arch_system_num_rmid_idx();
1146 enum resctrl_event_id evt;
1147 int idx;
1148
1149 for_each_mbm_event_id(evt) {
1150 if (!resctrl_is_mon_event_enabled(evt))
1151 continue;
1152 idx = MBM_STATE_IDX(evt);
1153 memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
1154 }
1155 }
1156
1157 /*
1158 * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
1159 * pair in the domain.
1160 *
1161 * Assign the counter if @assign is true else unassign the counter. Reset the
1162 * associated non-architectural state.
1163 */
rdtgroup_assign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,enum resctrl_event_id evtid,u32 rmid,u32 closid,u32 cntr_id,bool assign)1164 static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1165 enum resctrl_event_id evtid, u32 rmid, u32 closid,
1166 u32 cntr_id, bool assign)
1167 {
1168 struct mbm_state *m;
1169
1170 resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
1171
1172 m = get_mbm_state(d, closid, rmid, evtid);
1173 if (m)
1174 memset(m, 0, sizeof(*m));
1175 }
1176
1177 /*
1178 * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
1179 * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
1180 *
1181 * Return:
1182 * 0 on success, < 0 on failure.
1183 */
rdtgroup_alloc_assign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1184 static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1185 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1186 {
1187 int cntr_id;
1188
1189 /* No action required if the counter is assigned already. */
1190 cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1191 if (cntr_id >= 0)
1192 return 0;
1193
1194 cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
1195 if (cntr_id < 0) {
1196 rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
1197 mevt->name, d->hdr.id);
1198 return cntr_id;
1199 }
1200
1201 rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
1202
1203 return 0;
1204 }
1205
1206 /*
1207 * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
1208 * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
1209 * NULL; otherwise, assign the counter to the specified domain @d.
1210 *
1211 * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
1212 * will fail. The assignment process will abort at the first failure encountered
1213 * during domain traversal, which may result in the event being only partially
1214 * assigned.
1215 *
1216 * Return:
1217 * 0 on success, < 0 on failure.
1218 */
rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1219 static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1220 struct mon_evt *mevt)
1221 {
1222 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1223 int ret = 0;
1224
1225 if (!d) {
1226 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1227 ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1228 if (ret)
1229 return ret;
1230 }
1231 } else {
1232 ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
1233 }
1234
1235 return ret;
1236 }
1237
1238 /*
1239 * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
1240 * a new group is created.
1241 *
1242 * Each group can accommodate two counters per domain: one for the total
1243 * event and one for the local event. Assignments may fail due to the limited
1244 * number of counters. However, it is not necessary to fail the group creation
1245 * and thus no failure is returned. Users have the option to modify the
1246 * counter assignments after the group has been created.
1247 */
rdtgroup_assign_cntrs(struct rdtgroup * rdtgrp)1248 void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
1249 {
1250 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1251
1252 if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
1253 !r->mon.mbm_assign_on_mkdir)
1254 return;
1255
1256 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1257 rdtgroup_assign_cntr_event(NULL, rdtgrp,
1258 &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1259
1260 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1261 rdtgroup_assign_cntr_event(NULL, rdtgrp,
1262 &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1263 }
1264
1265 /*
1266 * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
1267 * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
1268 */
rdtgroup_free_unassign_cntr(struct rdt_resource * r,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1269 static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
1270 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1271 {
1272 int cntr_id;
1273
1274 cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
1275
1276 /* If there is no cntr_id assigned, nothing to do */
1277 if (cntr_id < 0)
1278 return;
1279
1280 rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
1281
1282 mbm_cntr_free(d, cntr_id);
1283 }
1284
1285 /*
1286 * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
1287 * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
1288 * the counters from all the domains if @d is NULL else unassign from @d.
1289 */
rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1290 static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
1291 struct mon_evt *mevt)
1292 {
1293 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1294
1295 if (!d) {
1296 list_for_each_entry(d, &r->mon_domains, hdr.list)
1297 rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1298 } else {
1299 rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
1300 }
1301 }
1302
1303 /*
1304 * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
1305 * Called when a group is deleted.
1306 */
rdtgroup_unassign_cntrs(struct rdtgroup * rdtgrp)1307 void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
1308 {
1309 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1310
1311 if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
1312 return;
1313
1314 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1315 rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1316 &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
1317
1318 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1319 rdtgroup_unassign_cntr_event(NULL, rdtgrp,
1320 &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
1321 }
1322
/*
 * resctrl_parse_mem_transactions() - Convert a comma separated list of memory
 * transaction names into a configuration value.
 * @tok: List to parse; it is modified in place by strsep()/strim().
 * @val: Receives the OR of the matching mbm_transactions[] bits. Only written
 *       on success.
 *
 * A NULL or empty @tok yields a value of 0.
 *
 * Return: 0 on success, -EINVAL if a name matches no known transaction type.
 */
static int resctrl_parse_mem_transactions(char *tok, u32 *val)
{
	u32 cfg = 0;
	char *name;
	int i;

	while (tok && tok[0] != '\0') {
		/* Peel off the next name and drop surrounding whitespace. */
		name = strim(strsep(&tok, ","));

		for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
			if (!strcmp(mbm_transactions[i].name, name))
				break;
		}

		/* Ran off the end of the table: unknown name. */
		if (i == NUM_MBM_TRANSACTIONS) {
			rdt_last_cmd_printf("Invalid memory transaction type %s\n", name);
			return -EINVAL;
		}

		cfg |= mbm_transactions[i].val;
	}

	*val = cfg;

	return 0;
}
1354
1355 /*
1356 * rdtgroup_update_cntr_event - Update the counter assignments for the event
1357 * in a group.
1358 * @r: Resource to which update needs to be done.
1359 * @rdtgrp: Resctrl group.
1360 * @evtid: MBM monitor event.
1361 */
rdtgroup_update_cntr_event(struct rdt_resource * r,struct rdtgroup * rdtgrp,enum resctrl_event_id evtid)1362 static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1363 enum resctrl_event_id evtid)
1364 {
1365 struct rdt_l3_mon_domain *d;
1366 int cntr_id;
1367
1368 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1369 cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
1370 if (cntr_id >= 0)
1371 rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
1372 rdtgrp->closid, cntr_id, true);
1373 }
1374 }
1375
1376 /*
1377 * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
1378 * for all the groups.
1379 * @mevt MBM Monitor event.
1380 */
resctrl_update_cntr_allrdtgrp(struct mon_evt * mevt)1381 static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
1382 {
1383 struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
1384 struct rdtgroup *prgrp, *crgrp;
1385
1386 /*
1387 * Find all the groups where the event is assigned and update the
1388 * configuration of existing assignments.
1389 */
1390 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1391 rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
1392
1393 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
1394 rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
1395 }
1396 }
1397
event_filter_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1398 ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
1399 loff_t off)
1400 {
1401 struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
1402 struct rdt_resource *r;
1403 u32 evt_cfg = 0;
1404 int ret = 0;
1405
1406 /* Valid input requires a trailing newline */
1407 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1408 return -EINVAL;
1409
1410 buf[nbytes - 1] = '\0';
1411
1412 cpus_read_lock();
1413 mutex_lock(&rdtgroup_mutex);
1414
1415 rdt_last_cmd_clear();
1416
1417 r = resctrl_arch_get_resource(mevt->rid);
1418 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1419 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1420 ret = -EINVAL;
1421 goto out_unlock;
1422 }
1423
1424 ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
1425 if (!ret && mevt->evt_cfg != evt_cfg) {
1426 mevt->evt_cfg = evt_cfg;
1427 resctrl_update_cntr_allrdtgrp(mevt);
1428 }
1429
1430 out_unlock:
1431 mutex_unlock(&rdtgroup_mutex);
1432 cpus_read_unlock();
1433
1434 return ret ?: nbytes;
1435 }
1436
resctrl_mbm_assign_mode_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1437 int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
1438 struct seq_file *s, void *v)
1439 {
1440 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1441 bool enabled;
1442
1443 mutex_lock(&rdtgroup_mutex);
1444 enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
1445
1446 if (r->mon.mbm_cntr_assignable) {
1447 if (enabled)
1448 seq_puts(s, "[mbm_event]\n");
1449 else
1450 seq_puts(s, "[default]\n");
1451
1452 if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
1453 if (enabled)
1454 seq_puts(s, "default\n");
1455 else
1456 seq_puts(s, "mbm_event\n");
1457 }
1458 } else {
1459 seq_puts(s, "[default]\n");
1460 }
1461
1462 mutex_unlock(&rdtgroup_mutex);
1463
1464 return 0;
1465 }
1466
resctrl_mbm_assign_mode_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1467 ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
1468 size_t nbytes, loff_t off)
1469 {
1470 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1471 struct rdt_l3_mon_domain *d;
1472 int ret = 0;
1473 bool enable;
1474
1475 /* Valid input requires a trailing newline */
1476 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1477 return -EINVAL;
1478
1479 buf[nbytes - 1] = '\0';
1480
1481 cpus_read_lock();
1482 mutex_lock(&rdtgroup_mutex);
1483
1484 rdt_last_cmd_clear();
1485
1486 if (!strcmp(buf, "default")) {
1487 enable = 0;
1488 } else if (!strcmp(buf, "mbm_event")) {
1489 if (r->mon.mbm_cntr_assignable) {
1490 enable = 1;
1491 } else {
1492 ret = -EINVAL;
1493 rdt_last_cmd_puts("mbm_event mode is not supported\n");
1494 goto out_unlock;
1495 }
1496 } else {
1497 ret = -EINVAL;
1498 rdt_last_cmd_puts("Unsupported assign mode\n");
1499 goto out_unlock;
1500 }
1501
1502 if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
1503 ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
1504 if (ret)
1505 goto out_unlock;
1506
1507 /* Update the visibility of BMEC related files */
1508 resctrl_bmec_files_show(r, NULL, !enable);
1509
1510 /*
1511 * Initialize the default memory transaction values for
1512 * total and local events.
1513 */
1514 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1515 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1516 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1517 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1518 (READS_TO_LOCAL_MEM |
1519 READS_TO_LOCAL_S_MEM |
1520 NON_TEMP_WRITE_TO_LOCAL_MEM);
1521 /* Enable auto assignment when switching to "mbm_event" mode */
1522 if (enable)
1523 r->mon.mbm_assign_on_mkdir = true;
1524 /*
1525 * Reset all the non-achitectural RMID state and assignable counters.
1526 */
1527 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1528 mbm_cntr_free_all(r, d);
1529 resctrl_reset_rmid_all(r, d);
1530 }
1531 }
1532
1533 out_unlock:
1534 mutex_unlock(&rdtgroup_mutex);
1535 cpus_read_unlock();
1536
1537 return ret ?: nbytes;
1538 }
1539
resctrl_num_mbm_cntrs_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1540 int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
1541 struct seq_file *s, void *v)
1542 {
1543 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1544 struct rdt_l3_mon_domain *dom;
1545 bool sep = false;
1546
1547 cpus_read_lock();
1548 mutex_lock(&rdtgroup_mutex);
1549
1550 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1551 if (sep)
1552 seq_putc(s, ';');
1553
1554 seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
1555 sep = true;
1556 }
1557 seq_putc(s, '\n');
1558
1559 mutex_unlock(&rdtgroup_mutex);
1560 cpus_read_unlock();
1561 return 0;
1562 }
1563
resctrl_available_mbm_cntrs_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1564 int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
1565 struct seq_file *s, void *v)
1566 {
1567 struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
1568 struct rdt_l3_mon_domain *dom;
1569 bool sep = false;
1570 u32 cntrs, i;
1571 int ret = 0;
1572
1573 cpus_read_lock();
1574 mutex_lock(&rdtgroup_mutex);
1575
1576 rdt_last_cmd_clear();
1577
1578 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1579 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1580 ret = -EINVAL;
1581 goto out_unlock;
1582 }
1583
1584 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1585 if (sep)
1586 seq_putc(s, ';');
1587
1588 cntrs = 0;
1589 for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
1590 if (!dom->cntr_cfg[i].rdtgrp)
1591 cntrs++;
1592 }
1593
1594 seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
1595 sep = true;
1596 }
1597 seq_putc(s, '\n');
1598
1599 out_unlock:
1600 mutex_unlock(&rdtgroup_mutex);
1601 cpus_read_unlock();
1602
1603 return ret;
1604 }
1605
mbm_L3_assignments_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1606 int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
1607 {
1608 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1609 struct rdt_l3_mon_domain *d;
1610 struct rdtgroup *rdtgrp;
1611 struct mon_evt *mevt;
1612 int ret = 0;
1613 bool sep;
1614
1615 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1616 if (!rdtgrp) {
1617 ret = -ENOENT;
1618 goto out_unlock;
1619 }
1620
1621 rdt_last_cmd_clear();
1622 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1623 rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
1624 ret = -EINVAL;
1625 goto out_unlock;
1626 }
1627
1628 for_each_mon_event(mevt) {
1629 if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
1630 continue;
1631
1632 sep = false;
1633 seq_printf(s, "%s:", mevt->name);
1634 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1635 if (sep)
1636 seq_putc(s, ';');
1637
1638 if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
1639 seq_printf(s, "%d=_", d->hdr.id);
1640 else
1641 seq_printf(s, "%d=e", d->hdr.id);
1642
1643 sep = true;
1644 }
1645 seq_putc(s, '\n');
1646 }
1647
1648 out_unlock:
1649 rdtgroup_kn_unlock(of->kn);
1650
1651 return ret;
1652 }
1653
1654 /*
1655 * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
1656 * event name.
1657 */
mbm_get_mon_event_by_name(struct rdt_resource * r,char * name)1658 static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
1659 {
1660 struct mon_evt *mevt;
1661
1662 for_each_mon_event(mevt) {
1663 if (mevt->rid == r->rid && mevt->enabled &&
1664 resctrl_is_mbm_event(mevt->evtid) &&
1665 !strcmp(mevt->name, name))
1666 return mevt;
1667 }
1668
1669 return NULL;
1670 }
1671
rdtgroup_modify_assign_state(char * assign,struct rdt_l3_mon_domain * d,struct rdtgroup * rdtgrp,struct mon_evt * mevt)1672 static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d,
1673 struct rdtgroup *rdtgrp, struct mon_evt *mevt)
1674 {
1675 int ret = 0;
1676
1677 if (!assign || strlen(assign) != 1)
1678 return -EINVAL;
1679
1680 switch (*assign) {
1681 case 'e':
1682 ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
1683 break;
1684 case '_':
1685 rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
1686 break;
1687 default:
1688 ret = -EINVAL;
1689 break;
1690 }
1691
1692 return ret;
1693 }
1694
resctrl_parse_mbm_assignment(struct rdt_resource * r,struct rdtgroup * rdtgrp,char * event,char * tok)1695 static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
1696 char *event, char *tok)
1697 {
1698 struct rdt_l3_mon_domain *d;
1699 unsigned long dom_id = 0;
1700 char *dom_str, *id_str;
1701 struct mon_evt *mevt;
1702 int ret;
1703
1704 mevt = mbm_get_mon_event_by_name(r, event);
1705 if (!mevt) {
1706 rdt_last_cmd_printf("Invalid event %s\n", event);
1707 return -ENOENT;
1708 }
1709
1710 next:
1711 if (!tok || tok[0] == '\0')
1712 return 0;
1713
1714 /* Start processing the strings for each domain */
1715 dom_str = strim(strsep(&tok, ";"));
1716
1717 id_str = strsep(&dom_str, "=");
1718
1719 /* Check for domain id '*' which means all domains */
1720 if (id_str && *id_str == '*') {
1721 ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
1722 if (ret)
1723 rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
1724 event, dom_str);
1725 return ret;
1726 } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1727 rdt_last_cmd_puts("Missing domain id\n");
1728 return -EINVAL;
1729 }
1730
1731 /* Verify if the dom_id is valid */
1732 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1733 if (d->hdr.id == dom_id) {
1734 ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
1735 if (ret) {
1736 rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
1737 event, dom_id, dom_str);
1738 return ret;
1739 }
1740 goto next;
1741 }
1742 }
1743
1744 rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
1745 return -EINVAL;
1746 }
1747
mbm_L3_assignments_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1748 ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
1749 size_t nbytes, loff_t off)
1750 {
1751 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1752 struct rdtgroup *rdtgrp;
1753 char *token, *event;
1754 int ret = 0;
1755
1756 /* Valid input requires a trailing newline */
1757 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1758 return -EINVAL;
1759
1760 buf[nbytes - 1] = '\0';
1761
1762 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1763 if (!rdtgrp) {
1764 rdtgroup_kn_unlock(of->kn);
1765 return -ENOENT;
1766 }
1767 rdt_last_cmd_clear();
1768
1769 if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
1770 rdt_last_cmd_puts("mbm_event mode is not enabled\n");
1771 rdtgroup_kn_unlock(of->kn);
1772 return -EINVAL;
1773 }
1774
1775 while ((token = strsep(&buf, "\n")) != NULL) {
1776 /*
1777 * The write command follows the following format:
1778 * "<Event>:<Domain ID>=<Assignment state>"
1779 * Extract the event name first.
1780 */
1781 event = strsep(&token, ":");
1782
1783 ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
1784 if (ret)
1785 break;
1786 }
1787
1788 rdtgroup_kn_unlock(of->kn);
1789
1790 return ret ?: nbytes;
1791 }
1792
closid_num_dirty_rmid_alloc(struct rdt_resource * r)1793 static int closid_num_dirty_rmid_alloc(struct rdt_resource *r)
1794 {
1795 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1796 u32 num_closid = resctrl_arch_get_num_closid(r);
1797 u32 *tmp;
1798
1799 /* For ARM memory ordering access to closid_num_dirty_rmid */
1800 mutex_lock(&rdtgroup_mutex);
1801
1802 /*
1803 * If the architecture hasn't provided a sanitised value here,
1804 * this may result in larger arrays than necessary. Resctrl will
1805 * use a smaller system wide value based on the resources in
1806 * use.
1807 */
1808 tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
1809 if (!tmp) {
1810 mutex_unlock(&rdtgroup_mutex);
1811 return -ENOMEM;
1812 }
1813
1814 closid_num_dirty_rmid = tmp;
1815
1816 mutex_unlock(&rdtgroup_mutex);
1817 }
1818
1819 return 0;
1820 }
1821
closid_num_dirty_rmid_free(void)1822 static void closid_num_dirty_rmid_free(void)
1823 {
1824 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1825 mutex_lock(&rdtgroup_mutex);
1826 kfree(closid_num_dirty_rmid);
1827 closid_num_dirty_rmid = NULL;
1828 mutex_unlock(&rdtgroup_mutex);
1829 }
1830 }
1831
1832 /**
1833 * resctrl_l3_mon_resource_init() - Initialise global monitoring structures.
1834 *
1835 * Allocate and initialise global monitor resources that do not belong to a
1836 * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID
1837 * with the cleanest set of RMIDs.
1838 * Called once during boot after the struct rdt_resource's have been configured
1839 * but before the filesystem is mounted.
1840 * Resctrl's cpuhp callbacks may be called before this point to bring a domain
1841 * online.
1842 *
1843 * Return: 0 for success, or -ENOMEM.
1844 */
resctrl_l3_mon_resource_init(void)1845 int resctrl_l3_mon_resource_init(void)
1846 {
1847 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1848 int ret;
1849
1850 if (!r->mon_capable)
1851 return 0;
1852
1853 ret = closid_num_dirty_rmid_alloc(r);
1854 if (ret)
1855 return ret;
1856
1857 if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
1858 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
1859 resctrl_file_fflags_init("mbm_total_bytes_config",
1860 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1861 }
1862 if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
1863 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
1864 resctrl_file_fflags_init("mbm_local_bytes_config",
1865 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1866 }
1867
1868 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1869 mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
1870 else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1871 mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
1872
1873 if (r->mon.mbm_cntr_assignable) {
1874 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
1875 mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
1876 if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
1877 mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
1878 (READS_TO_LOCAL_MEM |
1879 READS_TO_LOCAL_S_MEM |
1880 NON_TEMP_WRITE_TO_LOCAL_MEM);
1881 r->mon.mbm_assign_on_mkdir = true;
1882 resctrl_file_fflags_init("num_mbm_cntrs",
1883 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1884 resctrl_file_fflags_init("available_mbm_cntrs",
1885 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1886 resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
1887 resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
1888 RFTYPE_RES_CACHE);
1889 resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
1890 }
1891
1892 return 0;
1893 }
1894
resctrl_l3_mon_resource_exit(void)1895 void resctrl_l3_mon_resource_exit(void)
1896 {
1897 struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
1898
1899 if (!r->mon_capable)
1900 return;
1901
1902 closid_num_dirty_rmid_free();
1903 }
1904