xref: /linux/fs/resctrl/ctrlmondata.c (revision e37c9a3dc9f9645532780d5ef34ea3b8fcf9ddef)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology(RDT)
4  * - Cache Allocation code.
5  *
6  * Copyright (C) 2016 Intel Corporation
7  *
8  * Authors:
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  *    Tony Luck <tony.luck@intel.com>
11  *
 * More information about RDT can be found in the Intel (R) x86 Architecture
13  * Software Developer Manual June 2016, volume 3, section 17.17.
14  */
15 
16 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
17 
18 #include <linux/cpu.h>
19 #include <linux/kernfs.h>
20 #include <linux/math.h>
21 #include <linux/seq_file.h>
22 #include <linux/slab.h>
23 #include <linux/tick.h>
24 
25 #include "internal.h"
26 
/*
 * Per-write parse context handed to the schema value parsers.
 * @closid:	class of service ID of the resource group being written to.
 * @mode:	mode of the resource group (affects overlap validation).
 * @buf:	the value string for one domain, i.e. the "mask" in "id=mask".
 */
struct rdt_parse_data {
	u32			closid;
	enum rdtgrp_mode	mode;
	char			*buf;
};

/*
 * Signature shared by parse_cbm() and parse_bw(): parse data->buf and stage
 * a new control value for domain @d of schema @s. Returns 0 on success,
 * -errno otherwise (with the reason recorded via rdt_last_cmd_*()).
 */
typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
			       struct resctrl_schema *s,
			       struct rdt_ctrl_domain *d);
36 
37 /*
38  * Check whether MBA bandwidth percentage value is correct. The value is
39  * checked against the minimum and max bandwidth values specified by the
40  * hardware. The allocated bandwidth percentage is rounded to the next
41  * control step available on the hardware.
42  */
43 static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
44 {
45 	int ret;
46 	u32 bw;
47 
48 	/*
49 	 * Only linear delay values is supported for current Intel SKUs.
50 	 */
51 	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
52 		rdt_last_cmd_puts("No support for non-linear MB domains\n");
53 		return false;
54 	}
55 
56 	ret = kstrtou32(buf, 10, &bw);
57 	if (ret) {
58 		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
59 		return false;
60 	}
61 
62 	/* Nothing else to do if software controller is enabled. */
63 	if (is_mba_sc(r)) {
64 		*data = bw;
65 		return true;
66 	}
67 
68 	if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
69 		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
70 				    bw, r->membw.min_bw, r->membw.max_bw);
71 		return false;
72 	}
73 
74 	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
75 	return true;
76 }
77 
78 static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
79 		    struct rdt_ctrl_domain *d)
80 {
81 	struct resctrl_staged_config *cfg;
82 	struct rdt_resource *r = s->res;
83 	u32 closid = data->closid;
84 	u32 bw_val;
85 
86 	cfg = &d->staged_config[s->conf_type];
87 	if (cfg->have_new_ctrl) {
88 		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
89 		return -EINVAL;
90 	}
91 
92 	if (!bw_validate(data->buf, &bw_val, r))
93 		return -EINVAL;
94 
95 	if (is_mba_sc(r)) {
96 		d->mbps_val[closid] = bw_val;
97 		return 0;
98 	}
99 
100 	cfg->new_ctrl = bw_val;
101 	cfg->have_new_ctrl = true;
102 
103 	return 0;
104 }
105 
/*
 * Check whether a cache bit mask is valid.
 * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
 *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
 *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
 *
 * Haswell does not support a non-contiguous 1s value and additionally
 * requires at least two bits set.
 * AMD allows non-contiguous bitmasks.
 */
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
	/*
	 * Mask of all bits the hardware implements.
	 * NOTE(review): relies on cbm_len < BITS_PER_LONG so BIT_MASK()
	 * does not wrap — holds for real cache CBM widths.
	 */
	u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
	unsigned int cbm_len = r->cache.cbm_len;
	unsigned long first_bit, zero_bit, val;
	int ret;

	ret = kstrtoul(buf, 16, &val);
	if (ret) {
		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
		return false;
	}

	/* An empty mask is only legal when min_cbm_bits is zero. */
	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}

	/* First contiguous run of 1s occupies [first_bit, zero_bit). */
	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	/* Are non-contiguous bitmasks allowed? */
	if (!r->cache.arch_has_sparse_bitmasks &&
	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
		return false;
	}

	/* The contiguous run must satisfy the hardware minimum length. */
	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
				    r->cache.min_cbm_bits);
		return false;
	}

	*data = val;
	return true;
}
153 
/*
 * Read one cache bit mask (hex). Check that it is valid for the current
 * resource type.
 */
static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
		     struct rdt_ctrl_domain *d)
{
	enum rdtgrp_mode mode = data->mode;
	struct resctrl_staged_config *cfg;
	struct rdt_resource *r = s->res;
	u32 closid = data->closid;
	u32 cbm_val;

	/* Each domain may only be given one value per schemata write. */
	cfg = &d->staged_config[s->conf_type];
	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
		return -EINVAL;
	}

	/*
	 * Cannot set up more than one pseudo-locked region in a cache
	 * hierarchy.
	 */
	if (mode == RDT_MODE_PSEUDO_LOCKSETUP &&
	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
		return -EINVAL;
	}

	if (!cbm_validate(data->buf, &cbm_val, r))
		return -EINVAL;

	/* Shareable/exclusive groups may not touch pseudo-locked cache. */
	if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) &&
	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
		return -EINVAL;
	}

	/*
	 * The CBM may not overlap with the CBM of another closid if
	 * either is exclusive.
	 */
	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) {
		rdt_last_cmd_puts("Overlaps with exclusive group\n");
		return -EINVAL;
	}

	/* Any overlap at all is fatal for exclusive/pseudo-lock setup mode. */
	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) {
		if (mode == RDT_MODE_EXCLUSIVE ||
		    mode == RDT_MODE_PSEUDO_LOCKSETUP) {
			rdt_last_cmd_puts("Overlaps with other group\n");
			return -EINVAL;
		}
	}

	cfg->new_ctrl = cbm_val;
	cfg->have_new_ctrl = true;

	return 0;
}
214 
/*
 * For each domain in this resource we expect to find a series of:
 *	id=mask
 * separated by ";". The "id" is in decimal, and must match one of
 * the "id"s for this resource.
 */
static int parse_line(char *line, struct resctrl_schema *s,
		      struct rdtgroup *rdtgrp)
{
	enum resctrl_conf_type t = s->conf_type;
	ctrlval_parser_t *parse_ctrlval = NULL;
	struct resctrl_staged_config *cfg;
	struct rdt_resource *r = s->res;
	struct rdt_parse_data data;
	struct rdt_ctrl_domain *d;
	char *dom = NULL, *id;
	unsigned long dom_id;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	/* Pick the parser matching this resource's schema value format. */
	switch (r->schema_fmt) {
	case RESCTRL_SCHEMA_BITMAP:
		parse_ctrlval = &parse_cbm;
		break;
	case RESCTRL_SCHEMA_RANGE:
		parse_ctrlval = &parse_bw;
		break;
	}

	if (WARN_ON_ONCE(!parse_ctrlval))
		return -EINVAL;

	/* Bandwidth resources cannot host a pseudo-locked region. */
	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
		return -EINVAL;
	}

next:
	/* Input exhausted: every "id=mask" token parsed successfully. */
	if (!line || line[0] == '\0')
		return 0;
	dom = strsep(&line, ";");
	id = strsep(&dom, "=");
	if (!dom || kstrtoul(id, 10, &dom_id)) {
		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
		return -EINVAL;
	}
	dom = strim(dom);
	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
		if (d->hdr.id == dom_id) {
			data.buf = dom;
			data.closid = rdtgrp->closid;
			data.mode = rdtgrp->mode;
			if (parse_ctrlval(&data, s, d))
				return -EINVAL;
			if (rdtgrp->mode ==  RDT_MODE_PSEUDO_LOCKSETUP) {
				cfg = &d->staged_config[t];
				/*
				 * In pseudo-locking setup mode and just
				 * parsed a valid CBM that should be
				 * pseudo-locked. Only one locked region per
				 * resource group and domain so just do
				 * the required initialization for single
				 * region and return.
				 */
				rdtgrp->plr->s = s;
				rdtgrp->plr->d = d;
				rdtgrp->plr->cbm = cfg->new_ctrl;
				d->plr = rdtgrp->plr;
				return 0;
			}
			goto next;
		}
	}
	/* "id" did not match any domain of this resource. */
	return -EINVAL;
}
292 
293 static int rdtgroup_parse_resource(char *resname, char *tok,
294 				   struct rdtgroup *rdtgrp)
295 {
296 	struct resctrl_schema *s;
297 
298 	list_for_each_entry(s, &resctrl_schema_all, list) {
299 		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
300 			return parse_line(tok, s, rdtgrp);
301 	}
302 	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
303 	return -EINVAL;
304 }
305 
/*
 * Handle a write to a resource group's "schemata" file. Input is one line
 * per resource, "name:id=val;...", with a mandatory trailing newline.
 * All values are parsed and staged first, then committed to hardware via
 * resctrl_arch_update_domains(), so a parse error leaves hardware untouched.
 */
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct resctrl_schema *s;
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;
	char *tok, *resname;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;
	buf[nbytes - 1] = '\0';

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}
	rdt_last_cmd_clear();

	/*
	 * No changes to pseudo-locked region allowed. It has to be removed
	 * and re-created instead.
	 */
	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
		ret = -EINVAL;
		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
		goto out;
	}

	/* Start from a clean slate of staged configurations. */
	rdt_staged_configs_clear();

	/* Phase 1: parse and stage every "name:..." line. */
	while ((tok = strsep(&buf, "\n")) != NULL) {
		resname = strim(strsep(&tok, ":"));
		if (!tok) {
			rdt_last_cmd_puts("Missing ':'\n");
			ret = -EINVAL;
			goto out;
		}
		if (tok[0] == '\0') {
			rdt_last_cmd_printf("Missing '%s' value\n", resname);
			ret = -EINVAL;
			goto out;
		}
		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
		if (ret)
			goto out;
	}

	/* Phase 2: commit the staged configuration for each resource. */
	list_for_each_entry(s, &resctrl_schema_all, list) {
		r = s->res;

		/*
		 * Writes to mba_sc resources update the software controller,
		 * not the control MSR.
		 */
		if (is_mba_sc(r))
			continue;

		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
		if (ret)
			goto out;
	}

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * If pseudo-locking fails we keep the resource group in
		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
		 * active and updated for just the domain the pseudo-locked
		 * region was requested for.
		 */
		ret = rdtgroup_pseudo_lock_create(rdtgrp);
	}

out:
	rdt_staged_configs_clear();
	rdtgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}
386 
387 static void show_doms(struct seq_file *s, struct resctrl_schema *schema,
388 		      char *resource_name, int closid)
389 {
390 	struct rdt_resource *r = schema->res;
391 	struct rdt_ctrl_domain *dom;
392 	bool sep = false;
393 	u32 ctrl_val;
394 
395 	/* Walking r->domains, ensure it can't race with cpuhp */
396 	lockdep_assert_cpus_held();
397 
398 	if (resource_name)
399 		seq_printf(s, "%*s:", max_name_width, resource_name);
400 	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
401 		if (sep)
402 			seq_puts(s, ";");
403 
404 		if (is_mba_sc(r))
405 			ctrl_val = dom->mbps_val[closid];
406 		else
407 			ctrl_val = resctrl_arch_get_config(r, dom, closid,
408 							   schema->conf_type);
409 
410 		seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
411 		sep = true;
412 	}
413 	seq_puts(s, "\n");
414 }
415 
416 int rdtgroup_schemata_show(struct kernfs_open_file *of,
417 			   struct seq_file *s, void *v)
418 {
419 	struct resctrl_schema *schema;
420 	struct rdtgroup *rdtgrp;
421 	int ret = 0;
422 	u32 closid;
423 
424 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
425 	if (rdtgrp) {
426 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
427 			list_for_each_entry(schema, &resctrl_schema_all, list) {
428 				seq_printf(s, "%s:uninitialized\n", schema->name);
429 			}
430 		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
431 			if (!rdtgrp->plr->d) {
432 				rdt_last_cmd_clear();
433 				rdt_last_cmd_puts("Cache domain offline\n");
434 				ret = -ENODEV;
435 			} else {
436 				seq_printf(s, "%s:%d=%x\n",
437 					   rdtgrp->plr->s->res->name,
438 					   rdtgrp->plr->d->hdr.id,
439 					   rdtgrp->plr->cbm);
440 			}
441 		} else {
442 			closid = rdtgrp->closid;
443 			list_for_each_entry(schema, &resctrl_schema_all, list) {
444 				if (closid < schema->num_closid)
445 					show_doms(s, schema, schema->name, closid);
446 			}
447 		}
448 	} else {
449 		ret = -ENOENT;
450 	}
451 	rdtgroup_kn_unlock(of->kn);
452 	return ret;
453 }
454 
/*
 * smp_call_on_cpu() callback wrapper around mon_event_count().
 * Always returns 0 to satisfy the callback signature; any read error is
 * carried back inside the struct rmid_read that @arg points to.
 */
static int smp_mon_event_count(void *arg)
{
	mon_event_count(arg);

	return 0;
}
461 
462 ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
463 				      char *buf, size_t nbytes, loff_t off)
464 {
465 	struct rdtgroup *rdtgrp;
466 	int ret = 0;
467 
468 	/* Valid input requires a trailing newline */
469 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
470 		return -EINVAL;
471 	buf[nbytes - 1] = '\0';
472 
473 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
474 	if (!rdtgrp) {
475 		rdtgroup_kn_unlock(of->kn);
476 		return -ENOENT;
477 	}
478 	rdt_last_cmd_clear();
479 
480 	if (!strcmp(buf, "mbm_local_bytes")) {
481 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
482 			rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
483 		else
484 			ret = -EINVAL;
485 	} else if (!strcmp(buf, "mbm_total_bytes")) {
486 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
487 			rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
488 		else
489 			ret = -EINVAL;
490 	} else {
491 		ret = -EINVAL;
492 	}
493 
494 	if (ret)
495 		rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
496 
497 	rdtgroup_kn_unlock(of->kn);
498 
499 	return ret ?: nbytes;
500 }
501 
502 int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
503 				 struct seq_file *s, void *v)
504 {
505 	struct rdtgroup *rdtgrp;
506 	int ret = 0;
507 
508 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
509 
510 	if (rdtgrp) {
511 		switch (rdtgrp->mba_mbps_event) {
512 		case QOS_L3_MBM_LOCAL_EVENT_ID:
513 			seq_puts(s, "mbm_local_bytes\n");
514 			break;
515 		case QOS_L3_MBM_TOTAL_EVENT_ID:
516 			seq_puts(s, "mbm_total_bytes\n");
517 			break;
518 		default:
519 			pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
520 			ret = -EINVAL;
521 			break;
522 		}
523 	} else {
524 		ret = -ENOENT;
525 	}
526 
527 	rdtgroup_kn_unlock(of->kn);
528 
529 	return ret;
530 }
531 
/*
 * resctrl_find_domain() - Search the id-sorted domain list @h for @id.
 *
 * Returns the matching domain header, or NULL if @id is not present.
 * When @pos is non-NULL it is set to the list position before which a
 * new domain with @id would need to be inserted to keep @h sorted.
 */
struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
					   struct list_head **pos)
{
	struct rdt_domain_hdr *d;
	struct list_head *l;

	list_for_each(l, h) {
		d = list_entry(l, struct rdt_domain_hdr, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	/* Not found: l is either the insertion point or the list head. */
	if (pos)
		*pos = l;

	return NULL;
}
553 
/*
 * mon_event_read() - Read monitor event @evt for @rdtgrp into @rr.
 *
 * The read itself is done by mon_event_count(): directly when the event
 * can be read from any CPU, otherwise on a CPU picked from @cpumask.
 * The result or error is returned through @rr.
 * NOTE(review): assumes the caller zero-initialized *rr so that
 * rr->arch_mon_ctx is NULL on the counter-assignment path below — both
 * callers in this file use "struct rmid_read rr = {0}"; confirm for any
 * new caller.
 */
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
		    struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp,
		    cpumask_t *cpumask, struct mon_evt *evt, int first)
{
	int cpu;

	/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	/*
	 * Setup the parameters to pass to mon_event_count() to read the data.
	 */
	rr->rgrp = rdtgrp;
	rr->evt = evt;
	rr->r = r;
	rr->hdr = hdr;
	rr->first = first;
	/* With counter assignment active, MBM reads need no arch context. */
	if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
	    resctrl_is_mbm_event(evt->evtid)) {
		rr->is_mbm_cntr = true;
	} else {
		rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evt->evtid);
		if (IS_ERR(rr->arch_mon_ctx)) {
			rr->err = -EINVAL;
			return;
		}
	}

	/* Events readable from any CPU can be counted right here. */
	if (evt->any_cpu) {
		mon_event_count(rr);
		goto out_ctx_free;
	}

	cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);

	/*
	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
	 * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
	 * MPAM's resctrl_arch_rmid_read() is unable to read the
	 * counters on some platforms if its called in IRQ context.
	 */
	if (tick_nohz_full_cpu(cpu))
		smp_call_function_any(cpumask, mon_event_count, rr, 1);
	else
		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);

out_ctx_free:
	/* Only free a context when one was actually allocated above. */
	if (rr->arch_mon_ctx)
		resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx);
}
604 
/*
 * Decimal place precision to use for each number of fixed-point
 * binary bits computed from ceil(binary_bits * log10(2)) except
 * binary_bits == 0 which will print "value.0".
 * Indexed by binary_bits in print_event_value() below.
 */
static const unsigned int decplaces[MAX_BINARY_BITS + 1] = {
	[0]  =  1,
	[1]  =  1,
	[2]  =  1,
	[3]  =  1,
	[4]  =  2,
	[5]  =  2,
	[6]  =  2,
	[7]  =  3,
	[8]  =  3,
	[9]  =  3,
	[10] =  4,
	[11] =  4,
	[12] =  4,
	[13] =  4,
	[14] =  5,
	[15] =  5,
	[16] =  5,
	[17] =  6,
	[18] =  6,
	[19] =  6,
	[20] =  7,
	[21] =  7,
	[22] =  7,
	[23] =  7,
	[24] =  8,
	[25] =  8,
	[26] =  8,
	[27] =  9
};
640 
/*
 * Print fixed-point @val, which has @binary_bits fractional bits, as a
 * rounded decimal "integer.fraction" with decplaces[binary_bits] digits.
 */
static void print_event_value(struct seq_file *m, unsigned int binary_bits, u64 val)
{
	unsigned long long frac = 0;

	if (binary_bits) {
		/* Mask off the integer part of the fixed-point value. */
		frac = val & GENMASK_ULL(binary_bits - 1, 0);

		/*
		 * Multiply by 10^{desired decimal places}. The integer part of
		 * the fixed point value is now almost what is needed.
		 */
		frac *= int_pow(10ull, decplaces[binary_bits]);

		/*
		 * Round to nearest by adding a value that would be a "1" in the
		 * binary_bits + 1 place.  Integer part of fixed point value is
		 * now the needed value.
		 */
		frac += 1ull << (binary_bits - 1);

		/*
		 * Extract the integer part of the value. This is the decimal
		 * representation of the original fixed-point fractional value.
		 */
		frac >>= binary_bits;
	}

	/*
	 * "frac" is now in the range [0 .. 10^decplaces).  I.e. string
	 * representation will fit into chosen number of decimal places.
	 */
	seq_printf(m, "%llu.%0*llu\n", val >> binary_bits, decplaces[binary_bits], frac);
}
675 
/*
 * Show a monitor data file. The file's struct mon_data identifies the
 * resource, domain (or L3 cache id when summing), and event. The counter
 * value is read via mon_event_read() and printed, or a short diagnostic
 * string is printed when the read failed.
 */
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	enum resctrl_res_level resid;
	struct rdt_l3_mon_domain *d;
	struct rdt_domain_hdr *hdr;
	struct rmid_read rr = {0};
	struct rdtgroup *rdtgrp;
	int domid, cpu, ret = 0;
	struct rdt_resource *r;
	struct cacheinfo *ci;
	struct mon_evt *evt;
	struct mon_data *md;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto out;
	}

	md = of->kn->priv;
	if (WARN_ON_ONCE(!md)) {
		ret = -EIO;
		goto out;
	}

	resid = md->rid;
	domid = md->domid;
	evt = md->evt;
	r = resctrl_arch_get_resource(resid);

	if (md->sum) {
		/*
		 * This file requires summing across all domains that share
		 * the L3 cache id that was provided in the "domid" field of the
		 * struct mon_data. Search all domains in the resource for
		 * one that matches this cache id.
		 */
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			if (d->ci_id == domid) {
				cpu = cpumask_any(&d->hdr.cpu_mask);
				ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
				if (!ci)
					continue;
				rr.ci = ci;
				/* NULL hdr: read covers the whole shared cache, not one domain. */
				mon_event_read(&rr, r, NULL, rdtgrp,
					       &ci->shared_cpu_map, evt, false);
				goto checkresult;
			}
		}
		ret = -ENOENT;
		goto out;
	} else {
		/*
		 * This file provides data from a single domain. Search
		 * the resource to find the domain with "domid".
		 */
		hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
		if (!hdr) {
			ret = -ENOENT;
			goto out;
		}
		mon_event_read(&rr, r, hdr, rdtgrp, &hdr->cpu_mask, evt, false);
	}

checkresult:

	/*
	 * -ENOENT is a special case, set only when "mbm_event" counter assignment
	 * mode is enabled and no counter has been assigned.
	 */
	if (rr.err == -EIO)
		seq_puts(m, "Error\n");
	else if (rr.err == -EINVAL)
		seq_puts(m, "Unavailable\n");
	else if (rr.err == -ENOENT)
		seq_puts(m, "Unassigned\n");
	else if (evt->is_floating_point)
		print_event_value(m, evt->binary_bits, rr.val);
	else
		seq_printf(m, "%llu\n", rr.val);

out:
	rdtgroup_kn_unlock(of->kn);
	return ret;
}
762 
763 int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
764 {
765 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
766 	struct rdt_resource *r = s->res;
767 
768 	mutex_lock(&rdtgroup_mutex);
769 
770 	if (r->cache.io_alloc_capable) {
771 		if (resctrl_arch_get_io_alloc_enabled(r))
772 			seq_puts(seq, "enabled\n");
773 		else
774 			seq_puts(seq, "disabled\n");
775 	} else {
776 		seq_puts(seq, "not supported\n");
777 	}
778 
779 	mutex_unlock(&rdtgroup_mutex);
780 
781 	return 0;
782 }
783 
784 /*
785  * resctrl_io_alloc_closid_supported() - io_alloc feature utilizes the
786  * highest CLOSID value to direct I/O traffic. Ensure that io_alloc_closid
787  * is in the supported range.
788  */
789 static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid)
790 {
791 	return io_alloc_closid < closids_supported();
792 }
793 
/*
 * Initialize io_alloc CLOSID cache resource CBM with all usable (shared
 * and unused) cache portions.
 */
static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid)
{
	enum resctrl_conf_type peer_type;
	struct rdt_resource *r = s->res;
	struct rdt_ctrl_domain *d;
	int ret;

	/* Work from a clean staging area; cleared again on all exit paths. */
	rdt_staged_configs_clear();

	/* Stage default CAT CBMs for @closid in every domain. */
	ret = rdtgroup_init_cat(s, closid);
	if (ret < 0)
		goto out;

	/* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */
	if (resctrl_arch_get_cdp_enabled(r->rid)) {
		peer_type = resctrl_peer_type(s->conf_type);
		list_for_each_entry(d, &s->res->ctrl_domains, hdr.list)
			memcpy(&d->staged_config[peer_type],
			       &d->staged_config[s->conf_type],
			       sizeof(d->staged_config[0]));
	}

	/* Commit the staged CBMs to hardware. */
	ret = resctrl_arch_update_domains(r, closid);
out:
	rdt_staged_configs_clear();
	return ret;
}
825 
826 /*
827  * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using
828  * the highest available CLOSID. Retrieve the maximum CLOSID supported by the
829  * resource. Note that if Code Data Prioritization (CDP) is enabled, the number
830  * of available CLOSIDs is reduced by half.
831  */
832 u32 resctrl_io_alloc_closid(struct rdt_resource *r)
833 {
834 	if (resctrl_arch_get_cdp_enabled(r->rid))
835 		return resctrl_arch_get_num_closid(r) / 2  - 1;
836 	else
837 		return resctrl_arch_get_num_closid(r) - 1;
838 }
839 
/*
 * Enable or disable the io_alloc feature for this resource. Accepts any
 * kstrtobool() input. Enabling claims the highest CLOSID for I/O traffic
 * and initializes its CBMs; disabling releases that CLOSID.
 */
ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
			       size_t nbytes, loff_t off)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;
	char const *grp_name;
	u32 io_alloc_closid;
	bool enable;
	int ret;

	ret = kstrtobool(buf, &enable);
	if (ret)
		return ret;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	if (!r->cache.io_alloc_capable) {
		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
		ret = -ENODEV;
		goto out_unlock;
	}

	/* If the feature is already up to date, no action is needed. */
	if (resctrl_arch_get_io_alloc_enabled(r) == enable)
		goto out_unlock;

	io_alloc_closid = resctrl_io_alloc_closid(r);
	if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) {
		rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n",
				    io_alloc_closid);
		ret = -EINVAL;
		goto out_unlock;
	}

	if (enable) {
		/* Claim the CLOSID; fails if a resource group already owns it. */
		if (!closid_alloc_fixed(io_alloc_closid)) {
			grp_name = rdtgroup_name_by_closid(io_alloc_closid);
			WARN_ON_ONCE(!grp_name);
			rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n",
					    io_alloc_closid, grp_name ? grp_name : "another");
			ret = -ENOSPC;
			goto out_unlock;
		}

		/* Release the CLOSID again if its CBMs cannot be set up. */
		ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid);
		if (ret) {
			rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n");
			closid_free(io_alloc_closid);
			goto out_unlock;
		}
	} else {
		/* Disabling returns the CLOSID to the general allocator. */
		closid_free(io_alloc_closid);
	}

	ret = resctrl_arch_io_alloc_enable(r, enable);
	/* Enable failed after the CLOSID was claimed: undo the claim. */
	if (enable && ret) {
		rdt_last_cmd_puts("Failed to enable io_alloc feature\n");
		closid_free(io_alloc_closid);
	}

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret ?: nbytes;
}
909 
910 int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
911 {
912 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
913 	struct rdt_resource *r = s->res;
914 	int ret = 0;
915 
916 	cpus_read_lock();
917 	mutex_lock(&rdtgroup_mutex);
918 
919 	rdt_last_cmd_clear();
920 
921 	if (!r->cache.io_alloc_capable) {
922 		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
923 		ret = -ENODEV;
924 		goto out_unlock;
925 	}
926 
927 	if (!resctrl_arch_get_io_alloc_enabled(r)) {
928 		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
929 		ret = -EINVAL;
930 		goto out_unlock;
931 	}
932 
933 	/*
934 	 * When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and
935 	 * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for
936 	 * either CDP resource are identical and accurately represent the CBMs
937 	 * used for I/O.
938 	 */
939 	show_doms(seq, s, NULL, resctrl_io_alloc_closid(r));
940 
941 out_unlock:
942 	mutex_unlock(&rdtgroup_mutex);
943 	cpus_read_unlock();
944 	return ret;
945 }
946 
/*
 * Parse a ";"-separated list of "id=mask" tokens for the io_alloc CLOSID
 * and stage the CBMs. Mirrors parse_line() but always parses with
 * parse_cbm() in shareable mode, and keeps the CDP peer configuration in
 * sync with each staged value.
 */
static int resctrl_io_alloc_parse_line(char *line,  struct rdt_resource *r,
				       struct resctrl_schema *s, u32 closid)
{
	enum resctrl_conf_type peer_type;
	struct rdt_parse_data data;
	struct rdt_ctrl_domain *d;
	char *dom = NULL, *id;
	unsigned long dom_id;

next:
	/* Input exhausted: every token parsed successfully. */
	if (!line || line[0] == '\0')
		return 0;

	dom = strsep(&line, ";");
	id = strsep(&dom, "=");
	if (!dom || kstrtoul(id, 10, &dom_id)) {
		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
		return -EINVAL;
	}

	dom = strim(dom);
	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
		if (d->hdr.id == dom_id) {
			data.buf = dom;
			data.mode = RDT_MODE_SHAREABLE;
			data.closid = closid;
			if (parse_cbm(&data, s, d))
				return -EINVAL;
			/*
			 * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA
			 * in sync.
			 */
			if (resctrl_arch_get_cdp_enabled(r->rid)) {
				peer_type = resctrl_peer_type(s->conf_type);
				memcpy(&d->staged_config[peer_type],
				       &d->staged_config[s->conf_type],
				       sizeof(d->staged_config[0]));
			}
			goto next;
		}
	}

	/* "id" did not match any domain of this resource. */
	return -EINVAL;
}
991 
992 ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
993 				   size_t nbytes, loff_t off)
994 {
995 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
996 	struct rdt_resource *r = s->res;
997 	u32 io_alloc_closid;
998 	int ret = 0;
999 
1000 	/* Valid input requires a trailing newline */
1001 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1002 		return -EINVAL;
1003 
1004 	buf[nbytes - 1] = '\0';
1005 
1006 	cpus_read_lock();
1007 	mutex_lock(&rdtgroup_mutex);
1008 	rdt_last_cmd_clear();
1009 
1010 	if (!r->cache.io_alloc_capable) {
1011 		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
1012 		ret = -ENODEV;
1013 		goto out_unlock;
1014 	}
1015 
1016 	if (!resctrl_arch_get_io_alloc_enabled(r)) {
1017 		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
1018 		ret = -EINVAL;
1019 		goto out_unlock;
1020 	}
1021 
1022 	io_alloc_closid = resctrl_io_alloc_closid(r);
1023 
1024 	rdt_staged_configs_clear();
1025 	ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid);
1026 	if (ret)
1027 		goto out_clear_configs;
1028 
1029 	ret = resctrl_arch_update_domains(r, io_alloc_closid);
1030 
1031 out_clear_configs:
1032 	rdt_staged_configs_clear();
1033 out_unlock:
1034 	mutex_unlock(&rdtgroup_mutex);
1035 	cpus_read_unlock();
1036 
1037 	return ret ?: nbytes;
1038 }
1039