xref: /linux/fs/resctrl/ctrlmondata.c (revision 37a93dd5c49b5fda807fd204edf2547c3493319c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology (RDT)
4  * - Cache Allocation code.
5  *
6  * Copyright (C) 2016 Intel Corporation
7  *
8  * Authors:
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  *    Tony Luck <tony.luck@intel.com>
11  *
12  * More information about RDT can be found in the Intel(R) x86 Architecture
13  * Software Developer Manual, June 2016, volume 3, section 17.17.
14  */
15 
16 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
17 
18 #include <linux/cpu.h>
19 #include <linux/kernfs.h>
20 #include <linux/math.h>
21 #include <linux/seq_file.h>
22 #include <linux/slab.h>
23 #include <linux/tick.h>
24 
25 #include "internal.h"
26 
27 struct rdt_parse_data {
28 	u32			closid;
29 	enum rdtgrp_mode	mode;
30 	char			*buf;
31 };
32 
33 typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
34 			       struct resctrl_schema *s,
35 			       struct rdt_ctrl_domain *d);
36 
37 /*
38  * Check whether the MBA bandwidth percentage value is valid. The value is
39  * checked against the minimum and maximum bandwidth values specified by
40  * the hardware. The allocated bandwidth percentage is rounded up to the
41  * next control step available on the hardware.
42  */
43 static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
44 {
45 	int ret;
46 	u32 bw;
47 
48 	/*
49 	 * Only linear delay values are supported for current Intel SKUs.
50 	 */
51 	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
52 		rdt_last_cmd_puts("No support for non-linear MB domains\n");
53 		return false;
54 	}
55 
56 	ret = kstrtou32(buf, 10, &bw);
57 	if (ret) {
58 		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
59 		return false;
60 	}
61 
62 	/* Nothing else to do if software controller is enabled. */
63 	if (is_mba_sc(r)) {
64 		*data = bw;
65 		return true;
66 	}
67 
68 	if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
69 		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
70 				    bw, r->membw.min_bw, r->membw.max_bw);
71 		return false;
72 	}
73 
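	/* e.g. with bw_gran == 10, a requested value of 43 is staged as 50. */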
74 	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
75 	return true;
76 }
77 
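/*
 * Parse one "id=bandwidth" token and stage the new control value for
 * domain @d. When the mba_MBps software controller is active the value
 * is an MBps target stored directly in the domain instead of being staged.
 */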
78 static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
79 		    struct rdt_ctrl_domain *d)
80 {
81 	struct resctrl_staged_config *cfg;
82 	struct rdt_resource *r = s->res;
83 	u32 closid = data->closid;
84 	u32 bw_val;
85 
86 	cfg = &d->staged_config[s->conf_type];
87 	if (cfg->have_new_ctrl) {
88 		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
89 		return -EINVAL;
90 	}
91 
92 	if (!bw_validate(data->buf, &bw_val, r))
93 		return -EINVAL;
94 
95 	if (is_mba_sc(r)) {
96 		d->mbps_val[closid] = bw_val;
97 		return 0;
98 	}
99 
100 	cfg->new_ctrl = bw_val;
101 	cfg->have_new_ctrl = true;
102 
103 	return 0;
104 }
105 
106 /*
107  * Check whether a cache bit mask is valid.
108  * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
109  *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
110  *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
111  *
112  * Haswell does not support a non-contiguous 1s value and additionally
113  * requires at least two bits set.
114  * AMD allows non-contiguous bitmasks.
115  */
116 static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
117 {
118 	u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
119 	unsigned int cbm_len = r->cache.cbm_len;
120 	unsigned long first_bit, zero_bit, val;
121 	int ret;
122 
123 	ret = kstrtoul(buf, 16, &val);
124 	if (ret) {
125 		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
126 		return false;
127 	}
128 
129 	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
130 		rdt_last_cmd_puts("Mask out of range\n");
131 		return false;
132 	}
133 
134 	first_bit = find_first_bit(&val, cbm_len);
135 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
136 
137 	/* Are non-contiguous bitmasks allowed? */
138 	if (!r->cache.arch_has_sparse_bitmasks &&
139 	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
140 		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
141 		return false;
142 	}
143 
144 	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
145 		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
146 				    r->cache.min_cbm_bits);
147 		return false;
148 	}
149 
150 	*data = val;
151 	return true;
152 }
153 
154 /*
155  * Read one cache bit mask (hex). Check that it is valid for the current
156  * resource type.
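 * e.g. on a cache with cbm_len == 11 the mask "7f0" selects bits 4-10.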
157  */
158 static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
159 		     struct rdt_ctrl_domain *d)
160 {
161 	enum rdtgrp_mode mode = data->mode;
162 	struct resctrl_staged_config *cfg;
163 	struct rdt_resource *r = s->res;
164 	u32 closid = data->closid;
165 	u32 cbm_val;
166 
167 	cfg = &d->staged_config[s->conf_type];
168 	if (cfg->have_new_ctrl) {
169 		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
170 		return -EINVAL;
171 	}
172 
173 	/*
174 	 * Cannot set up more than one pseudo-locked region in a cache
175 	 * hierarchy.
176 	 */
177 	if (mode == RDT_MODE_PSEUDO_LOCKSETUP &&
178 	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
179 		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
180 		return -EINVAL;
181 	}
182 
183 	if (!cbm_validate(data->buf, &cbm_val, r))
184 		return -EINVAL;
185 
186 	if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) &&
187 	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
188 		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
189 		return -EINVAL;
190 	}
191 
192 	/*
193 	 * The CBM may not overlap with the CBM of another closid if
194 	 * either is exclusive.
195 	 */
196 	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) {
197 		rdt_last_cmd_puts("Overlaps with exclusive group\n");
198 		return -EINVAL;
199 	}
200 
201 	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) {
202 		if (mode == RDT_MODE_EXCLUSIVE ||
203 		    mode == RDT_MODE_PSEUDO_LOCKSETUP) {
204 			rdt_last_cmd_puts("Overlaps with other group\n");
205 			return -EINVAL;
206 		}
207 	}
208 
209 	cfg->new_ctrl = cbm_val;
210 	cfg->have_new_ctrl = true;
211 
212 	return 0;
213 }
214 
215 /*
216  * For each domain in this resource we expect to find a series of:
217  *	id=mask
218  * separated by ";". The "id" is in decimal, and must match one of
219  * the "id"s for this resource.
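 * e.g. "0=ffff;1=00ff" provides a value for domains 0 and 1.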
220  */
221 static int parse_line(char *line, struct resctrl_schema *s,
222 		      struct rdtgroup *rdtgrp)
223 {
224 	enum resctrl_conf_type t = s->conf_type;
225 	ctrlval_parser_t *parse_ctrlval = NULL;
226 	struct resctrl_staged_config *cfg;
227 	struct rdt_resource *r = s->res;
228 	struct rdt_parse_data data;
229 	struct rdt_ctrl_domain *d;
230 	char *dom = NULL, *id;
231 	unsigned long dom_id;
232 
233 	/* Walking r->domains, ensure it can't race with cpuhp */
234 	lockdep_assert_cpus_held();
235 
236 	switch (r->schema_fmt) {
237 	case RESCTRL_SCHEMA_BITMAP:
238 		parse_ctrlval = &parse_cbm;
239 		break;
240 	case RESCTRL_SCHEMA_RANGE:
241 		parse_ctrlval = &parse_bw;
242 		break;
243 	}
244 
245 	if (WARN_ON_ONCE(!parse_ctrlval))
246 		return -EINVAL;
247 
248 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
249 	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
250 		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
251 		return -EINVAL;
252 	}
253 
254 next:
255 	if (!line || line[0] == '\0')
256 		return 0;
257 	dom = strsep(&line, ";");
258 	id = strsep(&dom, "=");
259 	if (!dom || kstrtoul(id, 10, &dom_id)) {
260 		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
261 		return -EINVAL;
262 	}
263 	dom = strim(dom);
264 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
265 		if (d->hdr.id == dom_id) {
266 			data.buf = dom;
267 			data.closid = rdtgrp->closid;
268 			data.mode = rdtgrp->mode;
269 			if (parse_ctrlval(&data, s, d))
270 				return -EINVAL;
271 			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
272 				cfg = &d->staged_config[t];
273 				/*
274 				 * We are in pseudo-locking setup mode and just
275 				 * parsed a valid CBM that should be pseudo-locked.
276 				 * Only one pseudo-locked region is allowed per
277 				 * resource group and domain, so do the required
278 				 * initialization for this single region and
279 				 * return.
280 				 */
281 				rdtgrp->plr->s = s;
282 				rdtgrp->plr->d = d;
283 				rdtgrp->plr->cbm = cfg->new_ctrl;
284 				d->plr = rdtgrp->plr;
285 				return 0;
286 			}
287 			goto next;
288 		}
289 	}
290 	return -EINVAL;
291 }
292 
293 static int rdtgroup_parse_resource(char *resname, char *tok,
294 				   struct rdtgroup *rdtgrp)
295 {
296 	struct resctrl_schema *s;
297 
298 	list_for_each_entry(s, &resctrl_schema_all, list) {
299 		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
300 			return parse_line(tok, s, rdtgrp);
301 	}
302 	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
303 	return -EINVAL;
304 }
305 
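/*
 * Handle a write to a resource group's "schemata" file: parse each
 * "resource:id=value;..." line, stage the new control values, and apply
 * them with resctrl_arch_update_domains().
 */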
306 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
307 				char *buf, size_t nbytes, loff_t off)
308 {
309 	struct resctrl_schema *s;
310 	struct rdtgroup *rdtgrp;
311 	struct rdt_resource *r;
312 	char *tok, *resname;
313 	int ret = 0;
314 
315 	/* Valid input requires a trailing newline */
316 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
317 		return -EINVAL;
318 	buf[nbytes - 1] = '\0';
319 
320 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
321 	if (!rdtgrp) {
322 		rdtgroup_kn_unlock(of->kn);
323 		return -ENOENT;
324 	}
325 	rdt_last_cmd_clear();
326 
327 	/*
328 	 * No changes to a pseudo-locked region are allowed. It has to be
329 	 * removed and re-created instead.
330 	 */
331 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
332 		ret = -EINVAL;
333 		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
334 		goto out;
335 	}
336 
337 	rdt_staged_configs_clear();
338 
339 	while ((tok = strsep(&buf, "\n")) != NULL) {
340 		resname = strim(strsep(&tok, ":"));
341 		if (!tok) {
342 			rdt_last_cmd_puts("Missing ':'\n");
343 			ret = -EINVAL;
344 			goto out;
345 		}
346 		if (tok[0] == '\0') {
347 			rdt_last_cmd_printf("Missing '%s' value\n", resname);
348 			ret = -EINVAL;
349 			goto out;
350 		}
351 		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
352 		if (ret)
353 			goto out;
354 	}
355 
356 	list_for_each_entry(s, &resctrl_schema_all, list) {
357 		r = s->res;
358 
359 		/*
360 		 * Writes to mba_sc resources update the software controller,
361 		 * not the control MSR.
362 		 */
363 		if (is_mba_sc(r))
364 			continue;
365 
366 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
367 		if (ret)
368 			goto out;
369 	}
370 
371 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
372 		/*
373 		 * If pseudo-locking fails we keep the resource group in
374 		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
375 		 * active and updated for just the domain the pseudo-locked
376 		 * region was requested for.
377 		 */
378 		ret = rdtgroup_pseudo_lock_create(rdtgrp);
379 	}
380 
381 out:
382 	rdt_staged_configs_clear();
383 	rdtgroup_kn_unlock(of->kn);
384 	return ret ?: nbytes;
385 }
386 
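/*
 * Print one schemata line showing the @closid control value for every
 * domain of @schema. A NULL @resource_name omits the "name:" prefix.
 */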
387 static void show_doms(struct seq_file *s, struct resctrl_schema *schema,
388 		      char *resource_name, int closid)
389 {
390 	struct rdt_resource *r = schema->res;
391 	struct rdt_ctrl_domain *dom;
392 	bool sep = false;
393 	u32 ctrl_val;
394 
395 	/* Walking r->domains, ensure it can't race with cpuhp */
396 	lockdep_assert_cpus_held();
397 
398 	if (resource_name)
399 		seq_printf(s, "%*s:", max_name_width, resource_name);
400 	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
401 		if (sep)
402 			seq_puts(s, ";");
403 
404 		if (is_mba_sc(r))
405 			ctrl_val = dom->mbps_val[closid];
406 		else
407 			ctrl_val = resctrl_arch_get_config(r, dom, closid,
408 							   schema->conf_type);
409 
410 		seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
411 		sep = true;
412 	}
413 	seq_puts(s, "\n");
414 }
415 
416 int rdtgroup_schemata_show(struct kernfs_open_file *of,
417 			   struct seq_file *s, void *v)
418 {
419 	struct resctrl_schema *schema;
420 	struct rdtgroup *rdtgrp;
421 	int ret = 0;
422 	u32 closid;
423 
424 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
425 	if (rdtgrp) {
426 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
427 			list_for_each_entry(schema, &resctrl_schema_all, list) {
428 				seq_printf(s, "%s:uninitialized\n", schema->name);
429 			}
430 		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
431 			if (!rdtgrp->plr->d) {
432 				rdt_last_cmd_clear();
433 				rdt_last_cmd_puts("Cache domain offline\n");
434 				ret = -ENODEV;
435 			} else {
436 				seq_printf(s, "%s:%d=%x\n",
437 					   rdtgrp->plr->s->res->name,
438 					   rdtgrp->plr->d->hdr.id,
439 					   rdtgrp->plr->cbm);
440 			}
441 		} else {
442 			closid = rdtgrp->closid;
443 			list_for_each_entry(schema, &resctrl_schema_all, list) {
444 				if (closid < schema->num_closid)
445 					show_doms(s, schema, schema->name, closid);
446 			}
447 		}
448 	} else {
449 		ret = -ENOENT;
450 	}
451 	rdtgroup_kn_unlock(of->kn);
452 	return ret;
453 }
454 
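/*
 * Wrapper giving mon_event_count() the int-returning signature expected
 * by smp_call_on_cpu().
 */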
455 static int smp_mon_event_count(void *arg)
456 {
457 	mon_event_count(arg);
458 
459 	return 0;
460 }
461 
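/*
 * Select which MBM event ("mbm_local_bytes" or "mbm_total_bytes") the
 * mba_MBps software controller uses to measure bandwidth for this group.
 */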
462 ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
463 				      char *buf, size_t nbytes, loff_t off)
464 {
465 	struct rdtgroup *rdtgrp;
466 	int ret = 0;
467 
468 	/* Valid input requires a trailing newline */
469 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
470 		return -EINVAL;
471 	buf[nbytes - 1] = '\0';
472 
473 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
474 	if (!rdtgrp) {
475 		rdtgroup_kn_unlock(of->kn);
476 		return -ENOENT;
477 	}
478 	rdt_last_cmd_clear();
479 
480 	if (!strcmp(buf, "mbm_local_bytes")) {
481 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
482 			rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
483 		else
484 			ret = -EINVAL;
485 	} else if (!strcmp(buf, "mbm_total_bytes")) {
486 		if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
487 			rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
488 		else
489 			ret = -EINVAL;
490 	} else {
491 		ret = -EINVAL;
492 	}
493 
494 	if (ret)
495 		rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
496 
497 	rdtgroup_kn_unlock(of->kn);
498 
499 	return ret ?: nbytes;
500 }
501 
502 int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
503 				 struct seq_file *s, void *v)
504 {
505 	struct rdtgroup *rdtgrp;
506 	int ret = 0;
507 
508 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
509 
510 	if (rdtgrp) {
511 		switch (rdtgrp->mba_mbps_event) {
512 		case QOS_L3_MBM_LOCAL_EVENT_ID:
513 			seq_puts(s, "mbm_local_bytes\n");
514 			break;
515 		case QOS_L3_MBM_TOTAL_EVENT_ID:
516 			seq_puts(s, "mbm_total_bytes\n");
517 			break;
518 		default:
519 			pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
520 			ret = -EINVAL;
521 			break;
522 		}
523 	} else {
524 		ret = -ENOENT;
525 	}
526 
527 	rdtgroup_kn_unlock(of->kn);
528 
529 	return ret;
530 }
531 
532 struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
533 					   struct list_head **pos)
534 {
535 	struct rdt_domain_hdr *d;
536 	struct list_head *l;
537 
538 	list_for_each(l, h) {
539 		d = list_entry(l, struct rdt_domain_hdr, list);
540 		/* When id is found, return its domain. */
541 		if (id == d->id)
542 			return d;
543 		/* Stop searching when finding id's position in sorted list. */
544 		if (id < d->id)
545 			break;
546 	}
547 
548 	if (pos)
549 		*pos = l;
550 
551 	return NULL;
552 }
553 
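/*
 * Read event @evt for @rdtgrp and return the result in @rr. The read is
 * performed on a CPU from @cpumask unless the event can be read from any
 * CPU.
 */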
554 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
555 		    struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp,
556 		    cpumask_t *cpumask, struct mon_evt *evt, int first)
557 {
558 	int cpu;
559 
560 	/* When picking a CPU from @cpumask, ensure it can't race with cpuhp */
561 	lockdep_assert_cpus_held();
562 
563 	/*
564 	 * Set up the parameters to pass to mon_event_count() to read the data.
565 	 */
566 	rr->rgrp = rdtgrp;
567 	rr->evt = evt;
568 	rr->r = r;
569 	rr->hdr = hdr;
570 	rr->first = first;
571 	if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
572 	    resctrl_is_mbm_event(evt->evtid)) {
573 		rr->is_mbm_cntr = true;
574 	} else {
575 		rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evt->evtid);
576 		if (IS_ERR(rr->arch_mon_ctx)) {
577 			rr->err = -EINVAL;
578 			return;
579 		}
580 	}
581 
582 	if (evt->any_cpu) {
583 		mon_event_count(rr);
584 		goto out_ctx_free;
585 	}
586 
587 	cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
588 
589 	/*
590 	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but if all
591 	 * the CPUs are nohz_full one of them must be picked for the IPI.
592 	 * MPAM's resctrl_arch_rmid_read() is unable to read the
593 	 * counters on some platforms if it is called in IRQ context.
594 	 */
595 	if (tick_nohz_full_cpu(cpu))
596 		smp_call_function_any(cpumask, mon_event_count, rr, 1);
597 	else
598 		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
599 
600 out_ctx_free:
601 	if (rr->arch_mon_ctx)
602 		resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx);
603 }
604 
605 /*
606  * Number of decimal places to use for each number of fixed-point
607  * binary bits, computed as ceil(binary_bits * log10(2)). The exception is
608  * binary_bits == 0, which prints "value.0".
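 * e.g. binary_bits == 10 gives a granularity of 1/1024, so 4 decimal
 * places (ceil(10 * log10(2)) = 4) are enough to distinguish adjacent
 * values.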
609  */
610 static const unsigned int decplaces[MAX_BINARY_BITS + 1] = {
611 	[0]  =  1,
612 	[1]  =  1,
613 	[2]  =  1,
614 	[3]  =  1,
615 	[4]  =  2,
616 	[5]  =  2,
617 	[6]  =  2,
618 	[7]  =  3,
619 	[8]  =  3,
620 	[9]  =  3,
621 	[10] =  4,
622 	[11] =  4,
623 	[12] =  4,
624 	[13] =  4,
625 	[14] =  5,
626 	[15] =  5,
627 	[16] =  5,
628 	[17] =  6,
629 	[18] =  6,
630 	[19] =  6,
631 	[20] =  7,
632 	[21] =  7,
633 	[22] =  7,
634 	[23] =  7,
635 	[24] =  8,
636 	[25] =  8,
637 	[26] =  8,
638 	[27] =  9
639 };
640 
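/*
 * Print a fixed-point value with @binary_bits fractional bits as a
 * rounded decimal, e.g. binary_bits == 4 and val == 0x18 (1.5) prints
 * "1.50".
 */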
641 static void print_event_value(struct seq_file *m, unsigned int binary_bits, u64 val)
642 {
643 	unsigned long long frac = 0;
644 
645 	if (binary_bits) {
646 		/* Mask off the integer part of the fixed-point value. */
647 		frac = val & GENMASK_ULL(binary_bits - 1, 0);
648 
649 		/*
650 		 * Multiply by 10^{desired decimal places}. The integer part of
651 		 * the fixed point value is now almost what is needed.
652 		 */
653 		frac *= int_pow(10ull, decplaces[binary_bits]);
654 
655 		/*
656 		 * Round to nearest by adding a value that would be a "1" in the
657 		 * binary_bits + 1 place.  Integer part of fixed point value is
658 		 * now the needed value.
659 		 */
660 		frac += 1ull << (binary_bits - 1);
661 
662 		/*
663 		 * Extract the integer part of the value. This is the decimal
664 		 * representation of the original fixed-point fractional value.
665 		 */
666 		frac >>= binary_bits;
667 	}
668 
669 	/*
670 	 * "frac" is now in the range [0 .. 10^decplaces), i.e. its string
671 	 * representation fits into the chosen number of decimal places.
672 	 */
673 	seq_printf(m, "%llu.%0*llu\n", val >> binary_bits, decplaces[binary_bits], frac);
674 }
675 
676 int rdtgroup_mondata_show(struct seq_file *m, void *arg)
677 {
678 	struct kernfs_open_file *of = m->private;
679 	enum resctrl_res_level resid;
680 	struct rdt_domain_hdr *hdr;
681 	struct rmid_read rr = {0};
682 	struct rdtgroup *rdtgrp;
683 	int domid, cpu, ret = 0;
684 	struct rdt_resource *r;
685 	struct cacheinfo *ci;
686 	struct mon_evt *evt;
687 	struct mon_data *md;
688 
689 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
690 	if (!rdtgrp) {
691 		ret = -ENOENT;
692 		goto out;
693 	}
694 
695 	md = of->kn->priv;
696 	if (WARN_ON_ONCE(!md)) {
697 		ret = -EIO;
698 		goto out;
699 	}
700 
701 	resid = md->rid;
702 	domid = md->domid;
703 	evt = md->evt;
704 	r = resctrl_arch_get_resource(resid);
705 
706 	if (md->sum) {
707 		struct rdt_l3_mon_domain *d;
708 
709 		if (WARN_ON_ONCE(resid != RDT_RESOURCE_L3)) {
710 			ret = -EINVAL;
711 			goto out;
712 		}
713 
714 		/*
715 		 * This file requires summing across all domains that share
716 		 * the L3 cache id that was provided in the "domid" field of the
717 		 * struct mon_data. Search all domains in the resource for
718 		 * one that matches this cache id.
719 		 */
720 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
721 			if (d->ci_id == domid) {
722 				cpu = cpumask_any(&d->hdr.cpu_mask);
723 				ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
724 				if (!ci)
725 					continue;
726 				rr.ci = ci;
727 				mon_event_read(&rr, r, NULL, rdtgrp,
728 					       &ci->shared_cpu_map, evt, false);
729 				goto checkresult;
730 			}
731 		}
732 		ret = -ENOENT;
733 		goto out;
734 	} else {
735 		/*
736 		 * This file provides data from a single domain. Search
737 		 * the resource to find the domain with "domid".
738 		 */
739 		hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
740 		if (!hdr) {
741 			ret = -ENOENT;
742 			goto out;
743 		}
744 		mon_event_read(&rr, r, hdr, rdtgrp, &hdr->cpu_mask, evt, false);
745 	}
746 
747 checkresult:
748 
749 	/*
750 	 * -ENOENT is a special case, set only when "mbm_event" counter assignment
751 	 * mode is enabled and no counter has been assigned.
752 	 */
753 	if (rr.err == -EIO)
754 		seq_puts(m, "Error\n");
755 	else if (rr.err == -EINVAL)
756 		seq_puts(m, "Unavailable\n");
757 	else if (rr.err == -ENOENT)
758 		seq_puts(m, "Unassigned\n");
759 	else if (evt->is_floating_point)
760 		print_event_value(m, evt->binary_bits, rr.val);
761 	else
762 		seq_printf(m, "%llu\n", rr.val);
763 
764 out:
765 	rdtgroup_kn_unlock(of->kn);
766 	return ret;
767 }
768 
769 int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
770 {
771 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
772 	struct rdt_resource *r = s->res;
773 
774 	mutex_lock(&rdtgroup_mutex);
775 
776 	if (r->cache.io_alloc_capable) {
777 		if (resctrl_arch_get_io_alloc_enabled(r))
778 			seq_puts(seq, "enabled\n");
779 		else
780 			seq_puts(seq, "disabled\n");
781 	} else {
782 		seq_puts(seq, "not supported\n");
783 	}
784 
785 	mutex_unlock(&rdtgroup_mutex);
786 
787 	return 0;
788 }
789 
790 /*
791  * resctrl_io_alloc_closid_supported() - The io_alloc feature uses the
792  * highest CLOSID value to direct I/O traffic. Check that io_alloc_closid
793  * is within the supported range.
794  */
795 static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid)
796 {
797 	return io_alloc_closid < closids_supported();
798 }
799 
800 /*
801  * Initialize the io_alloc CLOSID's CBM with all usable (shared and
802  * unused) cache portions.
803  */
804 static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid)
805 {
806 	enum resctrl_conf_type peer_type;
807 	struct rdt_resource *r = s->res;
808 	struct rdt_ctrl_domain *d;
809 	int ret;
810 
811 	rdt_staged_configs_clear();
812 
813 	ret = rdtgroup_init_cat(s, closid);
814 	if (ret < 0)
815 		goto out;
816 
817 	/* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */
818 	if (resctrl_arch_get_cdp_enabled(r->rid)) {
819 		peer_type = resctrl_peer_type(s->conf_type);
820 		list_for_each_entry(d, &s->res->ctrl_domains, hdr.list)
821 			memcpy(&d->staged_config[peer_type],
822 			       &d->staged_config[s->conf_type],
823 			       sizeof(d->staged_config[0]));
824 	}
825 
826 	ret = resctrl_arch_update_domains(r, closid);
827 out:
828 	rdt_staged_configs_clear();
829 	return ret;
830 }
831 
832 /*
833  * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using
834  * the highest available CLOSID. Retrieve the maximum CLOSID supported by the
835  * resource. Note that if Code Data Prioritization (CDP) is enabled, the number
836  * of available CLOSIDs is reduced by half.
837  */
838 u32 resctrl_io_alloc_closid(struct rdt_resource *r)
839 {
840 	if (resctrl_arch_get_cdp_enabled(r->rid))
841 		return resctrl_arch_get_num_closid(r) / 2 - 1;
842 	else
843 		return resctrl_arch_get_num_closid(r) - 1;
844 }
845 
846 ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
847 			       size_t nbytes, loff_t off)
848 {
849 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
850 	struct rdt_resource *r = s->res;
851 	char const *grp_name;
852 	u32 io_alloc_closid;
853 	bool enable;
854 	int ret;
855 
856 	ret = kstrtobool(buf, &enable);
857 	if (ret)
858 		return ret;
859 
860 	cpus_read_lock();
861 	mutex_lock(&rdtgroup_mutex);
862 
863 	rdt_last_cmd_clear();
864 
865 	if (!r->cache.io_alloc_capable) {
866 		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
867 		ret = -ENODEV;
868 		goto out_unlock;
869 	}
870 
871 	/* If the feature is already up to date, no action is needed. */
872 	if (resctrl_arch_get_io_alloc_enabled(r) == enable)
873 		goto out_unlock;
874 
875 	io_alloc_closid = resctrl_io_alloc_closid(r);
876 	if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) {
877 		rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n",
878 				    io_alloc_closid);
879 		ret = -EINVAL;
880 		goto out_unlock;
881 	}
882 
883 	if (enable) {
884 		if (!closid_alloc_fixed(io_alloc_closid)) {
885 			grp_name = rdtgroup_name_by_closid(io_alloc_closid);
886 			WARN_ON_ONCE(!grp_name);
887 			rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n",
888 					    io_alloc_closid, grp_name ? grp_name : "another");
889 			ret = -ENOSPC;
890 			goto out_unlock;
891 		}
892 
893 		ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid);
894 		if (ret) {
895 			rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n");
896 			closid_free(io_alloc_closid);
897 			goto out_unlock;
898 		}
899 	} else {
900 		closid_free(io_alloc_closid);
901 	}
902 
903 	ret = resctrl_arch_io_alloc_enable(r, enable);
904 	if (enable && ret) {
905 		rdt_last_cmd_puts("Failed to enable io_alloc feature\n");
906 		closid_free(io_alloc_closid);
907 	}
908 
909 out_unlock:
910 	mutex_unlock(&rdtgroup_mutex);
911 	cpus_read_unlock();
912 
913 	return ret ?: nbytes;
914 }
915 
916 int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
917 {
918 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
919 	struct rdt_resource *r = s->res;
920 	int ret = 0;
921 
922 	cpus_read_lock();
923 	mutex_lock(&rdtgroup_mutex);
924 
925 	rdt_last_cmd_clear();
926 
927 	if (!r->cache.io_alloc_capable) {
928 		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
929 		ret = -ENODEV;
930 		goto out_unlock;
931 	}
932 
933 	if (!resctrl_arch_get_io_alloc_enabled(r)) {
934 		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
935 		ret = -EINVAL;
936 		goto out_unlock;
937 	}
938 
939 	/*
940 	 * When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and
941 	 * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for
942 	 * either CDP resource are identical and accurately represent the CBMs
943 	 * used for I/O.
944 	 */
945 	show_doms(seq, s, NULL, resctrl_io_alloc_closid(r));
946 
947 out_unlock:
948 	mutex_unlock(&rdtgroup_mutex);
949 	cpus_read_unlock();
950 	return ret;
951 }
952 
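/*
 * Parse "id=cbm" tokens for the io_alloc CLOSID and stage the CBM for
 * each listed domain, keeping the CDP peer configuration in sync when
 * CDP is enabled.
 */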
953 static int resctrl_io_alloc_parse_line(char *line, struct rdt_resource *r,
954 				       struct resctrl_schema *s, u32 closid)
955 {
956 	enum resctrl_conf_type peer_type;
957 	struct rdt_parse_data data;
958 	struct rdt_ctrl_domain *d;
959 	char *dom = NULL, *id;
960 	unsigned long dom_id;
961 
962 next:
963 	if (!line || line[0] == '\0')
964 		return 0;
965 
966 	dom = strsep(&line, ";");
967 	id = strsep(&dom, "=");
968 	if (!dom || kstrtoul(id, 10, &dom_id)) {
969 		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
970 		return -EINVAL;
971 	}
972 
973 	dom = strim(dom);
974 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
975 		if (d->hdr.id == dom_id) {
976 			data.buf = dom;
977 			data.mode = RDT_MODE_SHAREABLE;
978 			data.closid = closid;
979 			if (parse_cbm(&data, s, d))
980 				return -EINVAL;
981 			/*
982 			 * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA
983 			 * in sync.
984 			 */
985 			if (resctrl_arch_get_cdp_enabled(r->rid)) {
986 				peer_type = resctrl_peer_type(s->conf_type);
987 				memcpy(&d->staged_config[peer_type],
988 				       &d->staged_config[s->conf_type],
989 				       sizeof(d->staged_config[0]));
990 			}
991 			goto next;
992 		}
993 	}
994 
995 	return -EINVAL;
996 }
997 
998 ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
999 				   size_t nbytes, loff_t off)
1000 {
1001 	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
1002 	struct rdt_resource *r = s->res;
1003 	u32 io_alloc_closid;
1004 	int ret = 0;
1005 
1006 	/* Valid input requires a trailing newline */
1007 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1008 		return -EINVAL;
1009 
1010 	buf[nbytes - 1] = '\0';
1011 
1012 	cpus_read_lock();
1013 	mutex_lock(&rdtgroup_mutex);
1014 	rdt_last_cmd_clear();
1015 
1016 	if (!r->cache.io_alloc_capable) {
1017 		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
1018 		ret = -ENODEV;
1019 		goto out_unlock;
1020 	}
1021 
1022 	if (!resctrl_arch_get_io_alloc_enabled(r)) {
1023 		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
1024 		ret = -EINVAL;
1025 		goto out_unlock;
1026 	}
1027 
1028 	io_alloc_closid = resctrl_io_alloc_closid(r);
1029 
1030 	rdt_staged_configs_clear();
1031 	ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid);
1032 	if (ret)
1033 		goto out_clear_configs;
1034 
1035 	ret = resctrl_arch_update_domains(r, io_alloc_closid);
1036 
1037 out_clear_configs:
1038 	rdt_staged_configs_clear();
1039 out_unlock:
1040 	mutex_unlock(&rdtgroup_mutex);
1041 	cpus_read_unlock();
1042 
1043 	return ret ?: nbytes;
1044 }
1045