xref: /linux/arch/x86/kernel/cpu/resctrl/ctrlmondata.c (revision a3f143c461444c0b56360bbf468615fa814a8372)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Resource Director Technology(RDT)
4  * - Cache Allocation code.
5  *
6  * Copyright (C) 2016 Intel Corporation
7  *
8  * Authors:
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  *    Tony Luck <tony.luck@intel.com>
11  *
12  * More information about RDT can be found in the Intel (R) x86 Architecture
13  * Software Developer Manual June 2016, volume 3, section 17.17.
14  */
15 
16 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
17 
18 #include <linux/cpu.h>
19 #include <linux/kernfs.h>
20 #include <linux/seq_file.h>
21 #include <linux/slab.h>
22 #include <linux/tick.h>
23 
24 #include "internal.h"
25 
26 /*
27  * Check whether MBA bandwidth percentage value is correct. The value is
28  * checked against the minimum and max bandwidth values specified by the
29  * hardware. The allocated bandwidth percentage is rounded to the next
30  * control step available on the hardware.
31  */
32 static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
33 {
34 	int ret;
35 	u32 bw;
36 
37 	/*
38 	 * Only linear delay values is supported for current Intel SKUs.
39 	 */
40 	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
41 		rdt_last_cmd_puts("No support for non-linear MB domains\n");
42 		return false;
43 	}
44 
45 	ret = kstrtou32(buf, 10, &bw);
46 	if (ret) {
47 		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
48 		return false;
49 	}
50 
51 	/* Nothing else to do if software controller is enabled. */
52 	if (is_mba_sc(r)) {
53 		*data = bw;
54 		return true;
55 	}
56 
57 	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
58 		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
59 				    bw, r->membw.min_bw, r->default_ctrl);
60 		return false;
61 	}
62 
63 	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
64 	return true;
65 }
66 
67 int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
68 	     struct rdt_ctrl_domain *d)
69 {
70 	struct resctrl_staged_config *cfg;
71 	u32 closid = data->rdtgrp->closid;
72 	struct rdt_resource *r = s->res;
73 	u32 bw_val;
74 
75 	cfg = &d->staged_config[s->conf_type];
76 	if (cfg->have_new_ctrl) {
77 		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
78 		return -EINVAL;
79 	}
80 
81 	if (!bw_validate(data->buf, &bw_val, r))
82 		return -EINVAL;
83 
84 	if (is_mba_sc(r)) {
85 		d->mbps_val[closid] = bw_val;
86 		return 0;
87 	}
88 
89 	cfg->new_ctrl = bw_val;
90 	cfg->have_new_ctrl = true;
91 
92 	return 0;
93 }
94 
95 /*
96  * Check whether a cache bit mask is valid.
97  * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
98  *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
99  *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
100  *
101  * Haswell does not support a non-contiguous 1s value and additionally
102  * requires at least two bits set.
103  * AMD allows non-contiguous bitmasks.
104  */
105 static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
106 {
107 	unsigned long first_bit, zero_bit, val;
108 	unsigned int cbm_len = r->cache.cbm_len;
109 	int ret;
110 
111 	ret = kstrtoul(buf, 16, &val);
112 	if (ret) {
113 		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
114 		return false;
115 	}
116 
117 	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
118 		rdt_last_cmd_puts("Mask out of range\n");
119 		return false;
120 	}
121 
122 	first_bit = find_first_bit(&val, cbm_len);
123 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
124 
125 	/* Are non-contiguous bitmasks allowed? */
126 	if (!r->cache.arch_has_sparse_bitmasks &&
127 	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
128 		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
129 		return false;
130 	}
131 
132 	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
133 		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
134 				    r->cache.min_cbm_bits);
135 		return false;
136 	}
137 
138 	*data = val;
139 	return true;
140 }
141 
142 /*
143  * Read one cache bit mask (hex). Check that it is valid for the current
144  * resource type.
145  */
146 int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
147 	      struct rdt_ctrl_domain *d)
148 {
149 	struct rdtgroup *rdtgrp = data->rdtgrp;
150 	struct resctrl_staged_config *cfg;
151 	struct rdt_resource *r = s->res;
152 	u32 cbm_val;
153 
154 	cfg = &d->staged_config[s->conf_type];
155 	if (cfg->have_new_ctrl) {
156 		rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
157 		return -EINVAL;
158 	}
159 
160 	/*
161 	 * Cannot set up more than one pseudo-locked region in a cache
162 	 * hierarchy.
163 	 */
164 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
165 	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
166 		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
167 		return -EINVAL;
168 	}
169 
170 	if (!cbm_validate(data->buf, &cbm_val, r))
171 		return -EINVAL;
172 
173 	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
174 	     rdtgrp->mode == RDT_MODE_SHAREABLE) &&
175 	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
176 		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
177 		return -EINVAL;
178 	}
179 
180 	/*
181 	 * The CBM may not overlap with the CBM of another closid if
182 	 * either is exclusive.
183 	 */
184 	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
185 		rdt_last_cmd_puts("Overlaps with exclusive group\n");
186 		return -EINVAL;
187 	}
188 
189 	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
190 		if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
191 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
192 			rdt_last_cmd_puts("Overlaps with other group\n");
193 			return -EINVAL;
194 		}
195 	}
196 
197 	cfg->new_ctrl = cbm_val;
198 	cfg->have_new_ctrl = true;
199 
200 	return 0;
201 }
202 
203 /*
204  * For each domain in this resource we expect to find a series of:
205  *	id=mask
206  * separated by ";". The "id" is in decimal, and must match one of
207  * the "id"s for this resource.
208  */
209 static int parse_line(char *line, struct resctrl_schema *s,
210 		      struct rdtgroup *rdtgrp)
211 {
212 	enum resctrl_conf_type t = s->conf_type;
213 	struct resctrl_staged_config *cfg;
214 	struct rdt_resource *r = s->res;
215 	struct rdt_parse_data data;
216 	struct rdt_ctrl_domain *d;
217 	char *dom = NULL, *id;
218 	unsigned long dom_id;
219 
220 	/* Walking r->domains, ensure it can't race with cpuhp */
221 	lockdep_assert_cpus_held();
222 
223 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
224 	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
225 		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
226 		return -EINVAL;
227 	}
228 
229 next:
230 	if (!line || line[0] == '\0')
231 		return 0;
232 	dom = strsep(&line, ";");
233 	id = strsep(&dom, "=");
234 	if (!dom || kstrtoul(id, 10, &dom_id)) {
235 		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
236 		return -EINVAL;
237 	}
238 	dom = strim(dom);
239 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
240 		if (d->hdr.id == dom_id) {
241 			data.buf = dom;
242 			data.rdtgrp = rdtgrp;
243 			if (r->parse_ctrlval(&data, s, d))
244 				return -EINVAL;
245 			if (rdtgrp->mode ==  RDT_MODE_PSEUDO_LOCKSETUP) {
246 				cfg = &d->staged_config[t];
247 				/*
248 				 * In pseudo-locking setup mode and just
249 				 * parsed a valid CBM that should be
250 				 * pseudo-locked. Only one locked region per
251 				 * resource group and domain so just do
252 				 * the required initialization for single
253 				 * region and return.
254 				 */
255 				rdtgrp->plr->s = s;
256 				rdtgrp->plr->d = d;
257 				rdtgrp->plr->cbm = cfg->new_ctrl;
258 				d->plr = rdtgrp->plr;
259 				return 0;
260 			}
261 			goto next;
262 		}
263 	}
264 	return -EINVAL;
265 }
266 
267 static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
268 {
269 	switch (type) {
270 	default:
271 	case CDP_NONE:
272 		return closid;
273 	case CDP_CODE:
274 		return closid * 2 + 1;
275 	case CDP_DATA:
276 		return closid * 2;
277 	}
278 }
279 
280 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
281 			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
282 {
283 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
284 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
285 	u32 idx = get_config_index(closid, t);
286 	struct msr_param msr_param;
287 
288 	if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
289 		return -EINVAL;
290 
291 	hw_dom->ctrl_val[idx] = cfg_val;
292 
293 	msr_param.res = r;
294 	msr_param.dom = d;
295 	msr_param.low = idx;
296 	msr_param.high = idx + 1;
297 	hw_res->msr_update(&msr_param);
298 
299 	return 0;
300 }
301 
302 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
303 {
304 	struct resctrl_staged_config *cfg;
305 	struct rdt_hw_ctrl_domain *hw_dom;
306 	struct msr_param msr_param;
307 	struct rdt_ctrl_domain *d;
308 	enum resctrl_conf_type t;
309 	u32 idx;
310 
311 	/* Walking r->domains, ensure it can't race with cpuhp */
312 	lockdep_assert_cpus_held();
313 
314 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
315 		hw_dom = resctrl_to_arch_ctrl_dom(d);
316 		msr_param.res = NULL;
317 		for (t = 0; t < CDP_NUM_TYPES; t++) {
318 			cfg = &hw_dom->d_resctrl.staged_config[t];
319 			if (!cfg->have_new_ctrl)
320 				continue;
321 
322 			idx = get_config_index(closid, t);
323 			if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
324 				continue;
325 			hw_dom->ctrl_val[idx] = cfg->new_ctrl;
326 
327 			if (!msr_param.res) {
328 				msr_param.low = idx;
329 				msr_param.high = msr_param.low + 1;
330 				msr_param.res = r;
331 				msr_param.dom = d;
332 			} else {
333 				msr_param.low = min(msr_param.low, idx);
334 				msr_param.high = max(msr_param.high, idx + 1);
335 			}
336 		}
337 		if (msr_param.res)
338 			smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
339 	}
340 
341 	return 0;
342 }
343 
344 static int rdtgroup_parse_resource(char *resname, char *tok,
345 				   struct rdtgroup *rdtgrp)
346 {
347 	struct resctrl_schema *s;
348 
349 	list_for_each_entry(s, &resctrl_schema_all, list) {
350 		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
351 			return parse_line(tok, s, rdtgrp);
352 	}
353 	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
354 	return -EINVAL;
355 }
356 
357 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
358 				char *buf, size_t nbytes, loff_t off)
359 {
360 	struct resctrl_schema *s;
361 	struct rdtgroup *rdtgrp;
362 	struct rdt_resource *r;
363 	char *tok, *resname;
364 	int ret = 0;
365 
366 	/* Valid input requires a trailing newline */
367 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
368 		return -EINVAL;
369 	buf[nbytes - 1] = '\0';
370 
371 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
372 	if (!rdtgrp) {
373 		rdtgroup_kn_unlock(of->kn);
374 		return -ENOENT;
375 	}
376 	rdt_last_cmd_clear();
377 
378 	/*
379 	 * No changes to pseudo-locked region allowed. It has to be removed
380 	 * and re-created instead.
381 	 */
382 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
383 		ret = -EINVAL;
384 		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
385 		goto out;
386 	}
387 
388 	rdt_staged_configs_clear();
389 
390 	while ((tok = strsep(&buf, "\n")) != NULL) {
391 		resname = strim(strsep(&tok, ":"));
392 		if (!tok) {
393 			rdt_last_cmd_puts("Missing ':'\n");
394 			ret = -EINVAL;
395 			goto out;
396 		}
397 		if (tok[0] == '\0') {
398 			rdt_last_cmd_printf("Missing '%s' value\n", resname);
399 			ret = -EINVAL;
400 			goto out;
401 		}
402 		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
403 		if (ret)
404 			goto out;
405 	}
406 
407 	list_for_each_entry(s, &resctrl_schema_all, list) {
408 		r = s->res;
409 
410 		/*
411 		 * Writes to mba_sc resources update the software controller,
412 		 * not the control MSR.
413 		 */
414 		if (is_mba_sc(r))
415 			continue;
416 
417 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
418 		if (ret)
419 			goto out;
420 	}
421 
422 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
423 		/*
424 		 * If pseudo-locking fails we keep the resource group in
425 		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
426 		 * active and updated for just the domain the pseudo-locked
427 		 * region was requested for.
428 		 */
429 		ret = rdtgroup_pseudo_lock_create(rdtgrp);
430 	}
431 
432 out:
433 	rdt_staged_configs_clear();
434 	rdtgroup_kn_unlock(of->kn);
435 	return ret ?: nbytes;
436 }
437 
438 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
439 			    u32 closid, enum resctrl_conf_type type)
440 {
441 	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
442 	u32 idx = get_config_index(closid, type);
443 
444 	return hw_dom->ctrl_val[idx];
445 }
446 
447 static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
448 {
449 	struct rdt_resource *r = schema->res;
450 	struct rdt_ctrl_domain *dom;
451 	bool sep = false;
452 	u32 ctrl_val;
453 
454 	/* Walking r->domains, ensure it can't race with cpuhp */
455 	lockdep_assert_cpus_held();
456 
457 	seq_printf(s, "%*s:", max_name_width, schema->name);
458 	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
459 		if (sep)
460 			seq_puts(s, ";");
461 
462 		if (is_mba_sc(r))
463 			ctrl_val = dom->mbps_val[closid];
464 		else
465 			ctrl_val = resctrl_arch_get_config(r, dom, closid,
466 							   schema->conf_type);
467 
468 		seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
469 			   ctrl_val);
470 		sep = true;
471 	}
472 	seq_puts(s, "\n");
473 }
474 
475 int rdtgroup_schemata_show(struct kernfs_open_file *of,
476 			   struct seq_file *s, void *v)
477 {
478 	struct resctrl_schema *schema;
479 	struct rdtgroup *rdtgrp;
480 	int ret = 0;
481 	u32 closid;
482 
483 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
484 	if (rdtgrp) {
485 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
486 			list_for_each_entry(schema, &resctrl_schema_all, list) {
487 				seq_printf(s, "%s:uninitialized\n", schema->name);
488 			}
489 		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
490 			if (!rdtgrp->plr->d) {
491 				rdt_last_cmd_clear();
492 				rdt_last_cmd_puts("Cache domain offline\n");
493 				ret = -ENODEV;
494 			} else {
495 				seq_printf(s, "%s:%d=%x\n",
496 					   rdtgrp->plr->s->res->name,
497 					   rdtgrp->plr->d->hdr.id,
498 					   rdtgrp->plr->cbm);
499 			}
500 		} else {
501 			closid = rdtgrp->closid;
502 			list_for_each_entry(schema, &resctrl_schema_all, list) {
503 				if (closid < schema->num_closid)
504 					show_doms(s, schema, closid);
505 			}
506 		}
507 	} else {
508 		ret = -ENOENT;
509 	}
510 	rdtgroup_kn_unlock(of->kn);
511 	return ret;
512 }
513 
/*
 * Wrapper that gives mon_event_count() an int-returning signature so it
 * can be handed to smp_call_on_cpu(). Always returns 0.
 */
static int smp_mon_event_count(void *rr)
{
	mon_event_count(rr);
	return 0;
}
520 
521 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
522 		    struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
523 		    cpumask_t *cpumask, int evtid, int first)
524 {
525 	int cpu;
526 
527 	/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
528 	lockdep_assert_cpus_held();
529 
530 	/*
531 	 * Setup the parameters to pass to mon_event_count() to read the data.
532 	 */
533 	rr->rgrp = rdtgrp;
534 	rr->evtid = evtid;
535 	rr->r = r;
536 	rr->d = d;
537 	rr->first = first;
538 	rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
539 	if (IS_ERR(rr->arch_mon_ctx)) {
540 		rr->err = -EINVAL;
541 		return;
542 	}
543 
544 	cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
545 
546 	/*
547 	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
548 	 * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
549 	 * MPAM's resctrl_arch_rmid_read() is unable to read the
550 	 * counters on some platforms if its called in IRQ context.
551 	 */
552 	if (tick_nohz_full_cpu(cpu))
553 		smp_call_function_any(cpumask, mon_event_count, rr, 1);
554 	else
555 		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
556 
557 	resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
558 }
559 
560 int rdtgroup_mondata_show(struct seq_file *m, void *arg)
561 {
562 	struct kernfs_open_file *of = m->private;
563 	struct rdt_domain_hdr *hdr;
564 	struct rmid_read rr = {0};
565 	struct rdt_mon_domain *d;
566 	u32 resid, evtid, domid;
567 	struct rdtgroup *rdtgrp;
568 	struct rdt_resource *r;
569 	union mon_data_bits md;
570 	int ret = 0;
571 
572 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
573 	if (!rdtgrp) {
574 		ret = -ENOENT;
575 		goto out;
576 	}
577 
578 	md.priv = of->kn->priv;
579 	resid = md.u.rid;
580 	domid = md.u.domid;
581 	evtid = md.u.evtid;
582 	r = &rdt_resources_all[resid].r_resctrl;
583 
584 	if (md.u.sum) {
585 		/*
586 		 * This file requires summing across all domains that share
587 		 * the L3 cache id that was provided in the "domid" field of the
588 		 * mon_data_bits union. Search all domains in the resource for
589 		 * one that matches this cache id.
590 		 */
591 		list_for_each_entry(d, &r->mon_domains, hdr.list) {
592 			if (d->ci->id == domid) {
593 				rr.ci = d->ci;
594 				mon_event_read(&rr, r, NULL, rdtgrp,
595 					       &d->ci->shared_cpu_map, evtid, false);
596 				goto checkresult;
597 			}
598 		}
599 		ret = -ENOENT;
600 		goto out;
601 	} else {
602 		/*
603 		 * This file provides data from a single domain. Search
604 		 * the resource to find the domain with "domid".
605 		 */
606 		hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
607 		if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
608 			ret = -ENOENT;
609 			goto out;
610 		}
611 		d = container_of(hdr, struct rdt_mon_domain, hdr);
612 		mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
613 	}
614 
615 checkresult:
616 
617 	if (rr.err == -EIO)
618 		seq_puts(m, "Error\n");
619 	else if (rr.err == -EINVAL)
620 		seq_puts(m, "Unavailable\n");
621 	else
622 		seq_printf(m, "%llu\n", rr.val);
623 
624 out:
625 	rdtgroup_kn_unlock(of->kn);
626 	return ret;
627 }
628