1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Resource Director Technology(RDT) 4 * - Cache Allocation code. 5 * 6 * Copyright (C) 2016 Intel Corporation 7 * 8 * Authors: 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Tony Luck <tony.luck@intel.com> 11 * 12 * More information about RDT be found in the Intel (R) x86 Architecture 13 * Software Developer Manual June 2016, volume 3, section 17.17. 14 */ 15 16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 17 18 #include <linux/cpu.h> 19 #include <linux/kernfs.h> 20 #include <linux/seq_file.h> 21 #include <linux/slab.h> 22 #include <linux/tick.h> 23 24 #include "internal.h" 25 26 /* 27 * Check whether MBA bandwidth percentage value is correct. The value is 28 * checked against the minimum and max bandwidth values specified by the 29 * hardware. The allocated bandwidth percentage is rounded to the next 30 * control step available on the hardware. 31 */ 32 static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) 33 { 34 int ret; 35 u32 bw; 36 37 /* 38 * Only linear delay values is supported for current Intel SKUs. 39 */ 40 if (!r->membw.delay_linear && r->membw.arch_needs_linear) { 41 rdt_last_cmd_puts("No support for non-linear MB domains\n"); 42 return false; 43 } 44 45 ret = kstrtou32(buf, 10, &bw); 46 if (ret) { 47 rdt_last_cmd_printf("Invalid MB value %s\n", buf); 48 return false; 49 } 50 51 /* Nothing else to do if software controller is enabled. */ 52 if (is_mba_sc(r)) { 53 *data = bw; 54 return true; 55 } 56 57 if (bw < r->membw.min_bw || bw > r->default_ctrl) { 58 rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", 59 bw, r->membw.min_bw, r->default_ctrl); 60 return false; 61 } 62 63 *data = roundup(bw, (unsigned long)r->membw.bw_gran); 64 return true; 65 } 66 67 int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, 68 struct rdt_ctrl_domain *d) 69 { 70 struct resctrl_staged_config *cfg; 71 u32 closid = data->rdtgrp->closid; 72 struct rdt_resource *r = s->res; 73 u32 bw_val; 74 75 cfg = &d->staged_config[s->conf_type]; 76 if (cfg->have_new_ctrl) { 77 rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); 78 return -EINVAL; 79 } 80 81 if (!bw_validate(data->buf, &bw_val, r)) 82 return -EINVAL; 83 84 if (is_mba_sc(r)) { 85 d->mbps_val[closid] = bw_val; 86 return 0; 87 } 88 89 cfg->new_ctrl = bw_val; 90 cfg->have_new_ctrl = true; 91 92 return 0; 93 } 94 95 /* 96 * Check whether a cache bit mask is valid. 97 * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: 98 * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1 99 * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1 100 * 101 * Haswell does not support a non-contiguous 1s value and additionally 102 * requires at least two bits set. 103 * AMD allows non-contiguous bitmasks. 104 */ 105 static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) 106 { 107 unsigned long first_bit, zero_bit, val; 108 unsigned int cbm_len = r->cache.cbm_len; 109 int ret; 110 111 ret = kstrtoul(buf, 16, &val); 112 if (ret) { 113 rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); 114 return false; 115 } 116 117 if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { 118 rdt_last_cmd_puts("Mask out of range\n"); 119 return false; 120 } 121 122 first_bit = find_first_bit(&val, cbm_len); 123 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); 124 125 /* Are non-contiguous bitmasks allowed? */ 126 if (!r->cache.arch_has_sparse_bitmasks && 127 (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { 128 rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); 129 return false; 130 } 131 132 if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { 133 rdt_last_cmd_printf("Need at least %d bits in the mask\n", 134 r->cache.min_cbm_bits); 135 return false; 136 } 137 138 *data = val; 139 return true; 140 } 141 142 /* 143 * Read one cache bit mask (hex). Check that it is valid for the current 144 * resource type. 145 */ 146 int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, 147 struct rdt_ctrl_domain *d) 148 { 149 struct rdtgroup *rdtgrp = data->rdtgrp; 150 struct resctrl_staged_config *cfg; 151 struct rdt_resource *r = s->res; 152 u32 cbm_val; 153 154 cfg = &d->staged_config[s->conf_type]; 155 if (cfg->have_new_ctrl) { 156 rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); 157 return -EINVAL; 158 } 159 160 /* 161 * Cannot set up more than one pseudo-locked region in a cache 162 * hierarchy. 163 */ 164 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && 165 rdtgroup_pseudo_locked_in_hierarchy(d)) { 166 rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); 167 return -EINVAL; 168 } 169 170 if (!cbm_validate(data->buf, &cbm_val, r)) 171 return -EINVAL; 172 173 if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || 174 rdtgrp->mode == RDT_MODE_SHAREABLE) && 175 rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { 176 rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); 177 return -EINVAL; 178 } 179 180 /* 181 * The CBM may not overlap with the CBM of another closid if 182 * either is exclusive. 183 */ 184 if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { 185 rdt_last_cmd_puts("Overlaps with exclusive group\n"); 186 return -EINVAL; 187 } 188 189 if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { 190 if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || 191 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 192 rdt_last_cmd_puts("Overlaps with other group\n"); 193 return -EINVAL; 194 } 195 } 196 197 cfg->new_ctrl = cbm_val; 198 cfg->have_new_ctrl = true; 199 200 return 0; 201 } 202 203 /* 204 * For each domain in this resource we expect to find a series of: 205 * id=mask 206 * separated by ";". The "id" is in decimal, and must match one of 207 * the "id"s for this resource. 208 */ 209 static int parse_line(char *line, struct resctrl_schema *s, 210 struct rdtgroup *rdtgrp) 211 { 212 enum resctrl_conf_type t = s->conf_type; 213 struct resctrl_staged_config *cfg; 214 struct rdt_resource *r = s->res; 215 struct rdt_parse_data data; 216 struct rdt_ctrl_domain *d; 217 char *dom = NULL, *id; 218 unsigned long dom_id; 219 220 /* Walking r->domains, ensure it can't race with cpuhp */ 221 lockdep_assert_cpus_held(); 222 223 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && 224 (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { 225 rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); 226 return -EINVAL; 227 } 228 229 next: 230 if (!line || line[0] == '\0') 231 return 0; 232 dom = strsep(&line, ";"); 233 id = strsep(&dom, "="); 234 if (!dom || kstrtoul(id, 10, &dom_id)) { 235 rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); 236 return -EINVAL; 237 } 238 dom = strim(dom); 239 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 240 if (d->hdr.id == dom_id) { 241 data.buf = dom; 242 data.rdtgrp = rdtgrp; 243 if (r->parse_ctrlval(&data, s, d)) 244 return -EINVAL; 245 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 246 cfg = &d->staged_config[t]; 247 /* 248 * In pseudo-locking setup mode and just 249 * parsed a valid CBM that should be 250 * pseudo-locked. Only one locked region per 251 * resource group and domain so just do 252 * the required initialization for single 253 * region and return. 254 */ 255 rdtgrp->plr->s = s; 256 rdtgrp->plr->d = d; 257 rdtgrp->plr->cbm = cfg->new_ctrl; 258 d->plr = rdtgrp->plr; 259 return 0; 260 } 261 goto next; 262 } 263 } 264 return -EINVAL; 265 } 266 267 static u32 get_config_index(u32 closid, enum resctrl_conf_type type) 268 { 269 switch (type) { 270 default: 271 case CDP_NONE: 272 return closid; 273 case CDP_CODE: 274 return closid * 2 + 1; 275 case CDP_DATA: 276 return closid * 2; 277 } 278 } 279 280 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, 281 u32 closid, enum resctrl_conf_type t, u32 cfg_val) 282 { 283 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); 284 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 285 u32 idx = get_config_index(closid, t); 286 struct msr_param msr_param; 287 288 if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) 289 return -EINVAL; 290 291 hw_dom->ctrl_val[idx] = cfg_val; 292 293 msr_param.res = r; 294 msr_param.dom = d; 295 msr_param.low = idx; 296 msr_param.high = idx + 1; 297 hw_res->msr_update(&msr_param); 298 299 return 0; 300 } 301 302 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) 303 { 304 struct resctrl_staged_config *cfg; 305 struct rdt_hw_ctrl_domain *hw_dom; 306 struct msr_param msr_param; 307 struct rdt_ctrl_domain *d; 308 enum resctrl_conf_type t; 309 u32 idx; 310 311 /* Walking r->domains, ensure it can't race with cpuhp */ 312 lockdep_assert_cpus_held(); 313 314 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 315 hw_dom = resctrl_to_arch_ctrl_dom(d); 316 msr_param.res = NULL; 317 for (t = 0; t < CDP_NUM_TYPES; t++) { 318 cfg = &hw_dom->d_resctrl.staged_config[t]; 319 if (!cfg->have_new_ctrl) 320 continue; 321 322 idx = get_config_index(closid, t); 323 if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) 324 continue; 325 hw_dom->ctrl_val[idx] = cfg->new_ctrl; 326 327 if (!msr_param.res) { 328 msr_param.low = idx; 329 msr_param.high = msr_param.low + 1; 330 msr_param.res = r; 331 msr_param.dom = d; 332 } else { 333 msr_param.low = min(msr_param.low, idx); 334 msr_param.high = max(msr_param.high, idx + 1); 335 } 336 } 337 if (msr_param.res) 338 smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); 339 } 340 341 return 0; 342 } 343 344 static int rdtgroup_parse_resource(char *resname, char *tok, 345 struct rdtgroup *rdtgrp) 346 { 347 struct resctrl_schema *s; 348 349 list_for_each_entry(s, &resctrl_schema_all, list) { 350 if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) 351 return parse_line(tok, s, rdtgrp); 352 } 353 rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); 354 return -EINVAL; 355 } 356 357 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, 358 char *buf, size_t nbytes, loff_t off) 359 { 360 struct resctrl_schema *s; 361 struct rdtgroup *rdtgrp; 362 struct rdt_resource *r; 363 char *tok, *resname; 364 int ret = 0; 365 366 /* Valid input requires a trailing newline */ 367 if (nbytes == 0 || buf[nbytes - 1] != '\n') 368 return -EINVAL; 369 buf[nbytes - 1] = '\0'; 370 371 rdtgrp = rdtgroup_kn_lock_live(of->kn); 372 if (!rdtgrp) { 373 rdtgroup_kn_unlock(of->kn); 374 return -ENOENT; 375 } 376 rdt_last_cmd_clear(); 377 378 /* 379 * No changes to pseudo-locked region allowed. It has to be removed 380 * and re-created instead. 381 */ 382 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 383 ret = -EINVAL; 384 rdt_last_cmd_puts("Resource group is pseudo-locked\n"); 385 goto out; 386 } 387 388 rdt_staged_configs_clear(); 389 390 while ((tok = strsep(&buf, "\n")) != NULL) { 391 resname = strim(strsep(&tok, ":")); 392 if (!tok) { 393 rdt_last_cmd_puts("Missing ':'\n"); 394 ret = -EINVAL; 395 goto out; 396 } 397 if (tok[0] == '\0') { 398 rdt_last_cmd_printf("Missing '%s' value\n", resname); 399 ret = -EINVAL; 400 goto out; 401 } 402 ret = rdtgroup_parse_resource(resname, tok, rdtgrp); 403 if (ret) 404 goto out; 405 } 406 407 list_for_each_entry(s, &resctrl_schema_all, list) { 408 r = s->res; 409 410 /* 411 * Writes to mba_sc resources update the software controller, 412 * not the control MSR. 413 */ 414 if (is_mba_sc(r)) 415 continue; 416 417 ret = resctrl_arch_update_domains(r, rdtgrp->closid); 418 if (ret) 419 goto out; 420 } 421 422 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 423 /* 424 * If pseudo-locking fails we keep the resource group in 425 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service 426 * active and updated for just the domain the pseudo-locked 427 * region was requested for. 428 */ 429 ret = rdtgroup_pseudo_lock_create(rdtgrp); 430 } 431 432 out: 433 rdt_staged_configs_clear(); 434 rdtgroup_kn_unlock(of->kn); 435 return ret ?: nbytes; 436 } 437 438 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, 439 u32 closid, enum resctrl_conf_type type) 440 { 441 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); 442 u32 idx = get_config_index(closid, type); 443 444 return hw_dom->ctrl_val[idx]; 445 } 446 447 static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) 448 { 449 struct rdt_resource *r = schema->res; 450 struct rdt_ctrl_domain *dom; 451 bool sep = false; 452 u32 ctrl_val; 453 454 /* Walking r->domains, ensure it can't race with cpuhp */ 455 lockdep_assert_cpus_held(); 456 457 seq_printf(s, "%*s:", max_name_width, schema->name); 458 list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { 459 if (sep) 460 seq_puts(s, ";"); 461 462 if (is_mba_sc(r)) 463 ctrl_val = dom->mbps_val[closid]; 464 else 465 ctrl_val = resctrl_arch_get_config(r, dom, closid, 466 schema->conf_type); 467 468 seq_printf(s, r->format_str, dom->hdr.id, max_data_width, 469 ctrl_val); 470 sep = true; 471 } 472 seq_puts(s, "\n"); 473 } 474 475 int rdtgroup_schemata_show(struct kernfs_open_file *of, 476 struct seq_file *s, void *v) 477 { 478 struct resctrl_schema *schema; 479 struct rdtgroup *rdtgrp; 480 int ret = 0; 481 u32 closid; 482 483 rdtgrp = rdtgroup_kn_lock_live(of->kn); 484 if (rdtgrp) { 485 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 486 list_for_each_entry(schema, &resctrl_schema_all, list) { 487 seq_printf(s, "%s:uninitialized\n", schema->name); 488 } 489 } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 490 if (!rdtgrp->plr->d) { 491 rdt_last_cmd_clear(); 492 rdt_last_cmd_puts("Cache domain offline\n"); 493 ret = -ENODEV; 494 } else { 495 seq_printf(s, "%s:%d=%x\n", 496 rdtgrp->plr->s->res->name, 497 rdtgrp->plr->d->hdr.id, 498 rdtgrp->plr->cbm); 499 } 500 } else { 501 closid = rdtgrp->closid; 502 list_for_each_entry(schema, &resctrl_schema_all, list) { 503 if (closid < schema->num_closid) 504 show_doms(s, schema, closid); 505 } 506 } 507 } else { 508 ret = -ENOENT; 509 } 510 rdtgroup_kn_unlock(of->kn); 511 return ret; 512 } 513 514 static int smp_mon_event_count(void *arg) 515 { 516 mon_event_count(arg); 517 518 return 0; 519 } 520 521 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, 522 struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, 523 cpumask_t *cpumask, int evtid, int first) 524 { 525 int cpu; 526 527 /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ 528 lockdep_assert_cpus_held(); 529 530 /* 531 * Setup the parameters to pass to mon_event_count() to read the data. 532 */ 533 rr->rgrp = rdtgrp; 534 rr->evtid = evtid; 535 rr->r = r; 536 rr->d = d; 537 rr->first = first; 538 rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); 539 if (IS_ERR(rr->arch_mon_ctx)) { 540 rr->err = -EINVAL; 541 return; 542 } 543 544 cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); 545 546 /* 547 * cpumask_any_housekeeping() prefers housekeeping CPUs, but 548 * are all the CPUs nohz_full? If yes, pick a CPU to IPI. 549 * MPAM's resctrl_arch_rmid_read() is unable to read the 550 * counters on some platforms if its called in IRQ context. 551 */ 552 if (tick_nohz_full_cpu(cpu)) 553 smp_call_function_any(cpumask, mon_event_count, rr, 1); 554 else 555 smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); 556 557 resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); 558 } 559 560 int rdtgroup_mondata_show(struct seq_file *m, void *arg) 561 { 562 struct kernfs_open_file *of = m->private; 563 struct rdt_domain_hdr *hdr; 564 struct rmid_read rr = {0}; 565 struct rdt_mon_domain *d; 566 u32 resid, evtid, domid; 567 struct rdtgroup *rdtgrp; 568 struct rdt_resource *r; 569 union mon_data_bits md; 570 int ret = 0; 571 572 rdtgrp = rdtgroup_kn_lock_live(of->kn); 573 if (!rdtgrp) { 574 ret = -ENOENT; 575 goto out; 576 } 577 578 md.priv = of->kn->priv; 579 resid = md.u.rid; 580 domid = md.u.domid; 581 evtid = md.u.evtid; 582 r = &rdt_resources_all[resid].r_resctrl; 583 584 if (md.u.sum) { 585 /* 586 * This file requires summing across all domains that share 587 * the L3 cache id that was provided in the "domid" field of the 588 * mon_data_bits union. Search all domains in the resource for 589 * one that matches this cache id. 590 */ 591 list_for_each_entry(d, &r->mon_domains, hdr.list) { 592 if (d->ci->id == domid) { 593 rr.ci = d->ci; 594 mon_event_read(&rr, r, NULL, rdtgrp, 595 &d->ci->shared_cpu_map, evtid, false); 596 goto checkresult; 597 } 598 } 599 ret = -ENOENT; 600 goto out; 601 } else { 602 /* 603 * This file provides data from a single domain. Search 604 * the resource to find the domain with "domid". 605 */ 606 hdr = rdt_find_domain(&r->mon_domains, domid, NULL); 607 if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { 608 ret = -ENOENT; 609 goto out; 610 } 611 d = container_of(hdr, struct rdt_mon_domain, hdr); 612 mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false); 613 } 614 615 checkresult: 616 617 if (rr.err == -EIO) 618 seq_puts(m, "Error\n"); 619 else if (rr.err == -EINVAL) 620 seq_puts(m, "Unavailable\n"); 621 else 622 seq_printf(m, "%llu\n", rr.val); 623 624 out: 625 rdtgroup_kn_unlock(of->kn); 626 return ret; 627 } 628