1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Resource Director Technology(RDT) 4 * - Cache Allocation code. 5 * 6 * Copyright (C) 2016 Intel Corporation 7 * 8 * Authors: 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Tony Luck <tony.luck@intel.com> 11 * 12 * More information about RDT be found in the Intel (R) x86 Architecture 13 * Software Developer Manual June 2016, volume 3, section 17.17. 14 */ 15 16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 17 18 #include <linux/cpu.h> 19 #include <linux/kernfs.h> 20 #include <linux/seq_file.h> 21 #include <linux/slab.h> 22 #include <linux/tick.h> 23 24 #include "internal.h" 25 26 /* 27 * Check whether MBA bandwidth percentage value is correct. The value is 28 * checked against the minimum and max bandwidth values specified by the 29 * hardware. The allocated bandwidth percentage is rounded to the next 30 * control step available on the hardware. 31 */ 32 static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) 33 { 34 unsigned long bw; 35 int ret; 36 37 /* 38 * Only linear delay values is supported for current Intel SKUs. 39 */ 40 if (!r->membw.delay_linear && r->membw.arch_needs_linear) { 41 rdt_last_cmd_puts("No support for non-linear MB domains\n"); 42 return false; 43 } 44 45 ret = kstrtoul(buf, 10, &bw); 46 if (ret) { 47 rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); 48 return false; 49 } 50 51 if ((bw < r->membw.min_bw || bw > r->default_ctrl) && 52 !is_mba_sc(r)) { 53 rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, 54 r->membw.min_bw, r->default_ctrl); 55 return false; 56 } 57 58 *data = roundup(bw, (unsigned long)r->membw.bw_gran); 59 return true; 60 } 61 62 int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, 63 struct rdt_domain *d) 64 { 65 struct resctrl_staged_config *cfg; 66 u32 closid = data->rdtgrp->closid; 67 struct rdt_resource *r = s->res; 68 unsigned long bw_val; 69 70 cfg = &d->staged_config[s->conf_type]; 71 if (cfg->have_new_ctrl) { 72 rdt_last_cmd_printf("Duplicate domain %d\n", d->id); 73 return -EINVAL; 74 } 75 76 if (!bw_validate(data->buf, &bw_val, r)) 77 return -EINVAL; 78 79 if (is_mba_sc(r)) { 80 d->mbps_val[closid] = bw_val; 81 return 0; 82 } 83 84 cfg->new_ctrl = bw_val; 85 cfg->have_new_ctrl = true; 86 87 return 0; 88 } 89 90 /* 91 * Check whether a cache bit mask is valid. 92 * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: 93 * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1 94 * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1 95 * 96 * Haswell does not support a non-contiguous 1s value and additionally 97 * requires at least two bits set. 98 * AMD allows non-contiguous bitmasks. 99 */ 100 static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) 101 { 102 unsigned long first_bit, zero_bit, val; 103 unsigned int cbm_len = r->cache.cbm_len; 104 int ret; 105 106 ret = kstrtoul(buf, 16, &val); 107 if (ret) { 108 rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); 109 return false; 110 } 111 112 if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { 113 rdt_last_cmd_puts("Mask out of range\n"); 114 return false; 115 } 116 117 first_bit = find_first_bit(&val, cbm_len); 118 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); 119 120 /* Are non-contiguous bitmasks allowed? */ 121 if (!r->cache.arch_has_sparse_bitmasks && 122 (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { 123 rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); 124 return false; 125 } 126 127 if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { 128 rdt_last_cmd_printf("Need at least %d bits in the mask\n", 129 r->cache.min_cbm_bits); 130 return false; 131 } 132 133 *data = val; 134 return true; 135 } 136 137 /* 138 * Read one cache bit mask (hex). Check that it is valid for the current 139 * resource type. 140 */ 141 int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, 142 struct rdt_domain *d) 143 { 144 struct rdtgroup *rdtgrp = data->rdtgrp; 145 struct resctrl_staged_config *cfg; 146 struct rdt_resource *r = s->res; 147 u32 cbm_val; 148 149 cfg = &d->staged_config[s->conf_type]; 150 if (cfg->have_new_ctrl) { 151 rdt_last_cmd_printf("Duplicate domain %d\n", d->id); 152 return -EINVAL; 153 } 154 155 /* 156 * Cannot set up more than one pseudo-locked region in a cache 157 * hierarchy. 158 */ 159 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && 160 rdtgroup_pseudo_locked_in_hierarchy(d)) { 161 rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); 162 return -EINVAL; 163 } 164 165 if (!cbm_validate(data->buf, &cbm_val, r)) 166 return -EINVAL; 167 168 if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || 169 rdtgrp->mode == RDT_MODE_SHAREABLE) && 170 rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { 171 rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); 172 return -EINVAL; 173 } 174 175 /* 176 * The CBM may not overlap with the CBM of another closid if 177 * either is exclusive. 178 */ 179 if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { 180 rdt_last_cmd_puts("Overlaps with exclusive group\n"); 181 return -EINVAL; 182 } 183 184 if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { 185 if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || 186 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 187 rdt_last_cmd_puts("Overlaps with other group\n"); 188 return -EINVAL; 189 } 190 } 191 192 cfg->new_ctrl = cbm_val; 193 cfg->have_new_ctrl = true; 194 195 return 0; 196 } 197 198 /* 199 * For each domain in this resource we expect to find a series of: 200 * id=mask 201 * separated by ";". The "id" is in decimal, and must match one of 202 * the "id"s for this resource. 203 */ 204 static int parse_line(char *line, struct resctrl_schema *s, 205 struct rdtgroup *rdtgrp) 206 { 207 enum resctrl_conf_type t = s->conf_type; 208 struct resctrl_staged_config *cfg; 209 struct rdt_resource *r = s->res; 210 struct rdt_parse_data data; 211 char *dom = NULL, *id; 212 struct rdt_domain *d; 213 unsigned long dom_id; 214 215 /* Walking r->domains, ensure it can't race with cpuhp */ 216 lockdep_assert_cpus_held(); 217 218 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && 219 (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { 220 rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); 221 return -EINVAL; 222 } 223 224 next: 225 if (!line || line[0] == '\0') 226 return 0; 227 dom = strsep(&line, ";"); 228 id = strsep(&dom, "="); 229 if (!dom || kstrtoul(id, 10, &dom_id)) { 230 rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); 231 return -EINVAL; 232 } 233 dom = strim(dom); 234 list_for_each_entry(d, &r->domains, list) { 235 if (d->id == dom_id) { 236 data.buf = dom; 237 data.rdtgrp = rdtgrp; 238 if (r->parse_ctrlval(&data, s, d)) 239 return -EINVAL; 240 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 241 cfg = &d->staged_config[t]; 242 /* 243 * In pseudo-locking setup mode and just 244 * parsed a valid CBM that should be 245 * pseudo-locked. Only one locked region per 246 * resource group and domain so just do 247 * the required initialization for single 248 * region and return. 249 */ 250 rdtgrp->plr->s = s; 251 rdtgrp->plr->d = d; 252 rdtgrp->plr->cbm = cfg->new_ctrl; 253 d->plr = rdtgrp->plr; 254 return 0; 255 } 256 goto next; 257 } 258 } 259 return -EINVAL; 260 } 261 262 static u32 get_config_index(u32 closid, enum resctrl_conf_type type) 263 { 264 switch (type) { 265 default: 266 case CDP_NONE: 267 return closid; 268 case CDP_CODE: 269 return closid * 2 + 1; 270 case CDP_DATA: 271 return closid * 2; 272 } 273 } 274 275 static bool apply_config(struct rdt_hw_domain *hw_dom, 276 struct resctrl_staged_config *cfg, u32 idx, 277 cpumask_var_t cpu_mask) 278 { 279 struct rdt_domain *dom = &hw_dom->d_resctrl; 280 281 if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { 282 cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); 283 hw_dom->ctrl_val[idx] = cfg->new_ctrl; 284 285 return true; 286 } 287 288 return false; 289 } 290 291 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, 292 u32 closid, enum resctrl_conf_type t, u32 cfg_val) 293 { 294 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 295 struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); 296 u32 idx = get_config_index(closid, t); 297 struct msr_param msr_param; 298 299 if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) 300 return -EINVAL; 301 302 hw_dom->ctrl_val[idx] = cfg_val; 303 304 msr_param.res = r; 305 msr_param.low = idx; 306 msr_param.high = idx + 1; 307 hw_res->msr_update(d, &msr_param, r); 308 309 return 0; 310 } 311 312 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) 313 { 314 struct resctrl_staged_config *cfg; 315 struct rdt_hw_domain *hw_dom; 316 struct msr_param msr_param; 317 enum resctrl_conf_type t; 318 cpumask_var_t cpu_mask; 319 struct rdt_domain *d; 320 u32 idx; 321 322 /* Walking r->domains, ensure it can't race with cpuhp */ 323 lockdep_assert_cpus_held(); 324 325 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 326 return -ENOMEM; 327 328 msr_param.res = NULL; 329 list_for_each_entry(d, &r->domains, list) { 330 hw_dom = resctrl_to_arch_dom(d); 331 for (t = 0; t < CDP_NUM_TYPES; t++) { 332 cfg = &hw_dom->d_resctrl.staged_config[t]; 333 if (!cfg->have_new_ctrl) 334 continue; 335 336 idx = get_config_index(closid, t); 337 if (!apply_config(hw_dom, cfg, idx, cpu_mask)) 338 continue; 339 340 if (!msr_param.res) { 341 msr_param.low = idx; 342 msr_param.high = msr_param.low + 1; 343 msr_param.res = r; 344 } else { 345 msr_param.low = min(msr_param.low, idx); 346 msr_param.high = max(msr_param.high, idx + 1); 347 } 348 } 349 } 350 351 if (cpumask_empty(cpu_mask)) 352 goto done; 353 354 /* Update resource control msr on all the CPUs. */ 355 on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); 356 357 done: 358 free_cpumask_var(cpu_mask); 359 360 return 0; 361 } 362 363 static int rdtgroup_parse_resource(char *resname, char *tok, 364 struct rdtgroup *rdtgrp) 365 { 366 struct resctrl_schema *s; 367 368 list_for_each_entry(s, &resctrl_schema_all, list) { 369 if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) 370 return parse_line(tok, s, rdtgrp); 371 } 372 rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); 373 return -EINVAL; 374 } 375 376 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, 377 char *buf, size_t nbytes, loff_t off) 378 { 379 struct resctrl_schema *s; 380 struct rdtgroup *rdtgrp; 381 struct rdt_resource *r; 382 char *tok, *resname; 383 int ret = 0; 384 385 /* Valid input requires a trailing newline */ 386 if (nbytes == 0 || buf[nbytes - 1] != '\n') 387 return -EINVAL; 388 buf[nbytes - 1] = '\0'; 389 390 rdtgrp = rdtgroup_kn_lock_live(of->kn); 391 if (!rdtgrp) { 392 rdtgroup_kn_unlock(of->kn); 393 return -ENOENT; 394 } 395 rdt_last_cmd_clear(); 396 397 /* 398 * No changes to pseudo-locked region allowed. It has to be removed 399 * and re-created instead. 400 */ 401 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 402 ret = -EINVAL; 403 rdt_last_cmd_puts("Resource group is pseudo-locked\n"); 404 goto out; 405 } 406 407 rdt_staged_configs_clear(); 408 409 while ((tok = strsep(&buf, "\n")) != NULL) { 410 resname = strim(strsep(&tok, ":")); 411 if (!tok) { 412 rdt_last_cmd_puts("Missing ':'\n"); 413 ret = -EINVAL; 414 goto out; 415 } 416 if (tok[0] == '\0') { 417 rdt_last_cmd_printf("Missing '%s' value\n", resname); 418 ret = -EINVAL; 419 goto out; 420 } 421 ret = rdtgroup_parse_resource(resname, tok, rdtgrp); 422 if (ret) 423 goto out; 424 } 425 426 list_for_each_entry(s, &resctrl_schema_all, list) { 427 r = s->res; 428 429 /* 430 * Writes to mba_sc resources update the software controller, 431 * not the control MSR. 432 */ 433 if (is_mba_sc(r)) 434 continue; 435 436 ret = resctrl_arch_update_domains(r, rdtgrp->closid); 437 if (ret) 438 goto out; 439 } 440 441 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 442 /* 443 * If pseudo-locking fails we keep the resource group in 444 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service 445 * active and updated for just the domain the pseudo-locked 446 * region was requested for. 447 */ 448 ret = rdtgroup_pseudo_lock_create(rdtgrp); 449 } 450 451 out: 452 rdt_staged_configs_clear(); 453 rdtgroup_kn_unlock(of->kn); 454 return ret ?: nbytes; 455 } 456 457 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, 458 u32 closid, enum resctrl_conf_type type) 459 { 460 struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); 461 u32 idx = get_config_index(closid, type); 462 463 return hw_dom->ctrl_val[idx]; 464 } 465 466 static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) 467 { 468 struct rdt_resource *r = schema->res; 469 struct rdt_domain *dom; 470 bool sep = false; 471 u32 ctrl_val; 472 473 /* Walking r->domains, ensure it can't race with cpuhp */ 474 lockdep_assert_cpus_held(); 475 476 seq_printf(s, "%*s:", max_name_width, schema->name); 477 list_for_each_entry(dom, &r->domains, list) { 478 if (sep) 479 seq_puts(s, ";"); 480 481 if (is_mba_sc(r)) 482 ctrl_val = dom->mbps_val[closid]; 483 else 484 ctrl_val = resctrl_arch_get_config(r, dom, closid, 485 schema->conf_type); 486 487 seq_printf(s, r->format_str, dom->id, max_data_width, 488 ctrl_val); 489 sep = true; 490 } 491 seq_puts(s, "\n"); 492 } 493 494 int rdtgroup_schemata_show(struct kernfs_open_file *of, 495 struct seq_file *s, void *v) 496 { 497 struct resctrl_schema *schema; 498 struct rdtgroup *rdtgrp; 499 int ret = 0; 500 u32 closid; 501 502 rdtgrp = rdtgroup_kn_lock_live(of->kn); 503 if (rdtgrp) { 504 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 505 list_for_each_entry(schema, &resctrl_schema_all, list) { 506 seq_printf(s, "%s:uninitialized\n", schema->name); 507 } 508 } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 509 if (!rdtgrp->plr->d) { 510 rdt_last_cmd_clear(); 511 rdt_last_cmd_puts("Cache domain offline\n"); 512 ret = -ENODEV; 513 } else { 514 seq_printf(s, "%s:%d=%x\n", 515 rdtgrp->plr->s->res->name, 516 rdtgrp->plr->d->id, 517 rdtgrp->plr->cbm); 518 } 519 } else { 520 closid = rdtgrp->closid; 521 list_for_each_entry(schema, &resctrl_schema_all, list) { 522 if (closid < schema->num_closid) 523 show_doms(s, schema, closid); 524 } 525 } 526 } else { 527 ret = -ENOENT; 528 } 529 rdtgroup_kn_unlock(of->kn); 530 return ret; 531 } 532 533 static int smp_mon_event_count(void *arg) 534 { 535 mon_event_count(arg); 536 537 return 0; 538 } 539 540 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, 541 struct rdt_domain *d, struct rdtgroup *rdtgrp, 542 int evtid, int first) 543 { 544 int cpu; 545 546 /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ 547 lockdep_assert_cpus_held(); 548 549 /* 550 * Setup the parameters to pass to mon_event_count() to read the data. 551 */ 552 rr->rgrp = rdtgrp; 553 rr->evtid = evtid; 554 rr->r = r; 555 rr->d = d; 556 rr->val = 0; 557 rr->first = first; 558 rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); 559 if (IS_ERR(rr->arch_mon_ctx)) { 560 rr->err = -EINVAL; 561 return; 562 } 563 564 cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU); 565 566 /* 567 * cpumask_any_housekeeping() prefers housekeeping CPUs, but 568 * are all the CPUs nohz_full? If yes, pick a CPU to IPI. 569 * MPAM's resctrl_arch_rmid_read() is unable to read the 570 * counters on some platforms if its called in IRQ context. 571 */ 572 if (tick_nohz_full_cpu(cpu)) 573 smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1); 574 else 575 smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); 576 577 resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); 578 } 579 580 int rdtgroup_mondata_show(struct seq_file *m, void *arg) 581 { 582 struct kernfs_open_file *of = m->private; 583 u32 resid, evtid, domid; 584 struct rdtgroup *rdtgrp; 585 struct rdt_resource *r; 586 union mon_data_bits md; 587 struct rdt_domain *d; 588 struct rmid_read rr; 589 int ret = 0; 590 591 rdtgrp = rdtgroup_kn_lock_live(of->kn); 592 if (!rdtgrp) { 593 ret = -ENOENT; 594 goto out; 595 } 596 597 md.priv = of->kn->priv; 598 resid = md.u.rid; 599 domid = md.u.domid; 600 evtid = md.u.evtid; 601 602 r = &rdt_resources_all[resid].r_resctrl; 603 d = rdt_find_domain(r, domid, NULL); 604 if (IS_ERR_OR_NULL(d)) { 605 ret = -ENOENT; 606 goto out; 607 } 608 609 mon_event_read(&rr, r, d, rdtgrp, evtid, false); 610 611 if (rr.err == -EIO) 612 seq_puts(m, "Error\n"); 613 else if (rr.err == -EINVAL) 614 seq_puts(m, "Unavailable\n"); 615 else 616 seq_printf(m, "%llu\n", rr.val); 617 618 out: 619 rdtgroup_kn_unlock(of->kn); 620 return ret; 621 } 622