// SPDX-License-Identifier: GPL-2.0-only
/*
 * User interface for Resource Allocation in Resource Director Technology(RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
#include <linux/user_namespace.h>

#include <uapi/linux/magic.h>

#include <asm/resctrl.h>
#include "internal.h"

DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);

/* list of entries for the schemata file */
LIST_HEAD(resctrl_schema_all);

/* The filesystem can only be mounted once. */
bool resctrl_mounted;

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;

/* Kernel fs node for "mon_groups" directory under root */
static struct kernfs_node *kn_mongrp;

/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;

static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];

static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
static void rdtgroup_destroy_root(void);

struct dentry *debugfs_resctrl;

/*
 * Memory bandwidth monitoring event to use for the default CTRL_MON group
 * and each new CTRL_MON group created by the user. Only relevant when
 * the filesystem is mounted with the "mba_MBps" option so it does not
 * matter that it remains uninitialized on systems that do not support
 * the "mba_MBps" option.
 */
enum resctrl_event_id mba_mbps_default_event;

static bool resctrl_debug;

void rdt_last_cmd_clear(void)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_clear(&last_cmd_status);
}

void rdt_last_cmd_puts(const char *s)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_puts(&last_cmd_status, s);
}

void rdt_last_cmd_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_vprintf(&last_cmd_status, fmt, ap);
	va_end(ap);
}

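/*
 * Example (illustrative) of how the helpers above are typically used: an
 * error path clears any stale message and records a new one, which user
 * space can then read back from the "info/last_cmd_status" file:
 *
 *	rdt_last_cmd_clear();
 *	rdt_last_cmd_printf("Invalid pid %d\n", pid);
 *
 * Messages share the 512-byte last_cmd_status_buf, so they are kept short.
 */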

void rdt_staged_configs_clear(void)
{
	struct rdt_ctrl_domain *dom;
	struct rdt_resource *r;

	lockdep_assert_held(&rdtgroup_mutex);

	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
			memset(dom->staged_config, 0, sizeof(dom->staged_config));
	}
}

/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set current's closid to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
static unsigned long closid_free_map;
static int closid_free_map_len;

int closids_supported(void)
{
	return closid_free_map_len;
}

static void closid_init(void)
{
	struct resctrl_schema *s;
	u32 rdt_min_closid = 32;

	/* Compute rdt_min_closid across all resources */
	list_for_each_entry(s, &resctrl_schema_all, list)
		rdt_min_closid = min(rdt_min_closid, s->num_closid);

	closid_free_map = BIT_MASK(rdt_min_closid) - 1;

	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
	closid_free_map_len = rdt_min_closid;
}

static int closid_alloc(void)
{
	int cleanest_closid;
	u32 closid;

	lockdep_assert_held(&rdtgroup_mutex);

	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		cleanest_closid = resctrl_find_cleanest_closid();
		if (cleanest_closid < 0)
			return cleanest_closid;
		closid = cleanest_closid;
	} else {
		closid = ffs(closid_free_map);
		if (closid == 0)
			return -ENOSPC;
		closid--;
	}
	__clear_bit(closid, &closid_free_map);

	return closid;
}

void closid_free(int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	__set_bit(closid, &closid_free_map);
}

/**
 * closid_allocated - test if provided closid is in use
 * @closid: closid to be tested
 *
 * Return: true if @closid is currently associated with a resource group,
 * false if @closid is free
 */
bool closid_allocated(unsigned int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	return !test_bit(closid, &closid_free_map);
}

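/*
 * Worked example (illustrative): if rdt_min_closid ends up as 4,
 * closid_init() computes
 *
 *	closid_free_map = BIT_MASK(4) - 1 = 0b1111
 *
 * and clears RESCTRL_RESERVED_CLOSID (0), which the default group uses,
 * leaving 0b1110. Without CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID a
 * subsequent closid_alloc() takes the lowest free bit,
 * ffs(0b1110) - 1 = 1, and clears it, leaving 0b1100.
 */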

/**
 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 * @closid: closid of the resource group
 *
 * Each resource group is associated with a @closid. Here the mode
 * of a resource group can be queried by searching for it using its closid.
 *
 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 */
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
{
	struct rdtgroup *rdtgrp;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->closid == closid)
			return rdtgrp->mode;
	}

	return RDT_NUM_MODES;
}

static const char * const rdt_mode_str[] = {
	[RDT_MODE_SHAREABLE]		= "shareable",
	[RDT_MODE_EXCLUSIVE]		= "exclusive",
	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
};

/**
 * rdtgroup_mode_str - Return the string representation of mode
 * @mode: the resource group mode as &enum rdtgroup_mode
 *
 * Return: string representation of valid mode, "unknown" otherwise
 */
static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
{
	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
		return "unknown";

	return rdt_mode_str[mode];
}

/* set uid and gid of rdtgroup dirs and files to that of the creator */
static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
				.ia_uid = current_fsuid(),
				.ia_gid = current_fsgid(), };

	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return 0;

	return kernfs_setattr(kn, &iattr);
}

static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
{
	struct kernfs_node *kn;
	int ret;

	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
				  0, rft->kf_ops, rft, NULL, NULL);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	return 0;
}

static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	if (rft->seq_show)
		return rft->seq_show(of, m, arg);
	return 0;
}

static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (rft->write)
		return rft->write(of, buf, nbytes, off);

	return -EINVAL;
}

static const struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};

static const struct kernfs_ops kf_mondata_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.seq_show		= rdtgroup_mondata_show,
};

static bool is_cpu_list(struct kernfs_open_file *of)
{
	struct rftype *rft = of->kn->priv;

	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
}

static int rdtgroup_cpus_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	struct cpumask *mask;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp) {
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
			if (!rdtgrp->plr->d) {
				rdt_last_cmd_clear();
				rdt_last_cmd_puts("Cache domain offline\n");
				ret = -ENODEV;
			} else {
				mask = &rdtgrp->plr->d->hdr.cpu_mask;
				seq_printf(s, is_cpu_list(of) ?
					   "%*pbl\n" : "%*pb\n",
					   cpumask_pr_args(mask));
			}
		} else {
			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
				   cpumask_pr_args(&rdtgrp->cpu_mask));
		}
	} else {
		ret = -ENOENT;
	}
	rdtgroup_kn_unlock(of->kn);

	return ret;
}

/*
 * This is safe against resctrl_sched_in() called from __switch_to()
 * because __switch_to() is executed with interrupts disabled. A local call
 * from update_closid_rmid() is protected against __switch_to() because
 * preemption is disabled.
 */
static void update_cpu_closid_rmid(void *info)
{
	struct rdtgroup *r = info;

	if (r) {
		this_cpu_write(pqr_state.default_closid, r->closid);
		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
	}

	/*
	 * We cannot unconditionally write the MSR because the current
	 * executing task might have its own closid selected. Just reuse
	 * the context switch code.
	 */
	resctrl_sched_in(current);
}

/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 *
 * Per task closids/rmids must have been set up before calling this function.
 */
static void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
}

static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			  cpumask_var_t tmpmask)
{
	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
	struct list_head *head;

	/* Check whether cpus belong to parent ctrl group */
	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Give any dropped cpus to parent rdtgroup */
		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, prgrp);
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu rmid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
			if (crgrp == rdtgrp)
				continue;
			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
				       tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}

static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
{
	struct rdtgroup *crgrp;

	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
	/* update the child mon group masks as well */
	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
}

static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *r, *crgrp;
	struct list_head *head;

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Give any dropped cpus to rdtgroup_default */
cpumask_or(&rdtgroup_default.cpu_mask, 449 &rdtgroup_default.cpu_mask, tmpmask); 450 update_closid_rmid(tmpmask, &rdtgroup_default); 451 } 452 453 /* 454 * If we added cpus, remove them from previous group and 455 * the prev group's child groups that owned them 456 * and update per-cpu closid/rmid. 457 */ 458 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 459 if (!cpumask_empty(tmpmask)) { 460 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { 461 if (r == rdtgrp) 462 continue; 463 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); 464 if (!cpumask_empty(tmpmask1)) 465 cpumask_rdtgrp_clear(r, tmpmask1); 466 } 467 update_closid_rmid(tmpmask, rdtgrp); 468 } 469 470 /* Done pushing/pulling - update this group with new mask */ 471 cpumask_copy(&rdtgrp->cpu_mask, newmask); 472 473 /* 474 * Clear child mon group masks since there is a new parent mask 475 * now and update the rmid for the cpus the child lost. 476 */ 477 head = &rdtgrp->mon.crdtgrp_list; 478 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 479 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); 480 update_closid_rmid(tmpmask, rdtgrp); 481 cpumask_clear(&crgrp->cpu_mask); 482 } 483 484 return 0; 485 } 486 487 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, 488 char *buf, size_t nbytes, loff_t off) 489 { 490 cpumask_var_t tmpmask, newmask, tmpmask1; 491 struct rdtgroup *rdtgrp; 492 int ret; 493 494 if (!buf) 495 return -EINVAL; 496 497 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 498 return -ENOMEM; 499 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { 500 free_cpumask_var(tmpmask); 501 return -ENOMEM; 502 } 503 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { 504 free_cpumask_var(tmpmask); 505 free_cpumask_var(newmask); 506 return -ENOMEM; 507 } 508 509 rdtgrp = rdtgroup_kn_lock_live(of->kn); 510 if (!rdtgrp) { 511 ret = -ENOENT; 512 goto unlock; 513 } 514 515 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 516 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 517 ret = -EINVAL; 518 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 519 goto unlock; 520 } 521 522 if (is_cpu_list(of)) 523 ret = cpulist_parse(buf, newmask); 524 else 525 ret = cpumask_parse(buf, newmask); 526 527 if (ret) { 528 rdt_last_cmd_puts("Bad CPU list/mask\n"); 529 goto unlock; 530 } 531 532 /* check that user didn't specify any offline cpus */ 533 cpumask_andnot(tmpmask, newmask, cpu_online_mask); 534 if (!cpumask_empty(tmpmask)) { 535 ret = -EINVAL; 536 rdt_last_cmd_puts("Can only assign online CPUs\n"); 537 goto unlock; 538 } 539 540 if (rdtgrp->type == RDTCTRL_GROUP) 541 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); 542 else if (rdtgrp->type == RDTMON_GROUP) 543 ret = cpus_mon_write(rdtgrp, newmask, tmpmask); 544 else 545 ret = -EINVAL; 546 547 unlock: 548 rdtgroup_kn_unlock(of->kn); 549 free_cpumask_var(tmpmask); 550 free_cpumask_var(newmask); 551 free_cpumask_var(tmpmask1); 552 553 return ret ?: nbytes; 554 } 555 556 /** 557 * rdtgroup_remove - the helper to remove resource group safely 558 * @rdtgrp: resource group to remove 559 * 560 * On resource group creation via a mkdir, an extra kernfs_node reference is 561 * taken to ensure that the rdtgroup structure remains accessible for the 562 * rdtgroup_kn_unlock() calls where it is removed. 563 * 564 * Drop the extra reference here, then free the rdtgroup structure. 
565 * 566 * Return: void 567 */ 568 static void rdtgroup_remove(struct rdtgroup *rdtgrp) 569 { 570 kernfs_put(rdtgrp->kn); 571 kfree(rdtgrp); 572 } 573 574 static void _update_task_closid_rmid(void *task) 575 { 576 /* 577 * If the task is still current on this CPU, update PQR_ASSOC MSR. 578 * Otherwise, the MSR is updated when the task is scheduled in. 579 */ 580 if (task == current) 581 resctrl_sched_in(task); 582 } 583 584 static void update_task_closid_rmid(struct task_struct *t) 585 { 586 if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) 587 smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); 588 else 589 _update_task_closid_rmid(t); 590 } 591 592 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp) 593 { 594 u32 closid, rmid = rdtgrp->mon.rmid; 595 596 if (rdtgrp->type == RDTCTRL_GROUP) 597 closid = rdtgrp->closid; 598 else if (rdtgrp->type == RDTMON_GROUP) 599 closid = rdtgrp->mon.parent->closid; 600 else 601 return false; 602 603 return resctrl_arch_match_closid(tsk, closid) && 604 resctrl_arch_match_rmid(tsk, closid, rmid); 605 } 606 607 static int __rdtgroup_move_task(struct task_struct *tsk, 608 struct rdtgroup *rdtgrp) 609 { 610 /* If the task is already in rdtgrp, no need to move the task. */ 611 if (task_in_rdtgroup(tsk, rdtgrp)) 612 return 0; 613 614 /* 615 * Set the task's closid/rmid before the PQR_ASSOC MSR can be 616 * updated by them. 617 * 618 * For ctrl_mon groups, move both closid and rmid. 619 * For monitor groups, can move the tasks only from 620 * their parent CTRL group. 621 */ 622 if (rdtgrp->type == RDTMON_GROUP && 623 !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) { 624 rdt_last_cmd_puts("Can't move task to different control group\n"); 625 return -EINVAL; 626 } 627 628 if (rdtgrp->type == RDTMON_GROUP) 629 resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid, 630 rdtgrp->mon.rmid); 631 else 632 resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid, 633 rdtgrp->mon.rmid); 634 635 /* 636 * Ensure the task's closid and rmid are written before determining if 637 * the task is current that will decide if it will be interrupted. 638 * This pairs with the full barrier between the rq->curr update and 639 * resctrl_sched_in() during context switch. 640 */ 641 smp_mb(); 642 643 /* 644 * By now, the task's closid and rmid are set. If the task is current 645 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource 646 * group go into effect. If the task is not current, the MSR will be 647 * updated when the task is scheduled in. 
648 */ 649 update_task_closid_rmid(tsk); 650 651 return 0; 652 } 653 654 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) 655 { 656 return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) && 657 resctrl_arch_match_closid(t, r->closid)); 658 } 659 660 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) 661 { 662 return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) && 663 resctrl_arch_match_rmid(t, r->mon.parent->closid, 664 r->mon.rmid)); 665 } 666 667 /** 668 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group 669 * @r: Resource group 670 * 671 * Return: 1 if tasks have been assigned to @r, 0 otherwise 672 */ 673 int rdtgroup_tasks_assigned(struct rdtgroup *r) 674 { 675 struct task_struct *p, *t; 676 int ret = 0; 677 678 lockdep_assert_held(&rdtgroup_mutex); 679 680 rcu_read_lock(); 681 for_each_process_thread(p, t) { 682 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 683 ret = 1; 684 break; 685 } 686 } 687 rcu_read_unlock(); 688 689 return ret; 690 } 691 692 static int rdtgroup_task_write_permission(struct task_struct *task, 693 struct kernfs_open_file *of) 694 { 695 const struct cred *tcred = get_task_cred(task); 696 const struct cred *cred = current_cred(); 697 int ret = 0; 698 699 /* 700 * Even if we're attaching all tasks in the thread group, we only 701 * need to check permissions on one of them. 702 */ 703 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && 704 !uid_eq(cred->euid, tcred->uid) && 705 !uid_eq(cred->euid, tcred->suid)) { 706 rdt_last_cmd_printf("No permission to move task %d\n", task->pid); 707 ret = -EPERM; 708 } 709 710 put_cred(tcred); 711 return ret; 712 } 713 714 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, 715 struct kernfs_open_file *of) 716 { 717 struct task_struct *tsk; 718 int ret; 719 720 rcu_read_lock(); 721 if (pid) { 722 tsk = find_task_by_vpid(pid); 723 if (!tsk) { 724 rcu_read_unlock(); 725 rdt_last_cmd_printf("No task %d\n", pid); 726 return -ESRCH; 727 } 728 } else { 729 tsk = current; 730 } 731 732 get_task_struct(tsk); 733 rcu_read_unlock(); 734 735 ret = rdtgroup_task_write_permission(tsk, of); 736 if (!ret) 737 ret = __rdtgroup_move_task(tsk, rdtgrp); 738 739 put_task_struct(tsk); 740 return ret; 741 } 742 743 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, 744 char *buf, size_t nbytes, loff_t off) 745 { 746 struct rdtgroup *rdtgrp; 747 char *pid_str; 748 int ret = 0; 749 pid_t pid; 750 751 rdtgrp = rdtgroup_kn_lock_live(of->kn); 752 if (!rdtgrp) { 753 rdtgroup_kn_unlock(of->kn); 754 return -ENOENT; 755 } 756 rdt_last_cmd_clear(); 757 758 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 759 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 760 ret = -EINVAL; 761 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 762 goto unlock; 763 } 764 765 while (buf && buf[0] != '\0' && buf[0] != '\n') { 766 pid_str = strim(strsep(&buf, ",")); 767 768 if (kstrtoint(pid_str, 0, &pid)) { 769 rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); 770 ret = -EINVAL; 771 break; 772 } 773 774 if (pid < 0) { 775 rdt_last_cmd_printf("Invalid pid %d\n", pid); 776 ret = -EINVAL; 777 break; 778 } 779 780 ret = rdtgroup_move_task(pid, rdtgrp, of); 781 if (ret) { 782 rdt_last_cmd_printf("Error while processing task %d\n", pid); 783 break; 784 } 785 } 786 787 unlock: 788 rdtgroup_kn_unlock(of->kn); 789 790 return ret ?: nbytes; 791 } 792 793 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) 794 { 795 struct task_struct *p, *t; 796 
pid_t pid; 797 798 rcu_read_lock(); 799 for_each_process_thread(p, t) { 800 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 801 pid = task_pid_vnr(t); 802 if (pid) 803 seq_printf(s, "%d\n", pid); 804 } 805 } 806 rcu_read_unlock(); 807 } 808 809 static int rdtgroup_tasks_show(struct kernfs_open_file *of, 810 struct seq_file *s, void *v) 811 { 812 struct rdtgroup *rdtgrp; 813 int ret = 0; 814 815 rdtgrp = rdtgroup_kn_lock_live(of->kn); 816 if (rdtgrp) 817 show_rdt_tasks(rdtgrp, s); 818 else 819 ret = -ENOENT; 820 rdtgroup_kn_unlock(of->kn); 821 822 return ret; 823 } 824 825 static int rdtgroup_closid_show(struct kernfs_open_file *of, 826 struct seq_file *s, void *v) 827 { 828 struct rdtgroup *rdtgrp; 829 int ret = 0; 830 831 rdtgrp = rdtgroup_kn_lock_live(of->kn); 832 if (rdtgrp) 833 seq_printf(s, "%u\n", rdtgrp->closid); 834 else 835 ret = -ENOENT; 836 rdtgroup_kn_unlock(of->kn); 837 838 return ret; 839 } 840 841 static int rdtgroup_rmid_show(struct kernfs_open_file *of, 842 struct seq_file *s, void *v) 843 { 844 struct rdtgroup *rdtgrp; 845 int ret = 0; 846 847 rdtgrp = rdtgroup_kn_lock_live(of->kn); 848 if (rdtgrp) 849 seq_printf(s, "%u\n", rdtgrp->mon.rmid); 850 else 851 ret = -ENOENT; 852 rdtgroup_kn_unlock(of->kn); 853 854 return ret; 855 } 856 857 #ifdef CONFIG_PROC_CPU_RESCTRL 858 859 /* 860 * A task can only be part of one resctrl control group and of one monitor 861 * group which is associated to that control group. 862 * 863 * 1) res: 864 * mon: 865 * 866 * resctrl is not available. 867 * 868 * 2) res:/ 869 * mon: 870 * 871 * Task is part of the root resctrl control group, and it is not associated 872 * to any monitor group. 873 * 874 * 3) res:/ 875 * mon:mon0 876 * 877 * Task is part of the root resctrl control group and monitor group mon0. 878 * 879 * 4) res:group0 880 * mon: 881 * 882 * Task is part of resctrl control group group0, and it is not associated 883 * to any monitor group. 884 * 885 * 5) res:group0 886 * mon:mon1 887 * 888 * Task is part of resctrl control group group0 and monitor group mon1. 889 */ 890 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, 891 struct pid *pid, struct task_struct *tsk) 892 { 893 struct rdtgroup *rdtg; 894 int ret = 0; 895 896 mutex_lock(&rdtgroup_mutex); 897 898 /* Return empty if resctrl has not been mounted. */ 899 if (!resctrl_mounted) { 900 seq_puts(s, "res:\nmon:\n"); 901 goto unlock; 902 } 903 904 list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { 905 struct rdtgroup *crg; 906 907 /* 908 * Task information is only relevant for shareable 909 * and exclusive groups. 910 */ 911 if (rdtg->mode != RDT_MODE_SHAREABLE && 912 rdtg->mode != RDT_MODE_EXCLUSIVE) 913 continue; 914 915 if (!resctrl_arch_match_closid(tsk, rdtg->closid)) 916 continue; 917 918 seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", 919 rdt_kn_name(rdtg->kn)); 920 seq_puts(s, "mon:"); 921 list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, 922 mon.crdtgrp_list) { 923 if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, 924 crg->mon.rmid)) 925 continue; 926 seq_printf(s, "%s", rdt_kn_name(crg->kn)); 927 break; 928 } 929 seq_putc(s, '\n'); 930 goto unlock; 931 } 932 /* 933 * The above search should succeed. Otherwise return 934 * with an error. 
935 */ 936 ret = -ENOENT; 937 unlock: 938 mutex_unlock(&rdtgroup_mutex); 939 940 return ret; 941 } 942 #endif 943 944 static int rdt_last_cmd_status_show(struct kernfs_open_file *of, 945 struct seq_file *seq, void *v) 946 { 947 int len; 948 949 mutex_lock(&rdtgroup_mutex); 950 len = seq_buf_used(&last_cmd_status); 951 if (len) 952 seq_printf(seq, "%.*s", len, last_cmd_status_buf); 953 else 954 seq_puts(seq, "ok\n"); 955 mutex_unlock(&rdtgroup_mutex); 956 return 0; 957 } 958 959 static void *rdt_kn_parent_priv(struct kernfs_node *kn) 960 { 961 /* 962 * The parent pointer is only valid within RCU section since it can be 963 * replaced. 964 */ 965 guard(rcu)(); 966 return rcu_dereference(kn->__parent)->priv; 967 } 968 969 static int rdt_num_closids_show(struct kernfs_open_file *of, 970 struct seq_file *seq, void *v) 971 { 972 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 973 974 seq_printf(seq, "%u\n", s->num_closid); 975 return 0; 976 } 977 978 static int rdt_default_ctrl_show(struct kernfs_open_file *of, 979 struct seq_file *seq, void *v) 980 { 981 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 982 struct rdt_resource *r = s->res; 983 984 seq_printf(seq, "%x\n", r->default_ctrl); 985 return 0; 986 } 987 988 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, 989 struct seq_file *seq, void *v) 990 { 991 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 992 struct rdt_resource *r = s->res; 993 994 seq_printf(seq, "%u\n", r->cache.min_cbm_bits); 995 return 0; 996 } 997 998 static int rdt_shareable_bits_show(struct kernfs_open_file *of, 999 struct seq_file *seq, void *v) 1000 { 1001 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1002 struct rdt_resource *r = s->res; 1003 1004 seq_printf(seq, "%x\n", r->cache.shareable_bits); 1005 return 0; 1006 } 1007 1008 /* 1009 * rdt_bit_usage_show - Display current usage of resources 1010 * 1011 * A domain is a shared resource that can now be allocated differently. Here 1012 * we display the current regions of the domain as an annotated bitmask. 1013 * For each domain of this resource its allocation bitmask 1014 * is annotated as below to indicate the current usage of the corresponding bit: 1015 * 0 - currently unused 1016 * X - currently available for sharing and used by software and hardware 1017 * H - currently used by hardware only but available for software use 1018 * S - currently used and shareable by software only 1019 * E - currently used exclusively by one resource group 1020 * P - currently pseudo-locked by one resource group 1021 */ 1022 static int rdt_bit_usage_show(struct kernfs_open_file *of, 1023 struct seq_file *seq, void *v) 1024 { 1025 struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1026 /* 1027 * Use unsigned long even though only 32 bits are used to ensure 1028 * test_bit() is used safely. 
	 */
	unsigned long sw_shareable = 0, hw_shareable = 0;
	unsigned long exclusive = 0, pseudo_locked = 0;
	struct rdt_resource *r = s->res;
	struct rdt_ctrl_domain *dom;
	int i, hwb, swb, excl, psl;
	enum rdtgrp_mode mode;
	bool sep = false;
	u32 ctrl_val;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);
	hw_shareable = r->cache.shareable_bits;
	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
		if (sep)
			seq_putc(seq, ';');
		sw_shareable = 0;
		exclusive = 0;
		seq_printf(seq, "%d=", dom->hdr.id);
		for (i = 0; i < closids_supported(); i++) {
			if (!closid_allocated(i))
				continue;
			ctrl_val = resctrl_arch_get_config(r, dom, i,
							   s->conf_type);
			mode = rdtgroup_mode_by_closid(i);
			switch (mode) {
			case RDT_MODE_SHAREABLE:
				sw_shareable |= ctrl_val;
				break;
			case RDT_MODE_EXCLUSIVE:
				exclusive |= ctrl_val;
				break;
			case RDT_MODE_PSEUDO_LOCKSETUP:
				/*
				 * RDT_MODE_PSEUDO_LOCKSETUP is possible
				 * here but not included since the CBM
				 * associated with this CLOSID in this mode
				 * is not initialized and no task or cpu can be
				 * assigned this CLOSID.
				 */
				break;
			case RDT_MODE_PSEUDO_LOCKED:
			case RDT_NUM_MODES:
				WARN(1,
				     "invalid mode for closid %d\n", i);
				break;
			}
		}
		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
			hwb = test_bit(i, &hw_shareable);
			swb = test_bit(i, &sw_shareable);
			excl = test_bit(i, &exclusive);
			psl = test_bit(i, &pseudo_locked);
			if (hwb && swb)
				seq_putc(seq, 'X');
			else if (hwb && !swb)
				seq_putc(seq, 'H');
			else if (!hwb && swb)
				seq_putc(seq, 'S');
			else if (excl)
				seq_putc(seq, 'E');
			else if (psl)
				seq_putc(seq, 'P');
			else /* Unused bits remain */
				seq_putc(seq, '0');
		}
		sep = true;
	}
	seq_putc(seq, '\n');
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return 0;
}
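/*
 * Example (illustrative) "bit_usage" output for a cache resource with a
 * 16-bit CBM and two domains:
 *
 *	0=XXSSSSSSSSEEEE00;1=XXSSSSSSSSSSSS00
 *
 * Each domain prints one character per CBM bit using the legend above,
 * from the most significant bit down to bit 0, with domains separated
 * by ';'.
 */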

static int rdt_min_bw_show(struct kernfs_open_file *of,
			   struct seq_file *seq, void *v)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;

	seq_printf(seq, "%u\n", r->membw.min_bw);
	return 0;
}

static int rdt_num_rmids_show(struct kernfs_open_file *of,
			      struct seq_file *seq, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);

	seq_printf(seq, "%d\n", r->num_rmid);

	return 0;
}

static int rdt_mon_features_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	struct mon_evt *mevt;

	list_for_each_entry(mevt, &r->evt_list, list) {
		seq_printf(seq, "%s\n", mevt->name);
		if (mevt->configurable)
			seq_printf(seq, "%s_config\n", mevt->name);
	}

	return 0;
}

static int rdt_bw_gran_show(struct kernfs_open_file *of,
			    struct seq_file *seq, void *v)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;

	seq_printf(seq, "%u\n", r->membw.bw_gran);
	return 0;
}

static int rdt_delay_linear_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;

	seq_printf(seq, "%u\n", r->membw.delay_linear);
	return 0;
}

static int max_threshold_occ_show(struct kernfs_open_file *of,
				  struct seq_file *seq, void *v)
{
	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);

	return 0;
}

static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
					 struct seq_file *seq, void *v)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;

	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
		seq_puts(seq, "per-thread\n");
	else
		seq_puts(seq, "max\n");

	return 0;
}

static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
				       char *buf, size_t nbytes, loff_t off)
{
	unsigned int bytes;
	int ret;

	ret = kstrtouint(buf, 0, &bytes);
	if (ret)
		return ret;

	if (bytes > resctrl_rmid_realloc_limit)
		return -EINVAL;

	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);

	return nbytes;
}

/*
 * rdtgroup_mode_show - Display mode of this resource group
 */
static int rdtgroup_mode_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}

	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));

	rdtgroup_kn_unlock(of->kn);
	return 0;
}

static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
{
	switch (my_type) {
	case CDP_CODE:
		return CDP_DATA;
	case CDP_DATA:
		return CDP_CODE;
	default:
	case CDP_NONE:
		return CDP_NONE;
	}
}

static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
	struct rdt_resource *r = s->res;

	seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);

	return 0;
}
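/*
 * Example (illustrative): "sparse_masks" reports whether the hardware
 * accepts non-contiguous capacity bitmasks. When it reads 1, a schemata
 * write such as "L3:0=f00f" (two separate runs of bits) may be accepted;
 * when it reads 0, only contiguous masks such as "L3:0=00ff" are valid.
 */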

/**
 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
 * @r: Resource to which domain instance @d belongs.
 * @d: The domain instance for which @closid is being tested.
 * @cbm: Capacity bitmask being tested.
 * @closid: Intended closid for @cbm.
 * @type: CDP type of @r.
 * @exclusive: Only check if overlaps with exclusive resource groups
 *
 * Checks if provided @cbm intended to be used for @closid on domain
 * @d overlaps with any other closids or other hardware usage associated
 * with this domain. If @exclusive is true then only overlaps with
 * resource groups in exclusive mode will be considered. If @exclusive
 * is false then overlaps with any resource group or hardware entities
 * will be considered.
 *
 * @cbm is unsigned long, even if only 32 bits are used, to make the
 * bitmap functions work correctly.
 *
 * Return: false if CBM does not overlap, true if it does.
 */
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
				    unsigned long cbm, int closid,
				    enum resctrl_conf_type type, bool exclusive)
{
	enum rdtgrp_mode mode;
	unsigned long ctrl_b;
	int i;

	/* Check for any overlap with regions used by hardware directly */
	if (!exclusive) {
		ctrl_b = r->cache.shareable_bits;
		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
			return true;
	}

	/* Check for overlap with other resource groups */
	for (i = 0; i < closids_supported(); i++) {
		ctrl_b = resctrl_arch_get_config(r, d, i, type);
		mode = rdtgroup_mode_by_closid(i);
		if (closid_allocated(i) && i != closid &&
		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
				if (exclusive) {
					if (mode == RDT_MODE_EXCLUSIVE)
						return true;
					continue;
				}
				return true;
			}
		}
	}

	return false;
}

/**
 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
 * @s: Schema for the resource to which domain instance @d belongs.
 * @d: The domain instance for which @closid is being tested.
 * @cbm: Capacity bitmask being tested.
 * @closid: Intended closid for @cbm.
 * @exclusive: Only check if overlaps with exclusive resource groups
 *
 * Resources that can be allocated using a CBM can use the CBM to control
 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
 * for overlap. Overlap test is not limited to the specific resource for
 * which the CBM is intended though - when dealing with CDP resources that
 * share the underlying hardware the overlap check should be performed on
 * the CDP resource sharing the hardware also.
 *
 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
 * overlap test.
 *
 * Return: true if CBM overlap detected, false if there is no overlap
 */
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
			   unsigned long cbm, int closid, bool exclusive)
{
	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
	struct rdt_resource *r = s->res;

	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
				    exclusive))
		return true;

	if (!resctrl_arch_get_cdp_enabled(r->rid))
		return false;
	return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
}
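/*
 * Worked example (illustrative): if closid 1 is in exclusive mode with
 * CBM 0x0f0 on a domain, then testing CBM 0x0ff for another closid finds
 * bitmap_intersects(0x0ff, 0x0f0) true and the overlap is reported, so
 * the requested configuration is rejected; CBM 0x00f would not intersect
 * and passes the check.
 */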

/**
 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
 * @rdtgrp: Resource group identified through its closid.
 *
 * An exclusive resource group implies that there should be no sharing of
 * its allocated resources. At the time this group is considered to be
 * exclusive this test can determine if its current schemata supports this
 * setting by testing for overlap with all other resource groups.
 *
 * Return: true if resource group can be exclusive, false if there is overlap
 * with allocations of other resource groups and thus this resource group
 * cannot be exclusive.
 */
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
	int closid = rdtgrp->closid;
	struct rdt_ctrl_domain *d;
	struct resctrl_schema *s;
	struct rdt_resource *r;
	bool has_cache = false;
	u32 ctrl;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	list_for_each_entry(s, &resctrl_schema_all, list) {
		r = s->res;
		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
			continue;
		has_cache = true;
		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
			ctrl = resctrl_arch_get_config(r, d, closid,
						       s->conf_type);
			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
				rdt_last_cmd_puts("Schemata overlaps\n");
				return false;
			}
		}
	}

	if (!has_cache) {
		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
		return false;
	}

	return true;
}

/*
 * rdtgroup_mode_write - Modify the resource group's mode
 */
static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *rdtgrp;
	enum rdtgrp_mode mode;
	int ret = 0;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;
	buf[nbytes - 1] = '\0';

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}

	rdt_last_cmd_clear();

	mode = rdtgrp->mode;

	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
	    (!strcmp(buf, "pseudo-locksetup") &&
	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
		goto out;

	if (mode == RDT_MODE_PSEUDO_LOCKED) {
		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
		ret = -EINVAL;
		goto out;
	}

	if (!strcmp(buf, "shareable")) {
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
			ret = rdtgroup_locksetup_exit(rdtgrp);
			if (ret)
				goto out;
		}
		rdtgrp->mode = RDT_MODE_SHAREABLE;
	} else if (!strcmp(buf, "exclusive")) {
		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
			ret = -EINVAL;
			goto out;
		}
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
			ret = rdtgroup_locksetup_exit(rdtgrp);
			if (ret)
				goto out;
		}
		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
	} else if (!strcmp(buf, "pseudo-locksetup")) {
		ret = rdtgroup_locksetup_enter(rdtgrp);
		if (ret)
			goto out;
		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
	} else {
		rdt_last_cmd_puts("Unknown or unsupported mode\n");
		ret = -EINVAL;
	}

out:
	rdtgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}

/**
 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
 * @r: RDT resource to which @d belongs.
 * @d: RDT domain instance.
 * @cbm: bitmask for which the size should be computed.
 *
 * The bitmask provided, associated with the RDT domain instance @d, will be
 * translated into how many bytes it represents. The size in bytes is
 * computed by first dividing the total cache size by the CBM length to
 * determine how many bytes each bit in the bitmask represents. The result
 * is multiplied with the number of bits set in the bitmask.
 *
 * @cbm is unsigned long, even if only 32 bits are used to make the
 * bitmap functions work correctly.
 */
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
				  struct rdt_ctrl_domain *d, unsigned long cbm)
{
	unsigned int size = 0;
	struct cacheinfo *ci;
	int num_b;

	if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
		return size;

	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
	ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
	if (ci)
		size = ci->size / r->cache.cbm_len * num_b;

	return size;
}
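/*
 * Worked example (illustrative): for a 32 MiB cache represented by a
 * 16-bit CBM, each bit covers 32 MiB / 16 = 2 MiB, so a CBM of 0x00ff
 * (8 bits set) is reported as 8 * 2 MiB = 16 MiB.
 */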

/*
 * rdtgroup_size_show - Display size in bytes of allocated regions
 *
 * The "size" file mirrors the layout of the "schemata" file, printing the
 * size in bytes of each region instead of the capacity bitmask.
 */
static int rdtgroup_size_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct resctrl_schema *schema;
	enum resctrl_conf_type type;
	struct rdt_ctrl_domain *d;
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;
	unsigned int size;
	int ret = 0;
	u32 closid;
	bool sep;
	u32 ctrl;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		rdtgroup_kn_unlock(of->kn);
		return -ENOENT;
	}

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
		if (!rdtgrp->plr->d) {
			rdt_last_cmd_clear();
			rdt_last_cmd_puts("Cache domain offline\n");
			ret = -ENODEV;
		} else {
			seq_printf(s, "%*s:", max_name_width,
				   rdtgrp->plr->s->name);
			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
						    rdtgrp->plr->d,
						    rdtgrp->plr->cbm);
			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
		}
		goto out;
	}

	closid = rdtgrp->closid;

	list_for_each_entry(schema, &resctrl_schema_all, list) {
		r = schema->res;
		type = schema->conf_type;
		sep = false;
		seq_printf(s, "%*s:", max_name_width, schema->name);
		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
			if (sep)
				seq_putc(s, ';');
			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
				size = 0;
			} else {
				if (is_mba_sc(r))
					ctrl = d->mbps_val[closid];
				else
					ctrl = resctrl_arch_get_config(r, d,
								       closid,
								       type);
				if (r->rid == RDT_RESOURCE_MBA ||
				    r->rid == RDT_RESOURCE_SMBA)
					size = ctrl;
				else
					size = rdtgroup_cbm_to_size(r, d, ctrl);
			}
			seq_printf(s, "%d=%u", d->hdr.id, size);
			sep = true;
		}
		seq_putc(s, '\n');
	}

out:
	rdtgroup_kn_unlock(of->kn);

	return ret;
}
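/*
 * Example (illustrative) "size" output for a group on a system with L3
 * CAT and MBA resources and two domains:
 *
 *	L3:0=16777216;1=33554432
 *	MB:0=100;1=100
 *
 * Cache entries are reported in bytes, while MBA entries echo the
 * configured bandwidth control value rather than a size.
 */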

struct mon_config_info {
	u32 evtid;
	u32 mon_config;
};

#define INVALID_CONFIG_INDEX	UINT_MAX

/**
 * mon_event_config_index_get - get the hardware index for the
 *                              configurable event
 * @evtid: event id.
 *
 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
 *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
 *         INVALID_CONFIG_INDEX for invalid evtid
 */
static inline unsigned int mon_event_config_index_get(u32 evtid)
{
	switch (evtid) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return 0;
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return 1;
	default:
		/* Should never reach here */
		return INVALID_CONFIG_INDEX;
	}
}

static void mon_event_config_read(void *info)
{
	struct mon_config_info *mon_info = info;
	unsigned int index;
	u64 msrval;

	index = mon_event_config_index_get(mon_info->evtid);
	if (index == INVALID_CONFIG_INDEX) {
		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
		return;
	}
	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);

	/* Report only the valid event configuration bits */
	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}

static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
{
	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
}

static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
	struct mon_config_info mon_info;
	struct rdt_mon_domain *dom;
	bool sep = false;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
		if (sep)
			seq_puts(s, ";");

		memset(&mon_info, 0, sizeof(struct mon_config_info));
		mon_info.evtid = evtid;
		mondata_config_read(dom, &mon_info);

		seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
		sep = true;
	}
	seq_puts(s, "\n");

	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return 0;
}

static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
				       struct seq_file *seq, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);

	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);

	return 0;
}

static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
				       struct seq_file *seq, void *v)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);

	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);

	return 0;
}

static void mon_event_config_write(void *info)
{
	struct mon_config_info *mon_info = info;
	unsigned int index;

	index = mon_event_config_index_get(mon_info->evtid);
	if (index == INVALID_CONFIG_INDEX) {
		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
		return;
	}
	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
}

static void mbm_config_write_domain(struct rdt_resource *r,
				    struct rdt_mon_domain *d, u32 evtid, u32 val)
{
	struct mon_config_info mon_info = {0};

	/*
	 * Read the current config value first. If both are the same then
	 * no need to write it again.
	 */
	mon_info.evtid = evtid;
	mondata_config_read(d, &mon_info);
	if (mon_info.mon_config == val)
		return;

	mon_info.mon_config = val;

	/*
	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
	 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
	 * are scoped at the domain level. Writing any of these MSRs
	 * on one CPU is observed by all the CPUs in the domain.
	 */
	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
			      &mon_info, 1);

	/*
	 * When an Event Configuration is changed, the bandwidth counters
	 * for all RMIDs and Events will be cleared by the hardware. The
	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
	 * every RMID on the next read to any event for every RMID.
	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
	 * cleared while it is tracked by the hardware. Clear the
	 * mbm_local and mbm_total counts for all the RMIDs.
	 */
	resctrl_arch_reset_rmid_all(r, d);
}
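/*
 * Example (illustrative): user space updates an event configuration with
 * a ';'-separated list of "<domain id>=<value>" pairs, e.g. writing
 *
 *	0=0x30;1=0x30
 *
 * (hypothetical values) to info/L3_MON/mbm_total_bytes_config programs
 * configuration 0x30 on both monitoring domains. mon_config_write()
 * below parses this format and rejects values outside
 * hw_res->mbm_cfg_mask.
 */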

static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	char *dom_str = NULL, *id_str;
	unsigned long dom_id, val;
	struct rdt_mon_domain *d;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

next:
	if (!tok || tok[0] == '\0')
		return 0;

	/* Start processing the strings for each domain */
	dom_str = strim(strsep(&tok, ";"));
	id_str = strsep(&dom_str, "=");

	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
		return -EINVAL;
	}

	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
		rdt_last_cmd_puts("Non-numeric event configuration value\n");
		return -EINVAL;
	}

	/* Value from user cannot be more than the supported set of events */
	if ((val & hw_res->mbm_cfg_mask) != val) {
		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
				    hw_res->mbm_cfg_mask);
		return -EINVAL;
	}

	list_for_each_entry(d, &r->mon_domains, hdr.list) {
		if (d->hdr.id == dom_id) {
			mbm_config_write_domain(r, d, evtid, val);
			goto next;
		}
	}

	return -EINVAL;
}

static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	int ret;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	buf[nbytes - 1] = '\0';

	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);

	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret ?: nbytes;
}

static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
	int ret;

	/* Valid input requires a trailing newline */
	if (nbytes == 0 || buf[nbytes - 1] != '\n')
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	buf[nbytes - 1] = '\0';

	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);

	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();

	return ret ?: nbytes;
}

/* rdtgroup information files for one cache resource.
*/ 1812 static struct rftype res_common_files[] = { 1813 { 1814 .name = "last_cmd_status", 1815 .mode = 0444, 1816 .kf_ops = &rdtgroup_kf_single_ops, 1817 .seq_show = rdt_last_cmd_status_show, 1818 .fflags = RFTYPE_TOP_INFO, 1819 }, 1820 { 1821 .name = "num_closids", 1822 .mode = 0444, 1823 .kf_ops = &rdtgroup_kf_single_ops, 1824 .seq_show = rdt_num_closids_show, 1825 .fflags = RFTYPE_CTRL_INFO, 1826 }, 1827 { 1828 .name = "mon_features", 1829 .mode = 0444, 1830 .kf_ops = &rdtgroup_kf_single_ops, 1831 .seq_show = rdt_mon_features_show, 1832 .fflags = RFTYPE_MON_INFO, 1833 }, 1834 { 1835 .name = "num_rmids", 1836 .mode = 0444, 1837 .kf_ops = &rdtgroup_kf_single_ops, 1838 .seq_show = rdt_num_rmids_show, 1839 .fflags = RFTYPE_MON_INFO, 1840 }, 1841 { 1842 .name = "cbm_mask", 1843 .mode = 0444, 1844 .kf_ops = &rdtgroup_kf_single_ops, 1845 .seq_show = rdt_default_ctrl_show, 1846 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1847 }, 1848 { 1849 .name = "min_cbm_bits", 1850 .mode = 0444, 1851 .kf_ops = &rdtgroup_kf_single_ops, 1852 .seq_show = rdt_min_cbm_bits_show, 1853 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1854 }, 1855 { 1856 .name = "shareable_bits", 1857 .mode = 0444, 1858 .kf_ops = &rdtgroup_kf_single_ops, 1859 .seq_show = rdt_shareable_bits_show, 1860 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1861 }, 1862 { 1863 .name = "bit_usage", 1864 .mode = 0444, 1865 .kf_ops = &rdtgroup_kf_single_ops, 1866 .seq_show = rdt_bit_usage_show, 1867 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1868 }, 1869 { 1870 .name = "min_bandwidth", 1871 .mode = 0444, 1872 .kf_ops = &rdtgroup_kf_single_ops, 1873 .seq_show = rdt_min_bw_show, 1874 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1875 }, 1876 { 1877 .name = "bandwidth_gran", 1878 .mode = 0444, 1879 .kf_ops = &rdtgroup_kf_single_ops, 1880 .seq_show = rdt_bw_gran_show, 1881 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1882 }, 1883 { 1884 .name = "delay_linear", 1885 .mode = 0444, 1886 .kf_ops = &rdtgroup_kf_single_ops, 1887 .seq_show = rdt_delay_linear_show, 1888 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1889 }, 1890 /* 1891 * Platform specific which (if any) capabilities are provided by 1892 * thread_throttle_mode. Defer "fflags" initialization to platform 1893 * discovery. 
1894 */ 1895 { 1896 .name = "thread_throttle_mode", 1897 .mode = 0444, 1898 .kf_ops = &rdtgroup_kf_single_ops, 1899 .seq_show = rdt_thread_throttle_mode_show, 1900 }, 1901 { 1902 .name = "max_threshold_occupancy", 1903 .mode = 0644, 1904 .kf_ops = &rdtgroup_kf_single_ops, 1905 .write = max_threshold_occ_write, 1906 .seq_show = max_threshold_occ_show, 1907 .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, 1908 }, 1909 { 1910 .name = "mbm_total_bytes_config", 1911 .mode = 0644, 1912 .kf_ops = &rdtgroup_kf_single_ops, 1913 .seq_show = mbm_total_bytes_config_show, 1914 .write = mbm_total_bytes_config_write, 1915 }, 1916 { 1917 .name = "mbm_local_bytes_config", 1918 .mode = 0644, 1919 .kf_ops = &rdtgroup_kf_single_ops, 1920 .seq_show = mbm_local_bytes_config_show, 1921 .write = mbm_local_bytes_config_write, 1922 }, 1923 { 1924 .name = "cpus", 1925 .mode = 0644, 1926 .kf_ops = &rdtgroup_kf_single_ops, 1927 .write = rdtgroup_cpus_write, 1928 .seq_show = rdtgroup_cpus_show, 1929 .fflags = RFTYPE_BASE, 1930 }, 1931 { 1932 .name = "cpus_list", 1933 .mode = 0644, 1934 .kf_ops = &rdtgroup_kf_single_ops, 1935 .write = rdtgroup_cpus_write, 1936 .seq_show = rdtgroup_cpus_show, 1937 .flags = RFTYPE_FLAGS_CPUS_LIST, 1938 .fflags = RFTYPE_BASE, 1939 }, 1940 { 1941 .name = "tasks", 1942 .mode = 0644, 1943 .kf_ops = &rdtgroup_kf_single_ops, 1944 .write = rdtgroup_tasks_write, 1945 .seq_show = rdtgroup_tasks_show, 1946 .fflags = RFTYPE_BASE, 1947 }, 1948 { 1949 .name = "mon_hw_id", 1950 .mode = 0444, 1951 .kf_ops = &rdtgroup_kf_single_ops, 1952 .seq_show = rdtgroup_rmid_show, 1953 .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, 1954 }, 1955 { 1956 .name = "schemata", 1957 .mode = 0644, 1958 .kf_ops = &rdtgroup_kf_single_ops, 1959 .write = rdtgroup_schemata_write, 1960 .seq_show = rdtgroup_schemata_show, 1961 .fflags = RFTYPE_CTRL_BASE, 1962 }, 1963 { 1964 .name = "mba_MBps_event", 1965 .mode = 0644, 1966 .kf_ops = &rdtgroup_kf_single_ops, 1967 .write = rdtgroup_mba_mbps_event_write, 1968 .seq_show = rdtgroup_mba_mbps_event_show, 1969 }, 1970 { 1971 .name = "mode", 1972 .mode = 0644, 1973 .kf_ops = &rdtgroup_kf_single_ops, 1974 .write = rdtgroup_mode_write, 1975 .seq_show = rdtgroup_mode_show, 1976 .fflags = RFTYPE_CTRL_BASE, 1977 }, 1978 { 1979 .name = "size", 1980 .mode = 0444, 1981 .kf_ops = &rdtgroup_kf_single_ops, 1982 .seq_show = rdtgroup_size_show, 1983 .fflags = RFTYPE_CTRL_BASE, 1984 }, 1985 { 1986 .name = "sparse_masks", 1987 .mode = 0444, 1988 .kf_ops = &rdtgroup_kf_single_ops, 1989 .seq_show = rdt_has_sparse_bitmasks_show, 1990 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1991 }, 1992 { 1993 .name = "ctrl_hw_id", 1994 .mode = 0444, 1995 .kf_ops = &rdtgroup_kf_single_ops, 1996 .seq_show = rdtgroup_closid_show, 1997 .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, 1998 }, 1999 2000 }; 2001 2002 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) 2003 { 2004 struct rftype *rfts, *rft; 2005 int ret, len; 2006 2007 rfts = res_common_files; 2008 len = ARRAY_SIZE(res_common_files); 2009 2010 lockdep_assert_held(&rdtgroup_mutex); 2011 2012 if (resctrl_debug) 2013 fflags |= RFTYPE_DEBUG; 2014 2015 for (rft = rfts; rft < rfts + len; rft++) { 2016 if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { 2017 ret = rdtgroup_add_file(kn, rft); 2018 if (ret) 2019 goto error; 2020 } 2021 } 2022 2023 return 0; 2024 error: 2025 pr_warn("Failed to add %s, err=%d\n", rft->name, ret); 2026 while (--rft >= rfts) { 2027 if ((fflags & rft->fflags) == rft->fflags) 2028 kernfs_remove_by_name(kn, rft->name); 
2029 } 2030 return ret; 2031 } 2032 2033 static struct rftype *rdtgroup_get_rftype_by_name(const char *name) 2034 { 2035 struct rftype *rfts, *rft; 2036 int len; 2037 2038 rfts = res_common_files; 2039 len = ARRAY_SIZE(res_common_files); 2040 2041 for (rft = rfts; rft < rfts + len; rft++) { 2042 if (!strcmp(rft->name, name)) 2043 return rft; 2044 } 2045 2046 return NULL; 2047 } 2048 2049 void resctrl_file_fflags_init(const char *config, unsigned long fflags) 2050 { 2051 struct rftype *rft; 2052 2053 rft = rdtgroup_get_rftype_by_name(config); 2054 if (rft) 2055 rft->fflags = fflags; 2056 } 2057 2058 /** 2059 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file 2060 * @r: The resource group with which the file is associated. 2061 * @name: Name of the file 2062 * 2063 * The permissions of named resctrl file, directory, or link are modified 2064 * to not allow read, write, or execute by any user. 2065 * 2066 * WARNING: This function is intended to communicate to the user that the 2067 * resctrl file has been locked down - that it is not relevant to the 2068 * particular state the system finds itself in. It should not be relied 2069 * on to protect from user access because after the file's permissions 2070 * are restricted the user can still change the permissions using chmod 2071 * from the command line. 2072 * 2073 * Return: 0 on success, <0 on failure. 2074 */ 2075 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) 2076 { 2077 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2078 struct kernfs_node *kn; 2079 int ret = 0; 2080 2081 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2082 if (!kn) 2083 return -ENOENT; 2084 2085 switch (kernfs_type(kn)) { 2086 case KERNFS_DIR: 2087 iattr.ia_mode = S_IFDIR; 2088 break; 2089 case KERNFS_FILE: 2090 iattr.ia_mode = S_IFREG; 2091 break; 2092 case KERNFS_LINK: 2093 iattr.ia_mode = S_IFLNK; 2094 break; 2095 } 2096 2097 ret = kernfs_setattr(kn, &iattr); 2098 kernfs_put(kn); 2099 return ret; 2100 } 2101 2102 /** 2103 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file 2104 * @r: The resource group with which the file is associated. 2105 * @name: Name of the file 2106 * @mask: Mask of permissions that should be restored 2107 * 2108 * Restore the permissions of the named file. If @name is a directory the 2109 * permissions of its parent will be used. 2110 * 2111 * Return: 0 on success, <0 on failure. 
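 *
 * The mode written back is the matching res_common_files entry's mode
 * ANDed with @mask, so a permissive @mask (0777, for instance) restores
 * the original permissions while a stricter @mask can only clear bits.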
2112 */ 2113 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, 2114 umode_t mask) 2115 { 2116 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2117 struct kernfs_node *kn, *parent; 2118 struct rftype *rfts, *rft; 2119 int ret, len; 2120 2121 rfts = res_common_files; 2122 len = ARRAY_SIZE(res_common_files); 2123 2124 for (rft = rfts; rft < rfts + len; rft++) { 2125 if (!strcmp(rft->name, name)) 2126 iattr.ia_mode = rft->mode & mask; 2127 } 2128 2129 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2130 if (!kn) 2131 return -ENOENT; 2132 2133 switch (kernfs_type(kn)) { 2134 case KERNFS_DIR: 2135 parent = kernfs_get_parent(kn); 2136 if (parent) { 2137 iattr.ia_mode |= parent->mode; 2138 kernfs_put(parent); 2139 } 2140 iattr.ia_mode |= S_IFDIR; 2141 break; 2142 case KERNFS_FILE: 2143 iattr.ia_mode |= S_IFREG; 2144 break; 2145 case KERNFS_LINK: 2146 iattr.ia_mode |= S_IFLNK; 2147 break; 2148 } 2149 2150 ret = kernfs_setattr(kn, &iattr); 2151 kernfs_put(kn); 2152 return ret; 2153 } 2154 2155 static int rdtgroup_mkdir_info_resdir(void *priv, char *name, 2156 unsigned long fflags) 2157 { 2158 struct kernfs_node *kn_subdir; 2159 int ret; 2160 2161 kn_subdir = kernfs_create_dir(kn_info, name, 2162 kn_info->mode, priv); 2163 if (IS_ERR(kn_subdir)) 2164 return PTR_ERR(kn_subdir); 2165 2166 ret = rdtgroup_kn_set_ugid(kn_subdir); 2167 if (ret) 2168 return ret; 2169 2170 ret = rdtgroup_add_files(kn_subdir, fflags); 2171 if (!ret) 2172 kernfs_activate(kn_subdir); 2173 2174 return ret; 2175 } 2176 2177 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) 2178 { 2179 struct resctrl_schema *s; 2180 struct rdt_resource *r; 2181 unsigned long fflags; 2182 char name[32]; 2183 int ret; 2184 2185 /* create the directory */ 2186 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); 2187 if (IS_ERR(kn_info)) 2188 return PTR_ERR(kn_info); 2189 2190 ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); 2191 if (ret) 2192 goto out_destroy; 2193 2194 /* loop over enabled controls, these are all alloc_capable */ 2195 list_for_each_entry(s, &resctrl_schema_all, list) { 2196 r = s->res; 2197 fflags = r->fflags | RFTYPE_CTRL_INFO; 2198 ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); 2199 if (ret) 2200 goto out_destroy; 2201 } 2202 2203 for_each_mon_capable_rdt_resource(r) { 2204 fflags = r->fflags | RFTYPE_MON_INFO; 2205 sprintf(name, "%s_MON", r->name); 2206 ret = rdtgroup_mkdir_info_resdir(r, name, fflags); 2207 if (ret) 2208 goto out_destroy; 2209 } 2210 2211 ret = rdtgroup_kn_set_ugid(kn_info); 2212 if (ret) 2213 goto out_destroy; 2214 2215 kernfs_activate(kn_info); 2216 2217 return 0; 2218 2219 out_destroy: 2220 kernfs_remove(kn_info); 2221 return ret; 2222 } 2223 2224 static int 2225 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, 2226 char *name, struct kernfs_node **dest_kn) 2227 { 2228 struct kernfs_node *kn; 2229 int ret; 2230 2231 /* create the directory */ 2232 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 2233 if (IS_ERR(kn)) 2234 return PTR_ERR(kn); 2235 2236 if (dest_kn) 2237 *dest_kn = kn; 2238 2239 ret = rdtgroup_kn_set_ugid(kn); 2240 if (ret) 2241 goto out_destroy; 2242 2243 kernfs_activate(kn); 2244 2245 return 0; 2246 2247 out_destroy: 2248 kernfs_remove(kn); 2249 return ret; 2250 } 2251 2252 static void l3_qos_cfg_update(void *arg) 2253 { 2254 bool *enable = arg; 2255 2256 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? 
L3_QOS_CDP_ENABLE : 0ULL); 2257 } 2258 2259 static void l2_qos_cfg_update(void *arg) 2260 { 2261 bool *enable = arg; 2262 2263 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); 2264 } 2265 2266 static inline bool is_mba_linear(void) 2267 { 2268 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; 2269 } 2270 2271 static int set_cache_qos_cfg(int level, bool enable) 2272 { 2273 void (*update)(void *arg); 2274 struct rdt_ctrl_domain *d; 2275 struct rdt_resource *r_l; 2276 cpumask_var_t cpu_mask; 2277 int cpu; 2278 2279 /* Walking r->domains, ensure it can't race with cpuhp */ 2280 lockdep_assert_cpus_held(); 2281 2282 if (level == RDT_RESOURCE_L3) 2283 update = l3_qos_cfg_update; 2284 else if (level == RDT_RESOURCE_L2) 2285 update = l2_qos_cfg_update; 2286 else 2287 return -EINVAL; 2288 2289 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 2290 return -ENOMEM; 2291 2292 r_l = &rdt_resources_all[level].r_resctrl; 2293 list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) { 2294 if (r_l->cache.arch_has_per_cpu_cfg) 2295 /* Pick all the CPUs in the domain instance */ 2296 for_each_cpu(cpu, &d->hdr.cpu_mask) 2297 cpumask_set_cpu(cpu, cpu_mask); 2298 else 2299 /* Pick one CPU from each domain instance to update MSR */ 2300 cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); 2301 } 2302 2303 /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ 2304 on_each_cpu_mask(cpu_mask, update, &enable, 1); 2305 2306 free_cpumask_var(cpu_mask); 2307 2308 return 0; 2309 } 2310 2311 /* Restore the qos cfg state when a domain comes online */ 2312 void rdt_domain_reconfigure_cdp(struct rdt_resource *r) 2313 { 2314 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2315 2316 if (!r->cdp_capable) 2317 return; 2318 2319 if (r->rid == RDT_RESOURCE_L2) 2320 l2_qos_cfg_update(&hw_res->cdp_enabled); 2321 2322 if (r->rid == RDT_RESOURCE_L3) 2323 l3_qos_cfg_update(&hw_res->cdp_enabled); 2324 } 2325 2326 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) 2327 { 2328 u32 num_closid = resctrl_arch_get_num_closid(r); 2329 int cpu = cpumask_any(&d->hdr.cpu_mask); 2330 int i; 2331 2332 d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), 2333 GFP_KERNEL, cpu_to_node(cpu)); 2334 if (!d->mbps_val) 2335 return -ENOMEM; 2336 2337 for (i = 0; i < num_closid; i++) 2338 d->mbps_val[i] = MBA_MAX_MBPS; 2339 2340 return 0; 2341 } 2342 2343 static void mba_sc_domain_destroy(struct rdt_resource *r, 2344 struct rdt_ctrl_domain *d) 2345 { 2346 kfree(d->mbps_val); 2347 d->mbps_val = NULL; 2348 } 2349 2350 /* 2351 * MBA software controller is supported only if 2352 * MBM is supported and MBA is in linear scale, 2353 * and the MBM monitor scope is the same as MBA 2354 * control scope. 2355 */ 2356 static bool supports_mba_mbps(void) 2357 { 2358 struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 2359 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2360 2361 return (is_mbm_enabled() && 2362 r->alloc_capable && is_mba_linear() && 2363 r->ctrl_scope == rmbm->mon_scope); 2364 } 2365 2366 /* 2367 * Enable or disable the MBA software controller 2368 * which helps user specify bandwidth in MBps. 
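 * With the software controller active, the MB values written to the
 * schemata file are interpreted as MBps targets instead of the raw
 * hardware percentage/delay values; the MBM overflow handler then tunes
 * the underlying MBA setting to track the request. For example, writing
 * "MB:0=2048" would ask for roughly 2048 MBps on domain 0.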
2369 */ 2370 static int set_mba_sc(bool mba_sc) 2371 { 2372 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2373 u32 num_closid = resctrl_arch_get_num_closid(r); 2374 struct rdt_ctrl_domain *d; 2375 unsigned long fflags; 2376 int i; 2377 2378 if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) 2379 return -EINVAL; 2380 2381 r->membw.mba_sc = mba_sc; 2382 2383 rdtgroup_default.mba_mbps_event = mba_mbps_default_event; 2384 2385 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 2386 for (i = 0; i < num_closid; i++) 2387 d->mbps_val[i] = MBA_MAX_MBPS; 2388 } 2389 2390 fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0; 2391 resctrl_file_fflags_init("mba_MBps_event", fflags); 2392 2393 return 0; 2394 } 2395 2396 static int cdp_enable(int level) 2397 { 2398 struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; 2399 int ret; 2400 2401 if (!r_l->alloc_capable) 2402 return -EINVAL; 2403 2404 ret = set_cache_qos_cfg(level, true); 2405 if (!ret) 2406 rdt_resources_all[level].cdp_enabled = true; 2407 2408 return ret; 2409 } 2410 2411 static void cdp_disable(int level) 2412 { 2413 struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; 2414 2415 if (r_hw->cdp_enabled) { 2416 set_cache_qos_cfg(level, false); 2417 r_hw->cdp_enabled = false; 2418 } 2419 } 2420 2421 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) 2422 { 2423 struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; 2424 2425 if (!hw_res->r_resctrl.cdp_capable) 2426 return -EINVAL; 2427 2428 if (enable) 2429 return cdp_enable(l); 2430 2431 cdp_disable(l); 2432 2433 return 0; 2434 } 2435 2436 /* 2437 * We don't allow rdtgroup directories to be created anywhere 2438 * except the root directory. Thus when looking for the rdtgroup 2439 * structure for a kernfs node we are either looking at a directory, 2440 * in which case the rdtgroup structure is pointed at by the "priv" 2441 * field, otherwise we have a file, and need only look to the parent 2442 * to find the rdtgroup. 2443 */ 2444 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) 2445 { 2446 if (kernfs_type(kn) == KERNFS_DIR) { 2447 /* 2448 * All the resource directories use "kn->priv" 2449 * to point to the "struct rdtgroup" for the 2450 * resource. "info" and its subdirectories don't 2451 * have rdtgroup structures, so return NULL here. 2452 */ 2453 if (kn == kn_info || 2454 rcu_access_pointer(kn->__parent) == kn_info) 2455 return NULL; 2456 else 2457 return kn->priv; 2458 } else { 2459 return rdt_kn_parent_priv(kn); 2460 } 2461 } 2462 2463 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2464 { 2465 atomic_inc(&rdtgrp->waitcount); 2466 kernfs_break_active_protection(kn); 2467 } 2468 2469 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2470 { 2471 if (atomic_dec_and_test(&rdtgrp->waitcount) && 2472 (rdtgrp->flags & RDT_DELETED)) { 2473 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2474 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2475 rdtgroup_pseudo_lock_remove(rdtgrp); 2476 kernfs_unbreak_active_protection(kn); 2477 rdtgroup_remove(rdtgrp); 2478 } else { 2479 kernfs_unbreak_active_protection(kn); 2480 } 2481 } 2482 2483 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) 2484 { 2485 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2486 2487 if (!rdtgrp) 2488 return NULL; 2489 2490 rdtgroup_kn_get(rdtgrp, kn); 2491 2492 cpus_read_lock(); 2493 mutex_lock(&rdtgroup_mutex); 2494 2495 /* Was this group deleted while we waited? 
*/ 2496 if (rdtgrp->flags & RDT_DELETED) 2497 return NULL; 2498 2499 return rdtgrp; 2500 } 2501 2502 void rdtgroup_kn_unlock(struct kernfs_node *kn) 2503 { 2504 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2505 2506 if (!rdtgrp) 2507 return; 2508 2509 mutex_unlock(&rdtgroup_mutex); 2510 cpus_read_unlock(); 2511 2512 rdtgroup_kn_put(rdtgrp, kn); 2513 } 2514 2515 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 2516 struct rdtgroup *prgrp, 2517 struct kernfs_node **mon_data_kn); 2518 2519 static void rdt_disable_ctx(void) 2520 { 2521 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2522 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2523 set_mba_sc(false); 2524 2525 resctrl_debug = false; 2526 } 2527 2528 static int rdt_enable_ctx(struct rdt_fs_context *ctx) 2529 { 2530 int ret = 0; 2531 2532 if (ctx->enable_cdpl2) { 2533 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); 2534 if (ret) 2535 goto out_done; 2536 } 2537 2538 if (ctx->enable_cdpl3) { 2539 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); 2540 if (ret) 2541 goto out_cdpl2; 2542 } 2543 2544 if (ctx->enable_mba_mbps) { 2545 ret = set_mba_sc(true); 2546 if (ret) 2547 goto out_cdpl3; 2548 } 2549 2550 if (ctx->enable_debug) 2551 resctrl_debug = true; 2552 2553 return 0; 2554 2555 out_cdpl3: 2556 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2557 out_cdpl2: 2558 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2559 out_done: 2560 return ret; 2561 } 2562 2563 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) 2564 { 2565 struct resctrl_schema *s; 2566 const char *suffix = ""; 2567 int ret, cl; 2568 2569 s = kzalloc(sizeof(*s), GFP_KERNEL); 2570 if (!s) 2571 return -ENOMEM; 2572 2573 s->res = r; 2574 s->num_closid = resctrl_arch_get_num_closid(r); 2575 if (resctrl_arch_get_cdp_enabled(r->rid)) 2576 s->num_closid /= 2; 2577 2578 s->conf_type = type; 2579 switch (type) { 2580 case CDP_CODE: 2581 suffix = "CODE"; 2582 break; 2583 case CDP_DATA: 2584 suffix = "DATA"; 2585 break; 2586 case CDP_NONE: 2587 suffix = ""; 2588 break; 2589 } 2590 2591 ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); 2592 if (ret >= sizeof(s->name)) { 2593 kfree(s); 2594 return -EINVAL; 2595 } 2596 2597 cl = strlen(s->name); 2598 2599 /* 2600 * If CDP is supported by this resource, but not enabled, 2601 * include the suffix. This ensures the tabular format of the 2602 * schemata file does not change between mounts of the filesystem. 
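 * That is, a CDP capable "L3" resource is padded as if it were named
 * "L3CODE"/"L3DATA" (four extra characters) so the column widths match
 * whether or not CDP was enabled at mount time.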
2603 */ 2604 if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) 2605 cl += 4; 2606 2607 if (cl > max_name_width) 2608 max_name_width = cl; 2609 2610 INIT_LIST_HEAD(&s->list); 2611 list_add(&s->list, &resctrl_schema_all); 2612 2613 return 0; 2614 } 2615 2616 static int schemata_list_create(void) 2617 { 2618 struct rdt_resource *r; 2619 int ret = 0; 2620 2621 for_each_alloc_capable_rdt_resource(r) { 2622 if (resctrl_arch_get_cdp_enabled(r->rid)) { 2623 ret = schemata_list_add(r, CDP_CODE); 2624 if (ret) 2625 break; 2626 2627 ret = schemata_list_add(r, CDP_DATA); 2628 } else { 2629 ret = schemata_list_add(r, CDP_NONE); 2630 } 2631 2632 if (ret) 2633 break; 2634 } 2635 2636 return ret; 2637 } 2638 2639 static void schemata_list_destroy(void) 2640 { 2641 struct resctrl_schema *s, *tmp; 2642 2643 list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { 2644 list_del(&s->list); 2645 kfree(s); 2646 } 2647 } 2648 2649 static int rdt_get_tree(struct fs_context *fc) 2650 { 2651 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2652 unsigned long flags = RFTYPE_CTRL_BASE; 2653 struct rdt_mon_domain *dom; 2654 struct rdt_resource *r; 2655 int ret; 2656 2657 cpus_read_lock(); 2658 mutex_lock(&rdtgroup_mutex); 2659 /* 2660 * resctrl file system can only be mounted once. 2661 */ 2662 if (resctrl_mounted) { 2663 ret = -EBUSY; 2664 goto out; 2665 } 2666 2667 ret = rdtgroup_setup_root(ctx); 2668 if (ret) 2669 goto out; 2670 2671 ret = rdt_enable_ctx(ctx); 2672 if (ret) 2673 goto out_root; 2674 2675 ret = schemata_list_create(); 2676 if (ret) { 2677 schemata_list_destroy(); 2678 goto out_ctx; 2679 } 2680 2681 closid_init(); 2682 2683 if (resctrl_arch_mon_capable()) 2684 flags |= RFTYPE_MON; 2685 2686 ret = rdtgroup_add_files(rdtgroup_default.kn, flags); 2687 if (ret) 2688 goto out_schemata_free; 2689 2690 kernfs_activate(rdtgroup_default.kn); 2691 2692 ret = rdtgroup_create_info_dir(rdtgroup_default.kn); 2693 if (ret < 0) 2694 goto out_schemata_free; 2695 2696 if (resctrl_arch_mon_capable()) { 2697 ret = mongroup_create_dir(rdtgroup_default.kn, 2698 &rdtgroup_default, "mon_groups", 2699 &kn_mongrp); 2700 if (ret < 0) 2701 goto out_info; 2702 2703 ret = mkdir_mondata_all(rdtgroup_default.kn, 2704 &rdtgroup_default, &kn_mondata); 2705 if (ret < 0) 2706 goto out_mongrp; 2707 rdtgroup_default.mon.mon_data_kn = kn_mondata; 2708 } 2709 2710 ret = rdt_pseudo_lock_init(); 2711 if (ret) 2712 goto out_mondata; 2713 2714 ret = kernfs_get_tree(fc); 2715 if (ret < 0) 2716 goto out_psl; 2717 2718 if (resctrl_arch_alloc_capable()) 2719 resctrl_arch_enable_alloc(); 2720 if (resctrl_arch_mon_capable()) 2721 resctrl_arch_enable_mon(); 2722 2723 if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) 2724 resctrl_mounted = true; 2725 2726 if (is_mbm_enabled()) { 2727 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 2728 list_for_each_entry(dom, &r->mon_domains, hdr.list) 2729 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, 2730 RESCTRL_PICK_ANY_CPU); 2731 } 2732 2733 goto out; 2734 2735 out_psl: 2736 rdt_pseudo_lock_release(); 2737 out_mondata: 2738 if (resctrl_arch_mon_capable()) 2739 kernfs_remove(kn_mondata); 2740 out_mongrp: 2741 if (resctrl_arch_mon_capable()) 2742 kernfs_remove(kn_mongrp); 2743 out_info: 2744 kernfs_remove(kn_info); 2745 out_schemata_free: 2746 schemata_list_destroy(); 2747 out_ctx: 2748 rdt_disable_ctx(); 2749 out_root: 2750 rdtgroup_destroy_root(); 2751 out: 2752 rdt_last_cmd_clear(); 2753 mutex_unlock(&rdtgroup_mutex); 2754 cpus_read_unlock(); 2755 return ret; 2756 } 2757 
2758 enum rdt_param { 2759 Opt_cdp, 2760 Opt_cdpl2, 2761 Opt_mba_mbps, 2762 Opt_debug, 2763 nr__rdt_params 2764 }; 2765 2766 static const struct fs_parameter_spec rdt_fs_parameters[] = { 2767 fsparam_flag("cdp", Opt_cdp), 2768 fsparam_flag("cdpl2", Opt_cdpl2), 2769 fsparam_flag("mba_MBps", Opt_mba_mbps), 2770 fsparam_flag("debug", Opt_debug), 2771 {} 2772 }; 2773 2774 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) 2775 { 2776 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2777 struct fs_parse_result result; 2778 const char *msg; 2779 int opt; 2780 2781 opt = fs_parse(fc, rdt_fs_parameters, param, &result); 2782 if (opt < 0) 2783 return opt; 2784 2785 switch (opt) { 2786 case Opt_cdp: 2787 ctx->enable_cdpl3 = true; 2788 return 0; 2789 case Opt_cdpl2: 2790 ctx->enable_cdpl2 = true; 2791 return 0; 2792 case Opt_mba_mbps: 2793 msg = "mba_MBps requires MBM and linear scale MBA at L3 scope"; 2794 if (!supports_mba_mbps()) 2795 return invalfc(fc, msg); 2796 ctx->enable_mba_mbps = true; 2797 return 0; 2798 case Opt_debug: 2799 ctx->enable_debug = true; 2800 return 0; 2801 } 2802 2803 return -EINVAL; 2804 } 2805 2806 static void rdt_fs_context_free(struct fs_context *fc) 2807 { 2808 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2809 2810 kernfs_free_fs_context(fc); 2811 kfree(ctx); 2812 } 2813 2814 static const struct fs_context_operations rdt_fs_context_ops = { 2815 .free = rdt_fs_context_free, 2816 .parse_param = rdt_parse_param, 2817 .get_tree = rdt_get_tree, 2818 }; 2819 2820 static int rdt_init_fs_context(struct fs_context *fc) 2821 { 2822 struct rdt_fs_context *ctx; 2823 2824 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); 2825 if (!ctx) 2826 return -ENOMEM; 2827 2828 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; 2829 fc->fs_private = &ctx->kfc; 2830 fc->ops = &rdt_fs_context_ops; 2831 put_user_ns(fc->user_ns); 2832 fc->user_ns = get_user_ns(&init_user_ns); 2833 fc->global = true; 2834 return 0; 2835 } 2836 2837 static int reset_all_ctrls(struct rdt_resource *r) 2838 { 2839 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2840 struct rdt_hw_ctrl_domain *hw_dom; 2841 struct msr_param msr_param; 2842 struct rdt_ctrl_domain *d; 2843 int i; 2844 2845 /* Walking r->domains, ensure it can't race with cpuhp */ 2846 lockdep_assert_cpus_held(); 2847 2848 msr_param.res = r; 2849 msr_param.low = 0; 2850 msr_param.high = hw_res->num_closid; 2851 2852 /* 2853 * Disable resource control for this resource by setting all 2854 * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU 2855 * from each domain to update the MSRs below. 2856 */ 2857 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 2858 hw_dom = resctrl_to_arch_ctrl_dom(d); 2859 2860 for (i = 0; i < hw_res->num_closid; i++) 2861 hw_dom->ctrl_val[i] = r->default_ctrl; 2862 msr_param.dom = d; 2863 smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); 2864 } 2865 2866 return 0; 2867 } 2868 2869 /* 2870 * Move tasks from one to the other group. If @from is NULL, then all tasks 2871 * in the systems are moved unconditionally (used for teardown). 2872 * 2873 * If @mask is not NULL the cpus on which moved tasks are running are set 2874 * in that mask so the update smp function call is restricted to affected 2875 * cpus. 
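 * Note that this walks every thread in the system under tasklist_lock,
 * so the cost scales with the total number of tasks rather than with the
 * size of the group being moved.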
2876 */ 2877 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, 2878 struct cpumask *mask) 2879 { 2880 struct task_struct *p, *t; 2881 2882 read_lock(&tasklist_lock); 2883 for_each_process_thread(p, t) { 2884 if (!from || is_closid_match(t, from) || 2885 is_rmid_match(t, from)) { 2886 resctrl_arch_set_closid_rmid(t, to->closid, 2887 to->mon.rmid); 2888 2889 /* 2890 * Order the closid/rmid stores above before the loads 2891 * in task_curr(). This pairs with the full barrier 2892 * between the rq->curr update and resctrl_sched_in() 2893 * during context switch. 2894 */ 2895 smp_mb(); 2896 2897 /* 2898 * If the task is on a CPU, set the CPU in the mask. 2899 * The detection is inaccurate as tasks might move or 2900 * schedule before the smp function call takes place. 2901 * In such a case the function call is pointless, but 2902 * there is no other side effect. 2903 */ 2904 if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) 2905 cpumask_set_cpu(task_cpu(t), mask); 2906 } 2907 } 2908 read_unlock(&tasklist_lock); 2909 } 2910 2911 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) 2912 { 2913 struct rdtgroup *sentry, *stmp; 2914 struct list_head *head; 2915 2916 head = &rdtgrp->mon.crdtgrp_list; 2917 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { 2918 free_rmid(sentry->closid, sentry->mon.rmid); 2919 list_del(&sentry->mon.crdtgrp_list); 2920 2921 if (atomic_read(&sentry->waitcount) != 0) 2922 sentry->flags = RDT_DELETED; 2923 else 2924 rdtgroup_remove(sentry); 2925 } 2926 } 2927 2928 /* 2929 * Forcibly remove all of subdirectories under root. 2930 */ 2931 static void rmdir_all_sub(void) 2932 { 2933 struct rdtgroup *rdtgrp, *tmp; 2934 2935 /* Move all tasks to the default resource group */ 2936 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); 2937 2938 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { 2939 /* Free any child rmids */ 2940 free_all_child_rdtgrp(rdtgrp); 2941 2942 /* Remove each rdtgroup other than root */ 2943 if (rdtgrp == &rdtgroup_default) 2944 continue; 2945 2946 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2947 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2948 rdtgroup_pseudo_lock_remove(rdtgrp); 2949 2950 /* 2951 * Give any CPUs back to the default group. We cannot copy 2952 * cpu_online_mask because a CPU might have executed the 2953 * offline callback already, but is still marked online. 2954 */ 2955 cpumask_or(&rdtgroup_default.cpu_mask, 2956 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 2957 2958 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 2959 2960 kernfs_remove(rdtgrp->kn); 2961 list_del(&rdtgrp->rdtgroup_list); 2962 2963 if (atomic_read(&rdtgrp->waitcount) != 0) 2964 rdtgrp->flags = RDT_DELETED; 2965 else 2966 rdtgroup_remove(rdtgrp); 2967 } 2968 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ 2969 update_closid_rmid(cpu_online_mask, &rdtgroup_default); 2970 2971 kernfs_remove(kn_info); 2972 kernfs_remove(kn_mongrp); 2973 kernfs_remove(kn_mondata); 2974 } 2975 2976 static void rdt_kill_sb(struct super_block *sb) 2977 { 2978 struct rdt_resource *r; 2979 2980 cpus_read_lock(); 2981 mutex_lock(&rdtgroup_mutex); 2982 2983 rdt_disable_ctx(); 2984 2985 /*Put everything back to default values. 
*/ 2986 for_each_alloc_capable_rdt_resource(r) 2987 reset_all_ctrls(r); 2988 rmdir_all_sub(); 2989 rdt_pseudo_lock_release(); 2990 rdtgroup_default.mode = RDT_MODE_SHAREABLE; 2991 schemata_list_destroy(); 2992 rdtgroup_destroy_root(); 2993 if (resctrl_arch_alloc_capable()) 2994 resctrl_arch_disable_alloc(); 2995 if (resctrl_arch_mon_capable()) 2996 resctrl_arch_disable_mon(); 2997 resctrl_mounted = false; 2998 kernfs_kill_sb(sb); 2999 mutex_unlock(&rdtgroup_mutex); 3000 cpus_read_unlock(); 3001 } 3002 3003 static struct file_system_type rdt_fs_type = { 3004 .name = "resctrl", 3005 .init_fs_context = rdt_init_fs_context, 3006 .parameters = rdt_fs_parameters, 3007 .kill_sb = rdt_kill_sb, 3008 }; 3009 3010 static int mon_addfile(struct kernfs_node *parent_kn, const char *name, 3011 void *priv) 3012 { 3013 struct kernfs_node *kn; 3014 int ret = 0; 3015 3016 kn = __kernfs_create_file(parent_kn, name, 0444, 3017 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 3018 &kf_mondata_ops, priv, NULL, NULL); 3019 if (IS_ERR(kn)) 3020 return PTR_ERR(kn); 3021 3022 ret = rdtgroup_kn_set_ugid(kn); 3023 if (ret) { 3024 kernfs_remove(kn); 3025 return ret; 3026 } 3027 3028 return ret; 3029 } 3030 3031 static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname) 3032 { 3033 struct kernfs_node *kn; 3034 3035 kn = kernfs_find_and_get(pkn, name); 3036 if (!kn) 3037 return; 3038 kernfs_put(kn); 3039 3040 if (kn->dir.subdirs <= 1) 3041 kernfs_remove(kn); 3042 else 3043 kernfs_remove_by_name(kn, subname); 3044 } 3045 3046 /* 3047 * Remove all subdirectories of mon_data of ctrl_mon groups 3048 * and monitor groups for the given domain. 3049 * Remove files and directories containing "sum" of domain data 3050 * when last domain being summed is removed. 3051 */ 3052 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3053 struct rdt_mon_domain *d) 3054 { 3055 struct rdtgroup *prgrp, *crgrp; 3056 char subname[32]; 3057 bool snc_mode; 3058 char name[32]; 3059 3060 snc_mode = r->mon_scope == RESCTRL_L3_NODE; 3061 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); 3062 if (snc_mode) 3063 sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); 3064 3065 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3066 mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); 3067 3068 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) 3069 mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname); 3070 } 3071 } 3072 3073 static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, 3074 struct rdt_resource *r, struct rdtgroup *prgrp, 3075 bool do_sum) 3076 { 3077 struct rmid_read rr = {0}; 3078 union mon_data_bits priv; 3079 struct mon_evt *mevt; 3080 int ret; 3081 3082 if (WARN_ON(list_empty(&r->evt_list))) 3083 return -EPERM; 3084 3085 priv.u.rid = r->rid; 3086 priv.u.domid = do_sum ? 
d->ci->id : d->hdr.id; 3087 priv.u.sum = do_sum; 3088 list_for_each_entry(mevt, &r->evt_list, list) { 3089 priv.u.evtid = mevt->evtid; 3090 ret = mon_addfile(kn, mevt->name, priv.priv); 3091 if (ret) 3092 return ret; 3093 3094 if (!do_sum && is_mbm_event(mevt->evtid)) 3095 mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); 3096 } 3097 3098 return 0; 3099 } 3100 3101 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, 3102 struct rdt_mon_domain *d, 3103 struct rdt_resource *r, struct rdtgroup *prgrp) 3104 { 3105 struct kernfs_node *kn, *ckn; 3106 char name[32]; 3107 bool snc_mode; 3108 int ret = 0; 3109 3110 lockdep_assert_held(&rdtgroup_mutex); 3111 3112 snc_mode = r->mon_scope == RESCTRL_L3_NODE; 3113 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); 3114 kn = kernfs_find_and_get(parent_kn, name); 3115 if (kn) { 3116 /* 3117 * rdtgroup_mutex will prevent this directory from being 3118 * removed. No need to keep this hold. 3119 */ 3120 kernfs_put(kn); 3121 } else { 3122 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 3123 if (IS_ERR(kn)) 3124 return PTR_ERR(kn); 3125 3126 ret = rdtgroup_kn_set_ugid(kn); 3127 if (ret) 3128 goto out_destroy; 3129 ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); 3130 if (ret) 3131 goto out_destroy; 3132 } 3133 3134 if (snc_mode) { 3135 sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); 3136 ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); 3137 if (IS_ERR(ckn)) { 3138 ret = -EINVAL; 3139 goto out_destroy; 3140 } 3141 3142 ret = rdtgroup_kn_set_ugid(ckn); 3143 if (ret) 3144 goto out_destroy; 3145 3146 ret = mon_add_all_files(ckn, d, r, prgrp, false); 3147 if (ret) 3148 goto out_destroy; 3149 } 3150 3151 kernfs_activate(kn); 3152 return 0; 3153 3154 out_destroy: 3155 kernfs_remove(kn); 3156 return ret; 3157 } 3158 3159 /* 3160 * Add all subdirectories of mon_data for "ctrl_mon" groups 3161 * and "monitor" groups with given domain id. 3162 */ 3163 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3164 struct rdt_mon_domain *d) 3165 { 3166 struct kernfs_node *parent_kn; 3167 struct rdtgroup *prgrp, *crgrp; 3168 struct list_head *head; 3169 3170 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3171 parent_kn = prgrp->mon.mon_data_kn; 3172 mkdir_mondata_subdir(parent_kn, d, r, prgrp); 3173 3174 head = &prgrp->mon.crdtgrp_list; 3175 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 3176 parent_kn = crgrp->mon.mon_data_kn; 3177 mkdir_mondata_subdir(parent_kn, d, r, crgrp); 3178 } 3179 } 3180 } 3181 3182 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, 3183 struct rdt_resource *r, 3184 struct rdtgroup *prgrp) 3185 { 3186 struct rdt_mon_domain *dom; 3187 int ret; 3188 3189 /* Walking r->domains, ensure it can't race with cpuhp */ 3190 lockdep_assert_cpus_held(); 3191 3192 list_for_each_entry(dom, &r->mon_domains, hdr.list) { 3193 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); 3194 if (ret) 3195 return ret; 3196 } 3197 3198 return 0; 3199 } 3200 3201 /* 3202 * This creates a directory mon_data which contains the monitored data. 3203 * 3204 * mon_data has one directory for each domain which are named 3205 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data 3206 * with L3 domain looks as below: 3207 * ./mon_data: 3208 * mon_L3_00 3209 * mon_L3_01 3210 * mon_L3_02 3211 * ... 
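 *
 * When Sub-NUMA Clustering is in effect (mon_scope == RESCTRL_L3_NODE)
 * each mon_L3_XX directory additionally contains one mon_sub_L3_YY
 * directory per SNC node, and the files directly under mon_L3_XX report
 * values summed across those nodes.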
3212 * 3213 * Each domain directory has one file per event: 3214 * ./mon_L3_00/: 3215 * llc_occupancy 3216 * 3217 */ 3218 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 3219 struct rdtgroup *prgrp, 3220 struct kernfs_node **dest_kn) 3221 { 3222 struct rdt_resource *r; 3223 struct kernfs_node *kn; 3224 int ret; 3225 3226 /* 3227 * Create the mon_data directory first. 3228 */ 3229 ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); 3230 if (ret) 3231 return ret; 3232 3233 if (dest_kn) 3234 *dest_kn = kn; 3235 3236 /* 3237 * Create the subdirectories for each domain. Note that all events 3238 * in a domain like L3 are grouped into a resource whose domain is L3 3239 */ 3240 for_each_mon_capable_rdt_resource(r) { 3241 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); 3242 if (ret) 3243 goto out_destroy; 3244 } 3245 3246 return 0; 3247 3248 out_destroy: 3249 kernfs_remove(kn); 3250 return ret; 3251 } 3252 3253 /** 3254 * cbm_ensure_valid - Enforce validity on provided CBM 3255 * @_val: Candidate CBM 3256 * @r: RDT resource to which the CBM belongs 3257 * 3258 * The provided CBM represents all cache portions available for use. This 3259 * may be represented by a bitmap that does not consist of contiguous ones 3260 * and thus be an invalid CBM. 3261 * Here the provided CBM is forced to be a valid CBM by only considering 3262 * the first set of contiguous bits as valid and clearing all bits. 3263 * The intention here is to provide a valid default CBM with which a new 3264 * resource group is initialized. The user can follow this with a 3265 * modification to the CBM if the default does not satisfy the 3266 * requirements. 3267 */ 3268 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) 3269 { 3270 unsigned int cbm_len = r->cache.cbm_len; 3271 unsigned long first_bit, zero_bit; 3272 unsigned long val = _val; 3273 3274 if (!val) 3275 return 0; 3276 3277 first_bit = find_first_bit(&val, cbm_len); 3278 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); 3279 3280 /* Clear any remaining bits to ensure contiguous region */ 3281 bitmap_clear(&val, zero_bit, cbm_len - zero_bit); 3282 return (u32)val; 3283 } 3284 3285 /* 3286 * Initialize cache resources per RDT domain 3287 * 3288 * Set the RDT domain up to start off with all usable allocations. That is, 3289 * all shareable and unused bits. All-zero CBM is invalid. 3290 */ 3291 static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s, 3292 u32 closid) 3293 { 3294 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); 3295 enum resctrl_conf_type t = s->conf_type; 3296 struct resctrl_staged_config *cfg; 3297 struct rdt_resource *r = s->res; 3298 u32 used_b = 0, unused_b = 0; 3299 unsigned long tmp_cbm; 3300 enum rdtgrp_mode mode; 3301 u32 peer_ctl, ctrl_val; 3302 int i; 3303 3304 cfg = &d->staged_config[t]; 3305 cfg->have_new_ctrl = false; 3306 cfg->new_ctrl = r->cache.shareable_bits; 3307 used_b = r->cache.shareable_bits; 3308 for (i = 0; i < closids_supported(); i++) { 3309 if (closid_allocated(i) && i != closid) { 3310 mode = rdtgroup_mode_by_closid(i); 3311 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 3312 /* 3313 * ctrl values for locksetup aren't relevant 3314 * until the schemata is written, and the mode 3315 * becomes RDT_MODE_PSEUDO_LOCKED. 3316 */ 3317 continue; 3318 /* 3319 * If CDP is active include peer domain's 3320 * usage to ensure there is no overlap 3321 * with an exclusive group. 
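 * The "peer" is the other half of the CODE/DATA pair: with CDP
 * enabled both halves of an exclusive group occupy the same cache,
 * so bits used by either half must be treated as in use here.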
3322 */ 3323 if (resctrl_arch_get_cdp_enabled(r->rid)) 3324 peer_ctl = resctrl_arch_get_config(r, d, i, 3325 peer_type); 3326 else 3327 peer_ctl = 0; 3328 ctrl_val = resctrl_arch_get_config(r, d, i, 3329 s->conf_type); 3330 used_b |= ctrl_val | peer_ctl; 3331 if (mode == RDT_MODE_SHAREABLE) 3332 cfg->new_ctrl |= ctrl_val | peer_ctl; 3333 } 3334 } 3335 if (d->plr && d->plr->cbm > 0) 3336 used_b |= d->plr->cbm; 3337 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); 3338 unused_b &= BIT_MASK(r->cache.cbm_len) - 1; 3339 cfg->new_ctrl |= unused_b; 3340 /* 3341 * Force the initial CBM to be valid, user can 3342 * modify the CBM based on system availability. 3343 */ 3344 cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); 3345 /* 3346 * Assign the u32 CBM to an unsigned long to ensure that 3347 * bitmap_weight() does not access out-of-bound memory. 3348 */ 3349 tmp_cbm = cfg->new_ctrl; 3350 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { 3351 rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id); 3352 return -ENOSPC; 3353 } 3354 cfg->have_new_ctrl = true; 3355 3356 return 0; 3357 } 3358 3359 /* 3360 * Initialize cache resources with default values. 3361 * 3362 * A new RDT group is being created on an allocation capable (CAT) 3363 * supporting system. Set this group up to start off with all usable 3364 * allocations. 3365 * 3366 * If there are no more shareable bits available on any domain then 3367 * the entire allocation will fail. 3368 */ 3369 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) 3370 { 3371 struct rdt_ctrl_domain *d; 3372 int ret; 3373 3374 list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { 3375 ret = __init_one_rdt_domain(d, s, closid); 3376 if (ret < 0) 3377 return ret; 3378 } 3379 3380 return 0; 3381 } 3382 3383 /* Initialize MBA resource with default values. */ 3384 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) 3385 { 3386 struct resctrl_staged_config *cfg; 3387 struct rdt_ctrl_domain *d; 3388 3389 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 3390 if (is_mba_sc(r)) { 3391 d->mbps_val[closid] = MBA_MAX_MBPS; 3392 continue; 3393 } 3394 3395 cfg = &d->staged_config[CDP_NONE]; 3396 cfg->new_ctrl = r->default_ctrl; 3397 cfg->have_new_ctrl = true; 3398 } 3399 } 3400 3401 /* Initialize the RDT group's allocations. 
*/ 3402 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) 3403 { 3404 struct resctrl_schema *s; 3405 struct rdt_resource *r; 3406 int ret = 0; 3407 3408 rdt_staged_configs_clear(); 3409 3410 list_for_each_entry(s, &resctrl_schema_all, list) { 3411 r = s->res; 3412 if (r->rid == RDT_RESOURCE_MBA || 3413 r->rid == RDT_RESOURCE_SMBA) { 3414 rdtgroup_init_mba(r, rdtgrp->closid); 3415 if (is_mba_sc(r)) 3416 continue; 3417 } else { 3418 ret = rdtgroup_init_cat(s, rdtgrp->closid); 3419 if (ret < 0) 3420 goto out; 3421 } 3422 3423 ret = resctrl_arch_update_domains(r, rdtgrp->closid); 3424 if (ret < 0) { 3425 rdt_last_cmd_puts("Failed to initialize allocations\n"); 3426 goto out; 3427 } 3428 3429 } 3430 3431 rdtgrp->mode = RDT_MODE_SHAREABLE; 3432 3433 out: 3434 rdt_staged_configs_clear(); 3435 return ret; 3436 } 3437 3438 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) 3439 { 3440 int ret; 3441 3442 if (!resctrl_arch_mon_capable()) 3443 return 0; 3444 3445 ret = alloc_rmid(rdtgrp->closid); 3446 if (ret < 0) { 3447 rdt_last_cmd_puts("Out of RMIDs\n"); 3448 return ret; 3449 } 3450 rdtgrp->mon.rmid = ret; 3451 3452 ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); 3453 if (ret) { 3454 rdt_last_cmd_puts("kernfs subdir error\n"); 3455 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3456 return ret; 3457 } 3458 3459 return 0; 3460 } 3461 3462 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) 3463 { 3464 if (resctrl_arch_mon_capable()) 3465 free_rmid(rgrp->closid, rgrp->mon.rmid); 3466 } 3467 3468 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, 3469 const char *name, umode_t mode, 3470 enum rdt_group_type rtype, struct rdtgroup **r) 3471 { 3472 struct rdtgroup *prdtgrp, *rdtgrp; 3473 unsigned long files = 0; 3474 struct kernfs_node *kn; 3475 int ret; 3476 3477 prdtgrp = rdtgroup_kn_lock_live(parent_kn); 3478 if (!prdtgrp) { 3479 ret = -ENODEV; 3480 goto out_unlock; 3481 } 3482 3483 if (rtype == RDTMON_GROUP && 3484 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3485 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { 3486 ret = -EINVAL; 3487 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 3488 goto out_unlock; 3489 } 3490 3491 /* allocate the rdtgroup. */ 3492 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); 3493 if (!rdtgrp) { 3494 ret = -ENOSPC; 3495 rdt_last_cmd_puts("Kernel out of memory\n"); 3496 goto out_unlock; 3497 } 3498 *r = rdtgrp; 3499 rdtgrp->mon.parent = prdtgrp; 3500 rdtgrp->type = rtype; 3501 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); 3502 3503 /* kernfs creates the directory for rdtgrp */ 3504 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); 3505 if (IS_ERR(kn)) { 3506 ret = PTR_ERR(kn); 3507 rdt_last_cmd_puts("kernfs create error\n"); 3508 goto out_free_rgrp; 3509 } 3510 rdtgrp->kn = kn; 3511 3512 /* 3513 * kernfs_remove() will drop the reference count on "kn" which 3514 * will free it. But we still need it to stick around for the 3515 * rdtgroup_kn_unlock(kn) call. Take one extra reference here, 3516 * which will be dropped by kernfs_put() in rdtgroup_remove(). 
3517 */ 3518 kernfs_get(kn); 3519 3520 ret = rdtgroup_kn_set_ugid(kn); 3521 if (ret) { 3522 rdt_last_cmd_puts("kernfs perm error\n"); 3523 goto out_destroy; 3524 } 3525 3526 if (rtype == RDTCTRL_GROUP) { 3527 files = RFTYPE_BASE | RFTYPE_CTRL; 3528 if (resctrl_arch_mon_capable()) 3529 files |= RFTYPE_MON; 3530 } else { 3531 files = RFTYPE_BASE | RFTYPE_MON; 3532 } 3533 3534 ret = rdtgroup_add_files(kn, files); 3535 if (ret) { 3536 rdt_last_cmd_puts("kernfs fill error\n"); 3537 goto out_destroy; 3538 } 3539 3540 /* 3541 * The caller unlocks the parent_kn upon success. 3542 */ 3543 return 0; 3544 3545 out_destroy: 3546 kernfs_put(rdtgrp->kn); 3547 kernfs_remove(rdtgrp->kn); 3548 out_free_rgrp: 3549 kfree(rdtgrp); 3550 out_unlock: 3551 rdtgroup_kn_unlock(parent_kn); 3552 return ret; 3553 } 3554 3555 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) 3556 { 3557 kernfs_remove(rgrp->kn); 3558 rdtgroup_remove(rgrp); 3559 } 3560 3561 /* 3562 * Create a monitor group under "mon_groups" directory of a control 3563 * and monitor group(ctrl_mon). This is a resource group 3564 * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 3565 */ 3566 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, 3567 const char *name, umode_t mode) 3568 { 3569 struct rdtgroup *rdtgrp, *prgrp; 3570 int ret; 3571 3572 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); 3573 if (ret) 3574 return ret; 3575 3576 prgrp = rdtgrp->mon.parent; 3577 rdtgrp->closid = prgrp->closid; 3578 3579 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3580 if (ret) { 3581 mkdir_rdt_prepare_clean(rdtgrp); 3582 goto out_unlock; 3583 } 3584 3585 kernfs_activate(rdtgrp->kn); 3586 3587 /* 3588 * Add the rdtgrp to the list of rdtgrps the parent 3589 * ctrl_mon group has to track. 3590 */ 3591 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); 3592 3593 out_unlock: 3594 rdtgroup_kn_unlock(parent_kn); 3595 return ret; 3596 } 3597 3598 /* 3599 * These are rdtgroups created under the root directory. Can be used 3600 * to allocate and monitor resources. 3601 */ 3602 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, 3603 const char *name, umode_t mode) 3604 { 3605 struct rdtgroup *rdtgrp; 3606 struct kernfs_node *kn; 3607 u32 closid; 3608 int ret; 3609 3610 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); 3611 if (ret) 3612 return ret; 3613 3614 kn = rdtgrp->kn; 3615 ret = closid_alloc(); 3616 if (ret < 0) { 3617 rdt_last_cmd_puts("Out of CLOSIDs\n"); 3618 goto out_common_fail; 3619 } 3620 closid = ret; 3621 ret = 0; 3622 3623 rdtgrp->closid = closid; 3624 3625 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3626 if (ret) 3627 goto out_closid_free; 3628 3629 kernfs_activate(rdtgrp->kn); 3630 3631 ret = rdtgroup_init_alloc(rdtgrp); 3632 if (ret < 0) 3633 goto out_rmid_free; 3634 3635 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 3636 3637 if (resctrl_arch_mon_capable()) { 3638 /* 3639 * Create an empty mon_groups directory to hold the subset 3640 * of tasks and cpus to monitor. 
3641 */ 3642 ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); 3643 if (ret) { 3644 rdt_last_cmd_puts("kernfs subdir error\n"); 3645 goto out_del_list; 3646 } 3647 if (is_mba_sc(NULL)) 3648 rdtgrp->mba_mbps_event = mba_mbps_default_event; 3649 } 3650 3651 goto out_unlock; 3652 3653 out_del_list: 3654 list_del(&rdtgrp->rdtgroup_list); 3655 out_rmid_free: 3656 mkdir_rdt_prepare_rmid_free(rdtgrp); 3657 out_closid_free: 3658 closid_free(closid); 3659 out_common_fail: 3660 mkdir_rdt_prepare_clean(rdtgrp); 3661 out_unlock: 3662 rdtgroup_kn_unlock(parent_kn); 3663 return ret; 3664 } 3665 3666 /* 3667 * We allow creating mon groups only with in a directory called "mon_groups" 3668 * which is present in every ctrl_mon group. Check if this is a valid 3669 * "mon_groups" directory. 3670 * 3671 * 1. The directory should be named "mon_groups". 3672 * 2. The mon group itself should "not" be named "mon_groups". 3673 * This makes sure "mon_groups" directory always has a ctrl_mon group 3674 * as parent. 3675 */ 3676 static bool is_mon_groups(struct kernfs_node *kn, const char *name) 3677 { 3678 return (!strcmp(rdt_kn_name(kn), "mon_groups") && 3679 strcmp(name, "mon_groups")); 3680 } 3681 3682 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, 3683 umode_t mode) 3684 { 3685 /* Do not accept '\n' to avoid unparsable situation. */ 3686 if (strchr(name, '\n')) 3687 return -EINVAL; 3688 3689 /* 3690 * If the parent directory is the root directory and RDT 3691 * allocation is supported, add a control and monitoring 3692 * subdirectory 3693 */ 3694 if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) 3695 return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); 3696 3697 /* 3698 * If RDT monitoring is supported and the parent directory is a valid 3699 * "mon_groups" directory, add a monitoring subdirectory. 3700 */ 3701 if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) 3702 return rdtgroup_mkdir_mon(parent_kn, name, mode); 3703 3704 return -EPERM; 3705 } 3706 3707 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3708 { 3709 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3710 int cpu; 3711 3712 /* Give any tasks back to the parent group */ 3713 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); 3714 3715 /* Update per cpu rmid of the moved CPUs first */ 3716 for_each_cpu(cpu, &rdtgrp->cpu_mask) 3717 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; 3718 /* 3719 * Update the MSR on moved CPUs and CPUs which have moved 3720 * task running on them. 
3721 */ 3722 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3723 update_closid_rmid(tmpmask, NULL); 3724 3725 rdtgrp->flags = RDT_DELETED; 3726 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3727 3728 /* 3729 * Remove the rdtgrp from the parent ctrl_mon group's list 3730 */ 3731 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3732 list_del(&rdtgrp->mon.crdtgrp_list); 3733 3734 kernfs_remove(rdtgrp->kn); 3735 3736 return 0; 3737 } 3738 3739 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) 3740 { 3741 rdtgrp->flags = RDT_DELETED; 3742 list_del(&rdtgrp->rdtgroup_list); 3743 3744 kernfs_remove(rdtgrp->kn); 3745 return 0; 3746 } 3747 3748 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3749 { 3750 int cpu; 3751 3752 /* Give any tasks back to the default group */ 3753 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); 3754 3755 /* Give any CPUs back to the default group */ 3756 cpumask_or(&rdtgroup_default.cpu_mask, 3757 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 3758 3759 /* Update per cpu closid and rmid of the moved CPUs first */ 3760 for_each_cpu(cpu, &rdtgrp->cpu_mask) { 3761 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; 3762 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; 3763 } 3764 3765 /* 3766 * Update the MSR on moved CPUs and CPUs which have moved 3767 * task running on them. 3768 */ 3769 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3770 update_closid_rmid(tmpmask, NULL); 3771 3772 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3773 closid_free(rdtgrp->closid); 3774 3775 rdtgroup_ctrl_remove(rdtgrp); 3776 3777 /* 3778 * Free all the child monitor group rmids. 3779 */ 3780 free_all_child_rdtgrp(rdtgrp); 3781 3782 return 0; 3783 } 3784 3785 static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) 3786 { 3787 /* 3788 * Valid within the RCU section it was obtained or while rdtgroup_mutex 3789 * is held. 3790 */ 3791 return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); 3792 } 3793 3794 static int rdtgroup_rmdir(struct kernfs_node *kn) 3795 { 3796 struct kernfs_node *parent_kn; 3797 struct rdtgroup *rdtgrp; 3798 cpumask_var_t tmpmask; 3799 int ret = 0; 3800 3801 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 3802 return -ENOMEM; 3803 3804 rdtgrp = rdtgroup_kn_lock_live(kn); 3805 if (!rdtgrp) { 3806 ret = -EPERM; 3807 goto out; 3808 } 3809 parent_kn = rdt_kn_parent(kn); 3810 3811 /* 3812 * If the rdtgroup is a ctrl_mon group and parent directory 3813 * is the root directory, remove the ctrl_mon group. 3814 * 3815 * If the rdtgroup is a mon group and parent directory 3816 * is a valid "mon_groups" directory, remove the mon group. 
3817 */ 3818 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && 3819 rdtgrp != &rdtgroup_default) { 3820 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3821 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 3822 ret = rdtgroup_ctrl_remove(rdtgrp); 3823 } else { 3824 ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); 3825 } 3826 } else if (rdtgrp->type == RDTMON_GROUP && 3827 is_mon_groups(parent_kn, rdt_kn_name(kn))) { 3828 ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); 3829 } else { 3830 ret = -EPERM; 3831 } 3832 3833 out: 3834 rdtgroup_kn_unlock(kn); 3835 free_cpumask_var(tmpmask); 3836 return ret; 3837 } 3838 3839 /** 3840 * mongrp_reparent() - replace parent CTRL_MON group of a MON group 3841 * @rdtgrp: the MON group whose parent should be replaced 3842 * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp 3843 * @cpus: cpumask provided by the caller for use during this call 3844 * 3845 * Replaces the parent CTRL_MON group for a MON group, resulting in all member 3846 * tasks' CLOSID immediately changing to that of the new parent group. 3847 * Monitoring data for the group is unaffected by this operation. 3848 */ 3849 static void mongrp_reparent(struct rdtgroup *rdtgrp, 3850 struct rdtgroup *new_prdtgrp, 3851 cpumask_var_t cpus) 3852 { 3853 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3854 3855 WARN_ON(rdtgrp->type != RDTMON_GROUP); 3856 WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); 3857 3858 /* Nothing to do when simply renaming a MON group. */ 3859 if (prdtgrp == new_prdtgrp) 3860 return; 3861 3862 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3863 list_move_tail(&rdtgrp->mon.crdtgrp_list, 3864 &new_prdtgrp->mon.crdtgrp_list); 3865 3866 rdtgrp->mon.parent = new_prdtgrp; 3867 rdtgrp->closid = new_prdtgrp->closid; 3868 3869 /* Propagate updated closid to all tasks in this group. */ 3870 rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); 3871 3872 update_closid_rmid(cpus, NULL); 3873 } 3874 3875 static int rdtgroup_rename(struct kernfs_node *kn, 3876 struct kernfs_node *new_parent, const char *new_name) 3877 { 3878 struct kernfs_node *kn_parent; 3879 struct rdtgroup *new_prdtgrp; 3880 struct rdtgroup *rdtgrp; 3881 cpumask_var_t tmpmask; 3882 int ret; 3883 3884 rdtgrp = kernfs_to_rdtgroup(kn); 3885 new_prdtgrp = kernfs_to_rdtgroup(new_parent); 3886 if (!rdtgrp || !new_prdtgrp) 3887 return -ENOENT; 3888 3889 /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ 3890 rdtgroup_kn_get(rdtgrp, kn); 3891 rdtgroup_kn_get(new_prdtgrp, new_parent); 3892 3893 mutex_lock(&rdtgroup_mutex); 3894 3895 rdt_last_cmd_clear(); 3896 3897 /* 3898 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if 3899 * either kernfs_node is a file. 
3900 */ 3901 if (kernfs_type(kn) != KERNFS_DIR || 3902 kernfs_type(new_parent) != KERNFS_DIR) { 3903 rdt_last_cmd_puts("Source and destination must be directories"); 3904 ret = -EPERM; 3905 goto out; 3906 } 3907 3908 if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { 3909 ret = -ENOENT; 3910 goto out; 3911 } 3912 3913 kn_parent = rdt_kn_parent(kn); 3914 if (rdtgrp->type != RDTMON_GROUP || !kn_parent || 3915 !is_mon_groups(kn_parent, rdt_kn_name(kn))) { 3916 rdt_last_cmd_puts("Source must be a MON group\n"); 3917 ret = -EPERM; 3918 goto out; 3919 } 3920 3921 if (!is_mon_groups(new_parent, new_name)) { 3922 rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); 3923 ret = -EPERM; 3924 goto out; 3925 } 3926 3927 /* 3928 * If the MON group is monitoring CPUs, the CPUs must be assigned to the 3929 * current parent CTRL_MON group and therefore cannot be assigned to 3930 * the new parent, making the move illegal. 3931 */ 3932 if (!cpumask_empty(&rdtgrp->cpu_mask) && 3933 rdtgrp->mon.parent != new_prdtgrp) { 3934 rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); 3935 ret = -EPERM; 3936 goto out; 3937 } 3938 3939 /* 3940 * Allocate the cpumask for use in mongrp_reparent() to avoid the 3941 * possibility of failing to allocate it after kernfs_rename() has 3942 * succeeded. 3943 */ 3944 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { 3945 ret = -ENOMEM; 3946 goto out; 3947 } 3948 3949 /* 3950 * Perform all input validation and allocations needed to ensure 3951 * mongrp_reparent() will succeed before calling kernfs_rename(), 3952 * otherwise it would be necessary to revert this call if 3953 * mongrp_reparent() failed. 3954 */ 3955 ret = kernfs_rename(kn, new_parent, new_name); 3956 if (!ret) 3957 mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); 3958 3959 free_cpumask_var(tmpmask); 3960 3961 out: 3962 mutex_unlock(&rdtgroup_mutex); 3963 rdtgroup_kn_put(rdtgrp, kn); 3964 rdtgroup_kn_put(new_prdtgrp, new_parent); 3965 return ret; 3966 } 3967 3968 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) 3969 { 3970 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) 3971 seq_puts(seq, ",cdp"); 3972 3973 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) 3974 seq_puts(seq, ",cdpl2"); 3975 3976 if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) 3977 seq_puts(seq, ",mba_MBps"); 3978 3979 if (resctrl_debug) 3980 seq_puts(seq, ",debug"); 3981 3982 return 0; 3983 } 3984 3985 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { 3986 .mkdir = rdtgroup_mkdir, 3987 .rmdir = rdtgroup_rmdir, 3988 .rename = rdtgroup_rename, 3989 .show_options = rdtgroup_show_options, 3990 }; 3991 3992 static int rdtgroup_setup_root(struct rdt_fs_context *ctx) 3993 { 3994 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, 3995 KERNFS_ROOT_CREATE_DEACTIVATED | 3996 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, 3997 &rdtgroup_default); 3998 if (IS_ERR(rdt_root)) 3999 return PTR_ERR(rdt_root); 4000 4001 ctx->kfc.root = rdt_root; 4002 rdtgroup_default.kn = kernfs_root_to_node(rdt_root); 4003 4004 return 0; 4005 } 4006 4007 static void rdtgroup_destroy_root(void) 4008 { 4009 kernfs_destroy_root(rdt_root); 4010 rdtgroup_default.kn = NULL; 4011 } 4012 4013 static void __init rdtgroup_setup_default(void) 4014 { 4015 mutex_lock(&rdtgroup_mutex); 4016 4017 rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; 4018 rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; 4019 rdtgroup_default.type = RDTCTRL_GROUP; 4020 
INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); 4021 4022 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); 4023 4024 mutex_unlock(&rdtgroup_mutex); 4025 } 4026 4027 static void domain_destroy_mon_state(struct rdt_mon_domain *d) 4028 { 4029 bitmap_free(d->rmid_busy_llc); 4030 kfree(d->mbm_total); 4031 kfree(d->mbm_local); 4032 } 4033 4034 void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) 4035 { 4036 mutex_lock(&rdtgroup_mutex); 4037 4038 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) 4039 mba_sc_domain_destroy(r, d); 4040 4041 mutex_unlock(&rdtgroup_mutex); 4042 } 4043 4044 void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) 4045 { 4046 mutex_lock(&rdtgroup_mutex); 4047 4048 /* 4049 * If resctrl is mounted, remove all the 4050 * per domain monitor data directories. 4051 */ 4052 if (resctrl_mounted && resctrl_arch_mon_capable()) 4053 rmdir_mondata_subdir_allrdtgrp(r, d); 4054 4055 if (is_mbm_enabled()) 4056 cancel_delayed_work(&d->mbm_over); 4057 if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { 4058 /* 4059 * When a package is going down, forcefully 4060 * decrement rmid->ebusy. There is no way to know 4061 * that the L3 was flushed and hence may lead to 4062 * incorrect counts in rare scenarios, but leaving 4063 * the RMID as busy creates RMID leaks if the 4064 * package never comes back. 4065 */ 4066 __check_limbo(d, true); 4067 cancel_delayed_work(&d->cqm_limbo); 4068 } 4069 4070 domain_destroy_mon_state(d); 4071 4072 mutex_unlock(&rdtgroup_mutex); 4073 } 4074 4075 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) 4076 { 4077 u32 idx_limit = resctrl_arch_system_num_rmid_idx(); 4078 size_t tsize; 4079 4080 if (is_llc_occupancy_enabled()) { 4081 d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); 4082 if (!d->rmid_busy_llc) 4083 return -ENOMEM; 4084 } 4085 if (is_mbm_total_enabled()) { 4086 tsize = sizeof(*d->mbm_total); 4087 d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); 4088 if (!d->mbm_total) { 4089 bitmap_free(d->rmid_busy_llc); 4090 return -ENOMEM; 4091 } 4092 } 4093 if (is_mbm_local_enabled()) { 4094 tsize = sizeof(*d->mbm_local); 4095 d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); 4096 if (!d->mbm_local) { 4097 bitmap_free(d->rmid_busy_llc); 4098 kfree(d->mbm_total); 4099 return -ENOMEM; 4100 } 4101 } 4102 4103 return 0; 4104 } 4105 4106 int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) 4107 { 4108 int err = 0; 4109 4110 mutex_lock(&rdtgroup_mutex); 4111 4112 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { 4113 /* RDT_RESOURCE_MBA is never mon_capable */ 4114 err = mba_sc_domain_allocate(r, d); 4115 } 4116 4117 mutex_unlock(&rdtgroup_mutex); 4118 4119 return err; 4120 } 4121 4122 int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) 4123 { 4124 int err; 4125 4126 mutex_lock(&rdtgroup_mutex); 4127 4128 err = domain_setup_mon_state(r, d); 4129 if (err) 4130 goto out_unlock; 4131 4132 if (is_mbm_enabled()) { 4133 INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); 4134 mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, 4135 RESCTRL_PICK_ANY_CPU); 4136 } 4137 4138 if (is_llc_occupancy_enabled()) 4139 INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); 4140 4141 /* 4142 * If the filesystem is not mounted then only the default resource group 4143 * exists. Creation of its directories is deferred until mount time 4144 * by rdt_get_tree() calling mkdir_mondata_all(). 

static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.rename		= rdtgroup_rename,
	.show_options	= rdtgroup_show_options,
};

static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
{
	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
				      KERNFS_ROOT_CREATE_DEACTIVATED |
				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
				      &rdtgroup_default);
	if (IS_ERR(rdt_root))
		return PTR_ERR(rdt_root);

	ctx->kfc.root = rdt_root;
	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);

	return 0;
}

static void rdtgroup_destroy_root(void)
{
	kernfs_destroy_root(rdt_root);
	rdtgroup_default.kn = NULL;
}

static void __init rdtgroup_setup_default(void)
{
	mutex_lock(&rdtgroup_mutex);

	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
	rdtgroup_default.type = RDTCTRL_GROUP;
	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);

	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);

	mutex_unlock(&rdtgroup_mutex);
}

static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
	bitmap_free(d->rmid_busy_llc);
	kfree(d->mbm_total);
	kfree(d->mbm_local);
}

void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	mutex_lock(&rdtgroup_mutex);

	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
		mba_sc_domain_destroy(r, d);

	mutex_unlock(&rdtgroup_mutex);
}

void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	mutex_lock(&rdtgroup_mutex);

	/*
	 * If resctrl is mounted, remove all the per-domain monitor data
	 * directories.
	 */
	if (resctrl_mounted && resctrl_arch_mon_capable())
		rmdir_mondata_subdir_allrdtgrp(r, d);

	if (is_mbm_enabled())
		cancel_delayed_work(&d->mbm_over);
	if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
		/*
		 * When a package goes down, forcefully decrement rmid->ebusy.
		 * There is no way to know whether the L3 was flushed, so the
		 * counts may be wrong in rare scenarios, but leaving the RMID
		 * marked busy would leak RMIDs if the package never comes
		 * back.
		 */
		__check_limbo(d, true);
		cancel_delayed_work(&d->cqm_limbo);
	}

	domain_destroy_mon_state(d);

	mutex_unlock(&rdtgroup_mutex);
}

static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	size_t tsize;

	if (is_llc_occupancy_enabled()) {
		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
		if (!d->rmid_busy_llc)
			return -ENOMEM;
	}
	if (is_mbm_total_enabled()) {
		tsize = sizeof(*d->mbm_total);
		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
		if (!d->mbm_total) {
			bitmap_free(d->rmid_busy_llc);
			return -ENOMEM;
		}
	}
	if (is_mbm_local_enabled()) {
		tsize = sizeof(*d->mbm_local);
		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
		if (!d->mbm_local) {
			bitmap_free(d->rmid_busy_llc);
			kfree(d->mbm_total);
			return -ENOMEM;
		}
	}

	return 0;
}

int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	int err = 0;

	mutex_lock(&rdtgroup_mutex);

	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
		/* RDT_RESOURCE_MBA is never mon_capable */
		err = mba_sc_domain_allocate(r, d);
	}

	mutex_unlock(&rdtgroup_mutex);

	return err;
}

int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	int err;

	mutex_lock(&rdtgroup_mutex);

	err = domain_setup_mon_state(r, d);
	if (err)
		goto out_unlock;

	if (is_mbm_enabled()) {
		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
					   RESCTRL_PICK_ANY_CPU);
	}

	if (is_llc_occupancy_enabled())
		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);

	/*
	 * If the filesystem is not mounted then only the default resource
	 * group exists. Creation of its directories is deferred until mount
	 * time by rdt_get_tree() calling mkdir_mondata_all().
	 * If resctrl is mounted, add per-domain monitor data directories.
	 */
	if (resctrl_mounted && resctrl_arch_mon_capable())
		mkdir_mondata_subdir_allrdtgrp(r, d);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return err;
}

void resctrl_online_cpu(unsigned int cpu)
{
	mutex_lock(&rdtgroup_mutex);
	/* A newly onlined CPU starts in the default rdtgroup. */
	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
	mutex_unlock(&rdtgroup_mutex);
}

static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
{
	struct rdtgroup *cr;

	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
			break;
	}
}

void resctrl_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
	struct rdt_mon_domain *d;
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
			clear_childcpus(rdtgrp, cpu);
			break;
		}
	}

	if (!l3->mon_capable)
		goto out_unlock;

	d = get_mon_domain_from_cpu(cpu, l3);
	if (d) {
		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
			cancel_delayed_work(&d->mbm_over);
			mbm_setup_overflow_handler(d, 0, cpu);
		}
		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
		    has_busy_rmid(d)) {
			cancel_delayed_work(&d->cqm_limbo);
			cqm_setup_limbo_handler(d, 0, cpu);
		}
	}

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}
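
/*
 * Example (illustrative sketch, not built with the kernel): one way the
 * architecture glue can feed CPU hotplug events into resctrl_online_cpu()
 * and resctrl_offline_cpu(). The wrapper names and the dynamic hotplug
 * state below are made up for illustration; the real architecture
 * callbacks typically also bring the CPU's resource domains up or down
 * around these calls.
 *
 *	static int example_resctrl_cpu_online(unsigned int cpu)
 *	{
 *		// Place the CPU in the default resource group.
 *		resctrl_online_cpu(cpu);
 *		return 0;
 *	}
 *
 *	static int example_resctrl_cpu_offline(unsigned int cpu)
 *	{
 *		// Drop the CPU from its group and migrate any
 *		// MBM overflow/limbo worker that was pinned to it.
 *		resctrl_offline_cpu(cpu);
 *		return 0;
 *	}
 *
 *	// Registration from the architecture driver's init path:
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 *				"resctrl/example:online",
 *				example_resctrl_cpu_online,
 *				example_resctrl_cpu_offline);
 */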

/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Set up the resctrl filesystem: initialize the default resource group,
 * create the /sys/fs/resctrl mount point, and register the rdtgroup
 * filesystem type.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
		     sizeof(last_cmd_status_buf));

	rdtgroup_setup_default();

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		return ret;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	/*
	 * Adding the resctrl debugfs directory here may not be ideal since
	 * it would let the resctrl debugfs directory appear on the debugfs
	 * filesystem before the resctrl filesystem is mounted.
	 * It may also be ok since that would enable debugging of RDT before
	 * resctrl is mounted.
	 * The debugfs directory is created here rather than in rdt_get_tree()
	 * because rdt_get_tree() takes rdtgroup_mutex and, during the debugfs
	 * directory creation, also &sb->s_type->i_mutex_key (the lockdep
	 * class of inode->i_rwsem). Other filesystem interactions
	 * (eg. SyS_getdents) have the lock ordering:
	 *   &sb->s_type->i_mutex_key --> &mm->mmap_lock
	 * and mmap(), called with &mm->mmap_lock held, takes rdtgroup_mutex,
	 * creating the dependency:
	 *   &mm->mmap_lock --> rdtgroup_mutex
	 * Creating the debugfs directory under rdtgroup_mutex would add
	 *   rdtgroup_mutex --> &sb->s_type->i_mutex_key
	 * and close a cycle over these three locks. Creating it here avoids
	 * that dependency (even though no resctrl file operations can occur
	 * before the filesystem is mounted, there is no way to express that
	 * to lockdep).
	 */
	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");

	return ret;
}

void __exit rdtgroup_exit(void)
{
	debugfs_remove_recursive(debugfs_resctrl);
	unregister_filesystem(&rdt_fs_type);
	sysfs_remove_mount_point(fs_kobj, "resctrl");
}
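
/*
 * Example (illustrative sketch, not built with the kernel): the typical
 * user space flow against the filesystem registered by rdtgroup_init().
 * Mounting reaches rdt_get_tree(), mkdir reaches rdtgroup_mkdir(), and the
 * writes go through the schemata and tasks file handlers. The group name,
 * schemata string and mask below are placeholders; the exact schemata
 * syntax depends on the resources the system exposes (see the resctrl
 * documentation).
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/mount.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	static int write_str(const char *path, const char *val)
 *	{
 *		int fd = open(path, O_WRONLY);
 *
 *		if (fd < 0) {
 *			perror(path);
 *			return -1;
 *		}
 *		if (write(fd, val, strlen(val)) < 0)
 *			perror(path);
 *		return close(fd);
 *	}
 *
 *	int main(void)
 *	{
 *		char pid[32];
 *
 *		if (mount("resctrl", "/sys/fs/resctrl", "resctrl", 0, NULL))
 *			perror("mount");
 *		// New CTRL_MON group.
 *		if (mkdir("/sys/fs/resctrl/grp1", 0755))
 *			perror("mkdir");
 *		// Placeholder allocation: L3 CBM 0xff on cache domain 0.
 *		write_str("/sys/fs/resctrl/grp1/schemata", "L3:0=ff\n");
 *		// Move the current process into the group.
 *		snprintf(pid, sizeof(pid), "%d\n", getpid());
 *		write_str("/sys/fs/resctrl/grp1/tasks", pid);
 *		return 0;
 *	}
 */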