// SPDX-License-Identifier: GPL-2.0-only
/*
 * User interface for Resource Allocation in Resource Director Technology(RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
#include <linux/user_namespace.h>

#include <uapi/linux/magic.h>

#include <asm/resctrl.h>
#include "internal.h"

DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);

/* list of entries for the schemata file */
LIST_HEAD(resctrl_schema_all);

/* The filesystem can only be mounted once. */
bool resctrl_mounted;

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;

/* Kernel fs node for "mon_groups" directory under root */
static struct kernfs_node *kn_mongrp;

/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;

static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];

static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
static void rdtgroup_destroy_root(void);

struct dentry *debugfs_resctrl;

static bool resctrl_debug;

void rdt_last_cmd_clear(void)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_clear(&last_cmd_status);
}

void rdt_last_cmd_puts(const char *s)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_puts(&last_cmd_status, s);
}

void rdt_last_cmd_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_vprintf(&last_cmd_status, fmt, ap);
	va_end(ap);
}

void rdt_staged_configs_clear(void)
{
	struct rdt_resource *r;
	struct rdt_domain *dom;

	lockdep_assert_held(&rdtgroup_mutex);

	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(dom, &r->domains, list)
			memset(dom->staged_config, 0, sizeof(dom->staged_config));
	}
}

/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set current's closid to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
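/*
 * Worked example with illustrative numbers (not tied to any particular
 * CPU): if the smallest num_closid across all schemata is 4, closid_init()
 * sets closid_free_map to 0b1111 and clears bit RESCTRL_RESERVED_CLOSID (0)
 * for the default group, leaving 0b1110. closid_alloc() then returns
 * ffs(0b1110) - 1 = 1 and clears that bit; closid_free(1) sets it again.
 */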
static unsigned long closid_free_map;
static int closid_free_map_len;

int closids_supported(void)
{
	return closid_free_map_len;
}

static void closid_init(void)
{
	struct resctrl_schema *s;
	u32 rdt_min_closid = 32;

	/* Compute rdt_min_closid across all resources */
	list_for_each_entry(s, &resctrl_schema_all, list)
		rdt_min_closid = min(rdt_min_closid, s->num_closid);

	closid_free_map = BIT_MASK(rdt_min_closid) - 1;

	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
	closid_free_map_len = rdt_min_closid;
}

static int closid_alloc(void)
{
	int cleanest_closid;
	u32 closid;

	lockdep_assert_held(&rdtgroup_mutex);

	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		cleanest_closid = resctrl_find_cleanest_closid();
		if (cleanest_closid < 0)
			return cleanest_closid;
		closid = cleanest_closid;
	} else {
		closid = ffs(closid_free_map);
		if (closid == 0)
			return -ENOSPC;
		closid--;
	}
	__clear_bit(closid, &closid_free_map);

	return closid;
}

void closid_free(int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	__set_bit(closid, &closid_free_map);
}

/**
 * closid_allocated - test if provided closid is in use
 * @closid: closid to be tested
 *
 * Return: true if @closid is currently associated with a resource group,
 * false if @closid is free
 */
bool closid_allocated(unsigned int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	return !test_bit(closid, &closid_free_map);
}

/**
 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 * @closid: closid of the resource group
 *
 * Each resource group is associated with a @closid. Here the mode
 * of a resource group can be queried by searching for it using its closid.
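 * If no resource group currently uses @closid, RDT_NUM_MODES is returned.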
195 * 196 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid 197 */ 198 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) 199 { 200 struct rdtgroup *rdtgrp; 201 202 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { 203 if (rdtgrp->closid == closid) 204 return rdtgrp->mode; 205 } 206 207 return RDT_NUM_MODES; 208 } 209 210 static const char * const rdt_mode_str[] = { 211 [RDT_MODE_SHAREABLE] = "shareable", 212 [RDT_MODE_EXCLUSIVE] = "exclusive", 213 [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", 214 [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", 215 }; 216 217 /** 218 * rdtgroup_mode_str - Return the string representation of mode 219 * @mode: the resource group mode as &enum rdtgroup_mode 220 * 221 * Return: string representation of valid mode, "unknown" otherwise 222 */ 223 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) 224 { 225 if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) 226 return "unknown"; 227 228 return rdt_mode_str[mode]; 229 } 230 231 /* set uid and gid of rdtgroup dirs and files to that of the creator */ 232 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) 233 { 234 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, 235 .ia_uid = current_fsuid(), 236 .ia_gid = current_fsgid(), }; 237 238 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && 239 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) 240 return 0; 241 242 return kernfs_setattr(kn, &iattr); 243 } 244 245 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) 246 { 247 struct kernfs_node *kn; 248 int ret; 249 250 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, 251 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 252 0, rft->kf_ops, rft, NULL, NULL); 253 if (IS_ERR(kn)) 254 return PTR_ERR(kn); 255 256 ret = rdtgroup_kn_set_ugid(kn); 257 if (ret) { 258 kernfs_remove(kn); 259 return ret; 260 } 261 262 return 0; 263 } 264 265 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) 266 { 267 struct kernfs_open_file *of = m->private; 268 struct rftype *rft = of->kn->priv; 269 270 if (rft->seq_show) 271 return rft->seq_show(of, m, arg); 272 return 0; 273 } 274 275 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, 276 size_t nbytes, loff_t off) 277 { 278 struct rftype *rft = of->kn->priv; 279 280 if (rft->write) 281 return rft->write(of, buf, nbytes, off); 282 283 return -EINVAL; 284 } 285 286 static const struct kernfs_ops rdtgroup_kf_single_ops = { 287 .atomic_write_len = PAGE_SIZE, 288 .write = rdtgroup_file_write, 289 .seq_show = rdtgroup_seqfile_show, 290 }; 291 292 static const struct kernfs_ops kf_mondata_ops = { 293 .atomic_write_len = PAGE_SIZE, 294 .seq_show = rdtgroup_mondata_show, 295 }; 296 297 static bool is_cpu_list(struct kernfs_open_file *of) 298 { 299 struct rftype *rft = of->kn->priv; 300 301 return rft->flags & RFTYPE_FLAGS_CPUS_LIST; 302 } 303 304 static int rdtgroup_cpus_show(struct kernfs_open_file *of, 305 struct seq_file *s, void *v) 306 { 307 struct rdtgroup *rdtgrp; 308 struct cpumask *mask; 309 int ret = 0; 310 311 rdtgrp = rdtgroup_kn_lock_live(of->kn); 312 313 if (rdtgrp) { 314 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 315 if (!rdtgrp->plr->d) { 316 rdt_last_cmd_clear(); 317 rdt_last_cmd_puts("Cache domain offline\n"); 318 ret = -ENODEV; 319 } else { 320 mask = &rdtgrp->plr->d->cpu_mask; 321 seq_printf(s, is_cpu_list(of) ? 322 "%*pbl\n" : "%*pb\n", 323 cpumask_pr_args(mask)); 324 } 325 } else { 326 seq_printf(s, is_cpu_list(of) ? 
"%*pbl\n" : "%*pb\n", 327 cpumask_pr_args(&rdtgrp->cpu_mask)); 328 } 329 } else { 330 ret = -ENOENT; 331 } 332 rdtgroup_kn_unlock(of->kn); 333 334 return ret; 335 } 336 337 /* 338 * This is safe against resctrl_sched_in() called from __switch_to() 339 * because __switch_to() is executed with interrupts disabled. A local call 340 * from update_closid_rmid() is protected against __switch_to() because 341 * preemption is disabled. 342 */ 343 static void update_cpu_closid_rmid(void *info) 344 { 345 struct rdtgroup *r = info; 346 347 if (r) { 348 this_cpu_write(pqr_state.default_closid, r->closid); 349 this_cpu_write(pqr_state.default_rmid, r->mon.rmid); 350 } 351 352 /* 353 * We cannot unconditionally write the MSR because the current 354 * executing task might have its own closid selected. Just reuse 355 * the context switch code. 356 */ 357 resctrl_sched_in(current); 358 } 359 360 /* 361 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, 362 * 363 * Per task closids/rmids must have been set up before calling this function. 364 */ 365 static void 366 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) 367 { 368 on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); 369 } 370 371 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, 372 cpumask_var_t tmpmask) 373 { 374 struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; 375 struct list_head *head; 376 377 /* Check whether cpus belong to parent ctrl group */ 378 cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); 379 if (!cpumask_empty(tmpmask)) { 380 rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); 381 return -EINVAL; 382 } 383 384 /* Check whether cpus are dropped from this group */ 385 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); 386 if (!cpumask_empty(tmpmask)) { 387 /* Give any dropped cpus to parent rdtgroup */ 388 cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); 389 update_closid_rmid(tmpmask, prgrp); 390 } 391 392 /* 393 * If we added cpus, remove them from previous group that owned them 394 * and update per-cpu rmid 395 */ 396 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 397 if (!cpumask_empty(tmpmask)) { 398 head = &prgrp->mon.crdtgrp_list; 399 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 400 if (crgrp == rdtgrp) 401 continue; 402 cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, 403 tmpmask); 404 } 405 update_closid_rmid(tmpmask, rdtgrp); 406 } 407 408 /* Done pushing/pulling - update this group with new mask */ 409 cpumask_copy(&rdtgrp->cpu_mask, newmask); 410 411 return 0; 412 } 413 414 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) 415 { 416 struct rdtgroup *crgrp; 417 418 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); 419 /* update the child mon group masks as well*/ 420 list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) 421 cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); 422 } 423 424 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, 425 cpumask_var_t tmpmask, cpumask_var_t tmpmask1) 426 { 427 struct rdtgroup *r, *crgrp; 428 struct list_head *head; 429 430 /* Check whether cpus are dropped from this group */ 431 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); 432 if (!cpumask_empty(tmpmask)) { 433 /* Can't drop from default group */ 434 if (rdtgrp == &rdtgroup_default) { 435 rdt_last_cmd_puts("Can't drop CPUs from default group\n"); 436 return -EINVAL; 437 } 438 439 /* Give any dropped cpus to rdtgroup_default */ 440 
cpumask_or(&rdtgroup_default.cpu_mask, 441 &rdtgroup_default.cpu_mask, tmpmask); 442 update_closid_rmid(tmpmask, &rdtgroup_default); 443 } 444 445 /* 446 * If we added cpus, remove them from previous group and 447 * the prev group's child groups that owned them 448 * and update per-cpu closid/rmid. 449 */ 450 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 451 if (!cpumask_empty(tmpmask)) { 452 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { 453 if (r == rdtgrp) 454 continue; 455 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); 456 if (!cpumask_empty(tmpmask1)) 457 cpumask_rdtgrp_clear(r, tmpmask1); 458 } 459 update_closid_rmid(tmpmask, rdtgrp); 460 } 461 462 /* Done pushing/pulling - update this group with new mask */ 463 cpumask_copy(&rdtgrp->cpu_mask, newmask); 464 465 /* 466 * Clear child mon group masks since there is a new parent mask 467 * now and update the rmid for the cpus the child lost. 468 */ 469 head = &rdtgrp->mon.crdtgrp_list; 470 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 471 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); 472 update_closid_rmid(tmpmask, rdtgrp); 473 cpumask_clear(&crgrp->cpu_mask); 474 } 475 476 return 0; 477 } 478 479 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, 480 char *buf, size_t nbytes, loff_t off) 481 { 482 cpumask_var_t tmpmask, newmask, tmpmask1; 483 struct rdtgroup *rdtgrp; 484 int ret; 485 486 if (!buf) 487 return -EINVAL; 488 489 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 490 return -ENOMEM; 491 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { 492 free_cpumask_var(tmpmask); 493 return -ENOMEM; 494 } 495 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { 496 free_cpumask_var(tmpmask); 497 free_cpumask_var(newmask); 498 return -ENOMEM; 499 } 500 501 rdtgrp = rdtgroup_kn_lock_live(of->kn); 502 if (!rdtgrp) { 503 ret = -ENOENT; 504 goto unlock; 505 } 506 507 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 508 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 509 ret = -EINVAL; 510 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 511 goto unlock; 512 } 513 514 if (is_cpu_list(of)) 515 ret = cpulist_parse(buf, newmask); 516 else 517 ret = cpumask_parse(buf, newmask); 518 519 if (ret) { 520 rdt_last_cmd_puts("Bad CPU list/mask\n"); 521 goto unlock; 522 } 523 524 /* check that user didn't specify any offline cpus */ 525 cpumask_andnot(tmpmask, newmask, cpu_online_mask); 526 if (!cpumask_empty(tmpmask)) { 527 ret = -EINVAL; 528 rdt_last_cmd_puts("Can only assign online CPUs\n"); 529 goto unlock; 530 } 531 532 if (rdtgrp->type == RDTCTRL_GROUP) 533 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); 534 else if (rdtgrp->type == RDTMON_GROUP) 535 ret = cpus_mon_write(rdtgrp, newmask, tmpmask); 536 else 537 ret = -EINVAL; 538 539 unlock: 540 rdtgroup_kn_unlock(of->kn); 541 free_cpumask_var(tmpmask); 542 free_cpumask_var(newmask); 543 free_cpumask_var(tmpmask1); 544 545 return ret ?: nbytes; 546 } 547 548 /** 549 * rdtgroup_remove - the helper to remove resource group safely 550 * @rdtgrp: resource group to remove 551 * 552 * On resource group creation via a mkdir, an extra kernfs_node reference is 553 * taken to ensure that the rdtgroup structure remains accessible for the 554 * rdtgroup_kn_unlock() calls where it is removed. 555 * 556 * Drop the extra reference here, then free the rdtgroup structure. 
557 * 558 * Return: void 559 */ 560 static void rdtgroup_remove(struct rdtgroup *rdtgrp) 561 { 562 kernfs_put(rdtgrp->kn); 563 kfree(rdtgrp); 564 } 565 566 static void _update_task_closid_rmid(void *task) 567 { 568 /* 569 * If the task is still current on this CPU, update PQR_ASSOC MSR. 570 * Otherwise, the MSR is updated when the task is scheduled in. 571 */ 572 if (task == current) 573 resctrl_sched_in(task); 574 } 575 576 static void update_task_closid_rmid(struct task_struct *t) 577 { 578 if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) 579 smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); 580 else 581 _update_task_closid_rmid(t); 582 } 583 584 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp) 585 { 586 u32 closid, rmid = rdtgrp->mon.rmid; 587 588 if (rdtgrp->type == RDTCTRL_GROUP) 589 closid = rdtgrp->closid; 590 else if (rdtgrp->type == RDTMON_GROUP) 591 closid = rdtgrp->mon.parent->closid; 592 else 593 return false; 594 595 return resctrl_arch_match_closid(tsk, closid) && 596 resctrl_arch_match_rmid(tsk, closid, rmid); 597 } 598 599 static int __rdtgroup_move_task(struct task_struct *tsk, 600 struct rdtgroup *rdtgrp) 601 { 602 /* If the task is already in rdtgrp, no need to move the task. */ 603 if (task_in_rdtgroup(tsk, rdtgrp)) 604 return 0; 605 606 /* 607 * Set the task's closid/rmid before the PQR_ASSOC MSR can be 608 * updated by them. 609 * 610 * For ctrl_mon groups, move both closid and rmid. 611 * For monitor groups, can move the tasks only from 612 * their parent CTRL group. 613 */ 614 if (rdtgrp->type == RDTMON_GROUP && 615 !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) { 616 rdt_last_cmd_puts("Can't move task to different control group\n"); 617 return -EINVAL; 618 } 619 620 if (rdtgrp->type == RDTMON_GROUP) 621 resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid, 622 rdtgrp->mon.rmid); 623 else 624 resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid, 625 rdtgrp->mon.rmid); 626 627 /* 628 * Ensure the task's closid and rmid are written before determining if 629 * the task is current that will decide if it will be interrupted. 630 * This pairs with the full barrier between the rq->curr update and 631 * resctrl_sched_in() during context switch. 632 */ 633 smp_mb(); 634 635 /* 636 * By now, the task's closid and rmid are set. If the task is current 637 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource 638 * group go into effect. If the task is not current, the MSR will be 639 * updated when the task is scheduled in. 
640 */ 641 update_task_closid_rmid(tsk); 642 643 return 0; 644 } 645 646 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) 647 { 648 return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) && 649 resctrl_arch_match_closid(t, r->closid)); 650 } 651 652 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) 653 { 654 return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) && 655 resctrl_arch_match_rmid(t, r->mon.parent->closid, 656 r->mon.rmid)); 657 } 658 659 /** 660 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group 661 * @r: Resource group 662 * 663 * Return: 1 if tasks have been assigned to @r, 0 otherwise 664 */ 665 int rdtgroup_tasks_assigned(struct rdtgroup *r) 666 { 667 struct task_struct *p, *t; 668 int ret = 0; 669 670 lockdep_assert_held(&rdtgroup_mutex); 671 672 rcu_read_lock(); 673 for_each_process_thread(p, t) { 674 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 675 ret = 1; 676 break; 677 } 678 } 679 rcu_read_unlock(); 680 681 return ret; 682 } 683 684 static int rdtgroup_task_write_permission(struct task_struct *task, 685 struct kernfs_open_file *of) 686 { 687 const struct cred *tcred = get_task_cred(task); 688 const struct cred *cred = current_cred(); 689 int ret = 0; 690 691 /* 692 * Even if we're attaching all tasks in the thread group, we only 693 * need to check permissions on one of them. 694 */ 695 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && 696 !uid_eq(cred->euid, tcred->uid) && 697 !uid_eq(cred->euid, tcred->suid)) { 698 rdt_last_cmd_printf("No permission to move task %d\n", task->pid); 699 ret = -EPERM; 700 } 701 702 put_cred(tcred); 703 return ret; 704 } 705 706 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, 707 struct kernfs_open_file *of) 708 { 709 struct task_struct *tsk; 710 int ret; 711 712 rcu_read_lock(); 713 if (pid) { 714 tsk = find_task_by_vpid(pid); 715 if (!tsk) { 716 rcu_read_unlock(); 717 rdt_last_cmd_printf("No task %d\n", pid); 718 return -ESRCH; 719 } 720 } else { 721 tsk = current; 722 } 723 724 get_task_struct(tsk); 725 rcu_read_unlock(); 726 727 ret = rdtgroup_task_write_permission(tsk, of); 728 if (!ret) 729 ret = __rdtgroup_move_task(tsk, rdtgrp); 730 731 put_task_struct(tsk); 732 return ret; 733 } 734 735 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, 736 char *buf, size_t nbytes, loff_t off) 737 { 738 struct rdtgroup *rdtgrp; 739 char *pid_str; 740 int ret = 0; 741 pid_t pid; 742 743 rdtgrp = rdtgroup_kn_lock_live(of->kn); 744 if (!rdtgrp) { 745 rdtgroup_kn_unlock(of->kn); 746 return -ENOENT; 747 } 748 rdt_last_cmd_clear(); 749 750 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 751 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 752 ret = -EINVAL; 753 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 754 goto unlock; 755 } 756 757 while (buf && buf[0] != '\0' && buf[0] != '\n') { 758 pid_str = strim(strsep(&buf, ",")); 759 760 if (kstrtoint(pid_str, 0, &pid)) { 761 rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); 762 ret = -EINVAL; 763 break; 764 } 765 766 if (pid < 0) { 767 rdt_last_cmd_printf("Invalid pid %d\n", pid); 768 ret = -EINVAL; 769 break; 770 } 771 772 ret = rdtgroup_move_task(pid, rdtgrp, of); 773 if (ret) { 774 rdt_last_cmd_printf("Error while processing task %d\n", pid); 775 break; 776 } 777 } 778 779 unlock: 780 rdtgroup_kn_unlock(of->kn); 781 782 return ret ?: nbytes; 783 } 784 785 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) 786 { 787 struct task_struct *p, *t; 788 
pid_t pid; 789 790 rcu_read_lock(); 791 for_each_process_thread(p, t) { 792 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 793 pid = task_pid_vnr(t); 794 if (pid) 795 seq_printf(s, "%d\n", pid); 796 } 797 } 798 rcu_read_unlock(); 799 } 800 801 static int rdtgroup_tasks_show(struct kernfs_open_file *of, 802 struct seq_file *s, void *v) 803 { 804 struct rdtgroup *rdtgrp; 805 int ret = 0; 806 807 rdtgrp = rdtgroup_kn_lock_live(of->kn); 808 if (rdtgrp) 809 show_rdt_tasks(rdtgrp, s); 810 else 811 ret = -ENOENT; 812 rdtgroup_kn_unlock(of->kn); 813 814 return ret; 815 } 816 817 static int rdtgroup_closid_show(struct kernfs_open_file *of, 818 struct seq_file *s, void *v) 819 { 820 struct rdtgroup *rdtgrp; 821 int ret = 0; 822 823 rdtgrp = rdtgroup_kn_lock_live(of->kn); 824 if (rdtgrp) 825 seq_printf(s, "%u\n", rdtgrp->closid); 826 else 827 ret = -ENOENT; 828 rdtgroup_kn_unlock(of->kn); 829 830 return ret; 831 } 832 833 static int rdtgroup_rmid_show(struct kernfs_open_file *of, 834 struct seq_file *s, void *v) 835 { 836 struct rdtgroup *rdtgrp; 837 int ret = 0; 838 839 rdtgrp = rdtgroup_kn_lock_live(of->kn); 840 if (rdtgrp) 841 seq_printf(s, "%u\n", rdtgrp->mon.rmid); 842 else 843 ret = -ENOENT; 844 rdtgroup_kn_unlock(of->kn); 845 846 return ret; 847 } 848 849 #ifdef CONFIG_PROC_CPU_RESCTRL 850 851 /* 852 * A task can only be part of one resctrl control group and of one monitor 853 * group which is associated to that control group. 854 * 855 * 1) res: 856 * mon: 857 * 858 * resctrl is not available. 859 * 860 * 2) res:/ 861 * mon: 862 * 863 * Task is part of the root resctrl control group, and it is not associated 864 * to any monitor group. 865 * 866 * 3) res:/ 867 * mon:mon0 868 * 869 * Task is part of the root resctrl control group and monitor group mon0. 870 * 871 * 4) res:group0 872 * mon: 873 * 874 * Task is part of resctrl control group group0, and it is not associated 875 * to any monitor group. 876 * 877 * 5) res:group0 878 * mon:mon1 879 * 880 * Task is part of resctrl control group group0 and monitor group mon1. 881 */ 882 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, 883 struct pid *pid, struct task_struct *tsk) 884 { 885 struct rdtgroup *rdtg; 886 int ret = 0; 887 888 mutex_lock(&rdtgroup_mutex); 889 890 /* Return empty if resctrl has not been mounted. */ 891 if (!resctrl_mounted) { 892 seq_puts(s, "res:\nmon:\n"); 893 goto unlock; 894 } 895 896 list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { 897 struct rdtgroup *crg; 898 899 /* 900 * Task information is only relevant for shareable 901 * and exclusive groups. 902 */ 903 if (rdtg->mode != RDT_MODE_SHAREABLE && 904 rdtg->mode != RDT_MODE_EXCLUSIVE) 905 continue; 906 907 if (!resctrl_arch_match_closid(tsk, rdtg->closid)) 908 continue; 909 910 seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", 911 rdtg->kn->name); 912 seq_puts(s, "mon:"); 913 list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, 914 mon.crdtgrp_list) { 915 if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, 916 crg->mon.rmid)) 917 continue; 918 seq_printf(s, "%s", crg->kn->name); 919 break; 920 } 921 seq_putc(s, '\n'); 922 goto unlock; 923 } 924 /* 925 * The above search should succeed. Otherwise return 926 * with an error. 
927 */ 928 ret = -ENOENT; 929 unlock: 930 mutex_unlock(&rdtgroup_mutex); 931 932 return ret; 933 } 934 #endif 935 936 static int rdt_last_cmd_status_show(struct kernfs_open_file *of, 937 struct seq_file *seq, void *v) 938 { 939 int len; 940 941 mutex_lock(&rdtgroup_mutex); 942 len = seq_buf_used(&last_cmd_status); 943 if (len) 944 seq_printf(seq, "%.*s", len, last_cmd_status_buf); 945 else 946 seq_puts(seq, "ok\n"); 947 mutex_unlock(&rdtgroup_mutex); 948 return 0; 949 } 950 951 static int rdt_num_closids_show(struct kernfs_open_file *of, 952 struct seq_file *seq, void *v) 953 { 954 struct resctrl_schema *s = of->kn->parent->priv; 955 956 seq_printf(seq, "%u\n", s->num_closid); 957 return 0; 958 } 959 960 static int rdt_default_ctrl_show(struct kernfs_open_file *of, 961 struct seq_file *seq, void *v) 962 { 963 struct resctrl_schema *s = of->kn->parent->priv; 964 struct rdt_resource *r = s->res; 965 966 seq_printf(seq, "%x\n", r->default_ctrl); 967 return 0; 968 } 969 970 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, 971 struct seq_file *seq, void *v) 972 { 973 struct resctrl_schema *s = of->kn->parent->priv; 974 struct rdt_resource *r = s->res; 975 976 seq_printf(seq, "%u\n", r->cache.min_cbm_bits); 977 return 0; 978 } 979 980 static int rdt_shareable_bits_show(struct kernfs_open_file *of, 981 struct seq_file *seq, void *v) 982 { 983 struct resctrl_schema *s = of->kn->parent->priv; 984 struct rdt_resource *r = s->res; 985 986 seq_printf(seq, "%x\n", r->cache.shareable_bits); 987 return 0; 988 } 989 990 /* 991 * rdt_bit_usage_show - Display current usage of resources 992 * 993 * A domain is a shared resource that can now be allocated differently. Here 994 * we display the current regions of the domain as an annotated bitmask. 995 * For each domain of this resource its allocation bitmask 996 * is annotated as below to indicate the current usage of the corresponding bit: 997 * 0 - currently unused 998 * X - currently available for sharing and used by software and hardware 999 * H - currently used by hardware only but available for software use 1000 * S - currently used and shareable by software only 1001 * E - currently used exclusively by one resource group 1002 * P - currently pseudo-locked by one resource group 1003 */ 1004 static int rdt_bit_usage_show(struct kernfs_open_file *of, 1005 struct seq_file *seq, void *v) 1006 { 1007 struct resctrl_schema *s = of->kn->parent->priv; 1008 /* 1009 * Use unsigned long even though only 32 bits are used to ensure 1010 * test_bit() is used safely. 
1011 */ 1012 unsigned long sw_shareable = 0, hw_shareable = 0; 1013 unsigned long exclusive = 0, pseudo_locked = 0; 1014 struct rdt_resource *r = s->res; 1015 struct rdt_domain *dom; 1016 int i, hwb, swb, excl, psl; 1017 enum rdtgrp_mode mode; 1018 bool sep = false; 1019 u32 ctrl_val; 1020 1021 cpus_read_lock(); 1022 mutex_lock(&rdtgroup_mutex); 1023 hw_shareable = r->cache.shareable_bits; 1024 list_for_each_entry(dom, &r->domains, list) { 1025 if (sep) 1026 seq_putc(seq, ';'); 1027 sw_shareable = 0; 1028 exclusive = 0; 1029 seq_printf(seq, "%d=", dom->id); 1030 for (i = 0; i < closids_supported(); i++) { 1031 if (!closid_allocated(i)) 1032 continue; 1033 ctrl_val = resctrl_arch_get_config(r, dom, i, 1034 s->conf_type); 1035 mode = rdtgroup_mode_by_closid(i); 1036 switch (mode) { 1037 case RDT_MODE_SHAREABLE: 1038 sw_shareable |= ctrl_val; 1039 break; 1040 case RDT_MODE_EXCLUSIVE: 1041 exclusive |= ctrl_val; 1042 break; 1043 case RDT_MODE_PSEUDO_LOCKSETUP: 1044 /* 1045 * RDT_MODE_PSEUDO_LOCKSETUP is possible 1046 * here but not included since the CBM 1047 * associated with this CLOSID in this mode 1048 * is not initialized and no task or cpu can be 1049 * assigned this CLOSID. 1050 */ 1051 break; 1052 case RDT_MODE_PSEUDO_LOCKED: 1053 case RDT_NUM_MODES: 1054 WARN(1, 1055 "invalid mode for closid %d\n", i); 1056 break; 1057 } 1058 } 1059 for (i = r->cache.cbm_len - 1; i >= 0; i--) { 1060 pseudo_locked = dom->plr ? dom->plr->cbm : 0; 1061 hwb = test_bit(i, &hw_shareable); 1062 swb = test_bit(i, &sw_shareable); 1063 excl = test_bit(i, &exclusive); 1064 psl = test_bit(i, &pseudo_locked); 1065 if (hwb && swb) 1066 seq_putc(seq, 'X'); 1067 else if (hwb && !swb) 1068 seq_putc(seq, 'H'); 1069 else if (!hwb && swb) 1070 seq_putc(seq, 'S'); 1071 else if (excl) 1072 seq_putc(seq, 'E'); 1073 else if (psl) 1074 seq_putc(seq, 'P'); 1075 else /* Unused bits remain */ 1076 seq_putc(seq, '0'); 1077 } 1078 sep = true; 1079 } 1080 seq_putc(seq, '\n'); 1081 mutex_unlock(&rdtgroup_mutex); 1082 cpus_read_unlock(); 1083 return 0; 1084 } 1085 1086 static int rdt_min_bw_show(struct kernfs_open_file *of, 1087 struct seq_file *seq, void *v) 1088 { 1089 struct resctrl_schema *s = of->kn->parent->priv; 1090 struct rdt_resource *r = s->res; 1091 1092 seq_printf(seq, "%u\n", r->membw.min_bw); 1093 return 0; 1094 } 1095 1096 static int rdt_num_rmids_show(struct kernfs_open_file *of, 1097 struct seq_file *seq, void *v) 1098 { 1099 struct rdt_resource *r = of->kn->parent->priv; 1100 1101 seq_printf(seq, "%d\n", r->num_rmid); 1102 1103 return 0; 1104 } 1105 1106 static int rdt_mon_features_show(struct kernfs_open_file *of, 1107 struct seq_file *seq, void *v) 1108 { 1109 struct rdt_resource *r = of->kn->parent->priv; 1110 struct mon_evt *mevt; 1111 1112 list_for_each_entry(mevt, &r->evt_list, list) { 1113 seq_printf(seq, "%s\n", mevt->name); 1114 if (mevt->configurable) 1115 seq_printf(seq, "%s_config\n", mevt->name); 1116 } 1117 1118 return 0; 1119 } 1120 1121 static int rdt_bw_gran_show(struct kernfs_open_file *of, 1122 struct seq_file *seq, void *v) 1123 { 1124 struct resctrl_schema *s = of->kn->parent->priv; 1125 struct rdt_resource *r = s->res; 1126 1127 seq_printf(seq, "%u\n", r->membw.bw_gran); 1128 return 0; 1129 } 1130 1131 static int rdt_delay_linear_show(struct kernfs_open_file *of, 1132 struct seq_file *seq, void *v) 1133 { 1134 struct resctrl_schema *s = of->kn->parent->priv; 1135 struct rdt_resource *r = s->res; 1136 1137 seq_printf(seq, "%u\n", r->membw.delay_linear); 1138 return 0; 1139 } 1140 1141 static 
int max_threshold_occ_show(struct kernfs_open_file *of, 1142 struct seq_file *seq, void *v) 1143 { 1144 seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); 1145 1146 return 0; 1147 } 1148 1149 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, 1150 struct seq_file *seq, void *v) 1151 { 1152 struct resctrl_schema *s = of->kn->parent->priv; 1153 struct rdt_resource *r = s->res; 1154 1155 if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) 1156 seq_puts(seq, "per-thread\n"); 1157 else 1158 seq_puts(seq, "max\n"); 1159 1160 return 0; 1161 } 1162 1163 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, 1164 char *buf, size_t nbytes, loff_t off) 1165 { 1166 unsigned int bytes; 1167 int ret; 1168 1169 ret = kstrtouint(buf, 0, &bytes); 1170 if (ret) 1171 return ret; 1172 1173 if (bytes > resctrl_rmid_realloc_limit) 1174 return -EINVAL; 1175 1176 resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); 1177 1178 return nbytes; 1179 } 1180 1181 /* 1182 * rdtgroup_mode_show - Display mode of this resource group 1183 */ 1184 static int rdtgroup_mode_show(struct kernfs_open_file *of, 1185 struct seq_file *s, void *v) 1186 { 1187 struct rdtgroup *rdtgrp; 1188 1189 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1190 if (!rdtgrp) { 1191 rdtgroup_kn_unlock(of->kn); 1192 return -ENOENT; 1193 } 1194 1195 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); 1196 1197 rdtgroup_kn_unlock(of->kn); 1198 return 0; 1199 } 1200 1201 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) 1202 { 1203 switch (my_type) { 1204 case CDP_CODE: 1205 return CDP_DATA; 1206 case CDP_DATA: 1207 return CDP_CODE; 1208 default: 1209 case CDP_NONE: 1210 return CDP_NONE; 1211 } 1212 } 1213 1214 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, 1215 struct seq_file *seq, void *v) 1216 { 1217 struct resctrl_schema *s = of->kn->parent->priv; 1218 struct rdt_resource *r = s->res; 1219 1220 seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); 1221 1222 return 0; 1223 } 1224 1225 /** 1226 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other 1227 * @r: Resource to which domain instance @d belongs. 1228 * @d: The domain instance for which @closid is being tested. 1229 * @cbm: Capacity bitmask being tested. 1230 * @closid: Intended closid for @cbm. 1231 * @type: CDP type of @r. 1232 * @exclusive: Only check if overlaps with exclusive resource groups 1233 * 1234 * Checks if provided @cbm intended to be used for @closid on domain 1235 * @d overlaps with any other closids or other hardware usage associated 1236 * with this domain. If @exclusive is true then only overlaps with 1237 * resource groups in exclusive mode will be considered. If @exclusive 1238 * is false then overlaps with any resource group or hardware entities 1239 * will be considered. 1240 * 1241 * @cbm is unsigned long, even if only 32 bits are used, to make the 1242 * bitmap functions work correctly. 1243 * 1244 * Return: false if CBM does not overlap, true if it does. 
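 *
 * For example, with hypothetical masks: @cbm = 0x3f does not intersect a
 * closid using 0x0c0, but does intersect one using 0x030 since bits 4 and
 * 5 are set in both.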
 */
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
				    unsigned long cbm, int closid,
				    enum resctrl_conf_type type, bool exclusive)
{
	enum rdtgrp_mode mode;
	unsigned long ctrl_b;
	int i;

	/* Check for any overlap with regions used by hardware directly */
	if (!exclusive) {
		ctrl_b = r->cache.shareable_bits;
		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
			return true;
	}

	/* Check for overlap with other resource groups */
	for (i = 0; i < closids_supported(); i++) {
		ctrl_b = resctrl_arch_get_config(r, d, i, type);
		mode = rdtgroup_mode_by_closid(i);
		if (closid_allocated(i) && i != closid &&
		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
				if (exclusive) {
					if (mode == RDT_MODE_EXCLUSIVE)
						return true;
					continue;
				}
				return true;
			}
		}
	}

	return false;
}

/**
 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
 * @s: Schema for the resource to which domain instance @d belongs.
 * @d: The domain instance for which @closid is being tested.
 * @cbm: Capacity bitmask being tested.
 * @closid: Intended closid for @cbm.
 * @exclusive: Only check if overlaps with exclusive resource groups
 *
 * Resources that can be allocated using a CBM can use the CBM to control
 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
 * for overlap. Overlap test is not limited to the specific resource for
 * which the CBM is intended though - when dealing with CDP resources that
 * share the underlying hardware the overlap check should be performed on
 * the CDP resource sharing the hardware also.
 *
 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
 * overlap test.
 *
 * Return: true if CBM overlap detected, false if there is no overlap
 */
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
			   unsigned long cbm, int closid, bool exclusive)
{
	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
	struct rdt_resource *r = s->res;

	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
				    exclusive))
		return true;

	if (!resctrl_arch_get_cdp_enabled(r->rid))
		return false;
	return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
}

/**
 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
 * @rdtgrp: Resource group identified through its closid.
 *
 * An exclusive resource group implies that there should be no sharing of
 * its allocated resources. At the time this group is considered to be
 * exclusive this test can determine if its current schemata supports this
 * setting by testing for overlap with all other resource groups.
 *
 * Return: true if resource group can be exclusive, false if there is overlap
 * with allocations of other resource groups and thus this resource group
 * cannot be exclusive.
1328 */ 1329 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) 1330 { 1331 int closid = rdtgrp->closid; 1332 struct resctrl_schema *s; 1333 struct rdt_resource *r; 1334 bool has_cache = false; 1335 struct rdt_domain *d; 1336 u32 ctrl; 1337 1338 /* Walking r->domains, ensure it can't race with cpuhp */ 1339 lockdep_assert_cpus_held(); 1340 1341 list_for_each_entry(s, &resctrl_schema_all, list) { 1342 r = s->res; 1343 if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) 1344 continue; 1345 has_cache = true; 1346 list_for_each_entry(d, &r->domains, list) { 1347 ctrl = resctrl_arch_get_config(r, d, closid, 1348 s->conf_type); 1349 if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { 1350 rdt_last_cmd_puts("Schemata overlaps\n"); 1351 return false; 1352 } 1353 } 1354 } 1355 1356 if (!has_cache) { 1357 rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); 1358 return false; 1359 } 1360 1361 return true; 1362 } 1363 1364 /* 1365 * rdtgroup_mode_write - Modify the resource group's mode 1366 */ 1367 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, 1368 char *buf, size_t nbytes, loff_t off) 1369 { 1370 struct rdtgroup *rdtgrp; 1371 enum rdtgrp_mode mode; 1372 int ret = 0; 1373 1374 /* Valid input requires a trailing newline */ 1375 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1376 return -EINVAL; 1377 buf[nbytes - 1] = '\0'; 1378 1379 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1380 if (!rdtgrp) { 1381 rdtgroup_kn_unlock(of->kn); 1382 return -ENOENT; 1383 } 1384 1385 rdt_last_cmd_clear(); 1386 1387 mode = rdtgrp->mode; 1388 1389 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || 1390 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || 1391 (!strcmp(buf, "pseudo-locksetup") && 1392 mode == RDT_MODE_PSEUDO_LOCKSETUP) || 1393 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) 1394 goto out; 1395 1396 if (mode == RDT_MODE_PSEUDO_LOCKED) { 1397 rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); 1398 ret = -EINVAL; 1399 goto out; 1400 } 1401 1402 if (!strcmp(buf, "shareable")) { 1403 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1404 ret = rdtgroup_locksetup_exit(rdtgrp); 1405 if (ret) 1406 goto out; 1407 } 1408 rdtgrp->mode = RDT_MODE_SHAREABLE; 1409 } else if (!strcmp(buf, "exclusive")) { 1410 if (!rdtgroup_mode_test_exclusive(rdtgrp)) { 1411 ret = -EINVAL; 1412 goto out; 1413 } 1414 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1415 ret = rdtgroup_locksetup_exit(rdtgrp); 1416 if (ret) 1417 goto out; 1418 } 1419 rdtgrp->mode = RDT_MODE_EXCLUSIVE; 1420 } else if (!strcmp(buf, "pseudo-locksetup")) { 1421 ret = rdtgroup_locksetup_enter(rdtgrp); 1422 if (ret) 1423 goto out; 1424 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; 1425 } else { 1426 rdt_last_cmd_puts("Unknown or unsupported mode\n"); 1427 ret = -EINVAL; 1428 } 1429 1430 out: 1431 rdtgroup_kn_unlock(of->kn); 1432 return ret ?: nbytes; 1433 } 1434 1435 /** 1436 * rdtgroup_cbm_to_size - Translate CBM to size in bytes 1437 * @r: RDT resource to which @d belongs. 1438 * @d: RDT domain instance. 1439 * @cbm: bitmask for which the size should be computed. 1440 * 1441 * The bitmask provided associated with the RDT domain instance @d will be 1442 * translated into how many bytes it represents. The size in bytes is 1443 * computed by first dividing the total cache size by the CBM length to 1444 * determine how many bytes each bit in the bitmask represents. The result 1445 * is multiplied with the number of bits set in the bitmask. 
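 * For example, with purely illustrative numbers: a 32MB cache and a 16 bit
 * CBM give 2MB per bit, so a @cbm with four bits set translates to 8MB.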
1446 * 1447 * @cbm is unsigned long, even if only 32 bits are used to make the 1448 * bitmap functions work correctly. 1449 */ 1450 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, 1451 struct rdt_domain *d, unsigned long cbm) 1452 { 1453 struct cpu_cacheinfo *ci; 1454 unsigned int size = 0; 1455 int num_b, i; 1456 1457 num_b = bitmap_weight(&cbm, r->cache.cbm_len); 1458 ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); 1459 for (i = 0; i < ci->num_leaves; i++) { 1460 if (ci->info_list[i].level == r->cache_level) { 1461 size = ci->info_list[i].size / r->cache.cbm_len * num_b; 1462 break; 1463 } 1464 } 1465 1466 return size; 1467 } 1468 1469 /* 1470 * rdtgroup_size_show - Display size in bytes of allocated regions 1471 * 1472 * The "size" file mirrors the layout of the "schemata" file, printing the 1473 * size in bytes of each region instead of the capacity bitmask. 1474 */ 1475 static int rdtgroup_size_show(struct kernfs_open_file *of, 1476 struct seq_file *s, void *v) 1477 { 1478 struct resctrl_schema *schema; 1479 enum resctrl_conf_type type; 1480 struct rdtgroup *rdtgrp; 1481 struct rdt_resource *r; 1482 struct rdt_domain *d; 1483 unsigned int size; 1484 int ret = 0; 1485 u32 closid; 1486 bool sep; 1487 u32 ctrl; 1488 1489 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1490 if (!rdtgrp) { 1491 rdtgroup_kn_unlock(of->kn); 1492 return -ENOENT; 1493 } 1494 1495 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 1496 if (!rdtgrp->plr->d) { 1497 rdt_last_cmd_clear(); 1498 rdt_last_cmd_puts("Cache domain offline\n"); 1499 ret = -ENODEV; 1500 } else { 1501 seq_printf(s, "%*s:", max_name_width, 1502 rdtgrp->plr->s->name); 1503 size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, 1504 rdtgrp->plr->d, 1505 rdtgrp->plr->cbm); 1506 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); 1507 } 1508 goto out; 1509 } 1510 1511 closid = rdtgrp->closid; 1512 1513 list_for_each_entry(schema, &resctrl_schema_all, list) { 1514 r = schema->res; 1515 type = schema->conf_type; 1516 sep = false; 1517 seq_printf(s, "%*s:", max_name_width, schema->name); 1518 list_for_each_entry(d, &r->domains, list) { 1519 if (sep) 1520 seq_putc(s, ';'); 1521 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1522 size = 0; 1523 } else { 1524 if (is_mba_sc(r)) 1525 ctrl = d->mbps_val[closid]; 1526 else 1527 ctrl = resctrl_arch_get_config(r, d, 1528 closid, 1529 type); 1530 if (r->rid == RDT_RESOURCE_MBA || 1531 r->rid == RDT_RESOURCE_SMBA) 1532 size = ctrl; 1533 else 1534 size = rdtgroup_cbm_to_size(r, d, ctrl); 1535 } 1536 seq_printf(s, "%d=%u", d->id, size); 1537 sep = true; 1538 } 1539 seq_putc(s, '\n'); 1540 } 1541 1542 out: 1543 rdtgroup_kn_unlock(of->kn); 1544 1545 return ret; 1546 } 1547 1548 struct mon_config_info { 1549 u32 evtid; 1550 u32 mon_config; 1551 }; 1552 1553 #define INVALID_CONFIG_INDEX UINT_MAX 1554 1555 /** 1556 * mon_event_config_index_get - get the hardware index for the 1557 * configurable event 1558 * @evtid: event id. 
1559 * 1560 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID 1561 * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID 1562 * INVALID_CONFIG_INDEX for invalid evtid 1563 */ 1564 static inline unsigned int mon_event_config_index_get(u32 evtid) 1565 { 1566 switch (evtid) { 1567 case QOS_L3_MBM_TOTAL_EVENT_ID: 1568 return 0; 1569 case QOS_L3_MBM_LOCAL_EVENT_ID: 1570 return 1; 1571 default: 1572 /* Should never reach here */ 1573 return INVALID_CONFIG_INDEX; 1574 } 1575 } 1576 1577 static void mon_event_config_read(void *info) 1578 { 1579 struct mon_config_info *mon_info = info; 1580 unsigned int index; 1581 u64 msrval; 1582 1583 index = mon_event_config_index_get(mon_info->evtid); 1584 if (index == INVALID_CONFIG_INDEX) { 1585 pr_warn_once("Invalid event id %d\n", mon_info->evtid); 1586 return; 1587 } 1588 rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); 1589 1590 /* Report only the valid event configuration bits */ 1591 mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; 1592 } 1593 1594 static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info) 1595 { 1596 smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); 1597 } 1598 1599 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) 1600 { 1601 struct mon_config_info mon_info = {0}; 1602 struct rdt_domain *dom; 1603 bool sep = false; 1604 1605 cpus_read_lock(); 1606 mutex_lock(&rdtgroup_mutex); 1607 1608 list_for_each_entry(dom, &r->domains, list) { 1609 if (sep) 1610 seq_puts(s, ";"); 1611 1612 memset(&mon_info, 0, sizeof(struct mon_config_info)); 1613 mon_info.evtid = evtid; 1614 mondata_config_read(dom, &mon_info); 1615 1616 seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); 1617 sep = true; 1618 } 1619 seq_puts(s, "\n"); 1620 1621 mutex_unlock(&rdtgroup_mutex); 1622 cpus_read_unlock(); 1623 1624 return 0; 1625 } 1626 1627 static int mbm_total_bytes_config_show(struct kernfs_open_file *of, 1628 struct seq_file *seq, void *v) 1629 { 1630 struct rdt_resource *r = of->kn->parent->priv; 1631 1632 mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); 1633 1634 return 0; 1635 } 1636 1637 static int mbm_local_bytes_config_show(struct kernfs_open_file *of, 1638 struct seq_file *seq, void *v) 1639 { 1640 struct rdt_resource *r = of->kn->parent->priv; 1641 1642 mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); 1643 1644 return 0; 1645 } 1646 1647 static void mon_event_config_write(void *info) 1648 { 1649 struct mon_config_info *mon_info = info; 1650 unsigned int index; 1651 1652 index = mon_event_config_index_get(mon_info->evtid); 1653 if (index == INVALID_CONFIG_INDEX) { 1654 pr_warn_once("Invalid event id %d\n", mon_info->evtid); 1655 return; 1656 } 1657 wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); 1658 } 1659 1660 static void mbm_config_write_domain(struct rdt_resource *r, 1661 struct rdt_domain *d, u32 evtid, u32 val) 1662 { 1663 struct mon_config_info mon_info = {0}; 1664 1665 /* 1666 * Read the current config value first. If both are the same then 1667 * no need to write it again. 1668 */ 1669 mon_info.evtid = evtid; 1670 mondata_config_read(d, &mon_info); 1671 if (mon_info.mon_config == val) 1672 return; 1673 1674 mon_info.mon_config = val; 1675 1676 /* 1677 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the 1678 * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE 1679 * are scoped at the domain level. Writing any of these MSRs 1680 * on one CPU is observed by all the CPUs in the domain. 
1681 */ 1682 smp_call_function_any(&d->cpu_mask, mon_event_config_write, 1683 &mon_info, 1); 1684 1685 /* 1686 * When an Event Configuration is changed, the bandwidth counters 1687 * for all RMIDs and Events will be cleared by the hardware. The 1688 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for 1689 * every RMID on the next read to any event for every RMID. 1690 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) 1691 * cleared while it is tracked by the hardware. Clear the 1692 * mbm_local and mbm_total counts for all the RMIDs. 1693 */ 1694 resctrl_arch_reset_rmid_all(r, d); 1695 } 1696 1697 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) 1698 { 1699 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 1700 char *dom_str = NULL, *id_str; 1701 unsigned long dom_id, val; 1702 struct rdt_domain *d; 1703 1704 /* Walking r->domains, ensure it can't race with cpuhp */ 1705 lockdep_assert_cpus_held(); 1706 1707 next: 1708 if (!tok || tok[0] == '\0') 1709 return 0; 1710 1711 /* Start processing the strings for each domain */ 1712 dom_str = strim(strsep(&tok, ";")); 1713 id_str = strsep(&dom_str, "="); 1714 1715 if (!id_str || kstrtoul(id_str, 10, &dom_id)) { 1716 rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); 1717 return -EINVAL; 1718 } 1719 1720 if (!dom_str || kstrtoul(dom_str, 16, &val)) { 1721 rdt_last_cmd_puts("Non-numeric event configuration value\n"); 1722 return -EINVAL; 1723 } 1724 1725 /* Value from user cannot be more than the supported set of events */ 1726 if ((val & hw_res->mbm_cfg_mask) != val) { 1727 rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", 1728 hw_res->mbm_cfg_mask); 1729 return -EINVAL; 1730 } 1731 1732 list_for_each_entry(d, &r->domains, list) { 1733 if (d->id == dom_id) { 1734 mbm_config_write_domain(r, d, evtid, val); 1735 goto next; 1736 } 1737 } 1738 1739 return -EINVAL; 1740 } 1741 1742 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, 1743 char *buf, size_t nbytes, 1744 loff_t off) 1745 { 1746 struct rdt_resource *r = of->kn->parent->priv; 1747 int ret; 1748 1749 /* Valid input requires a trailing newline */ 1750 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1751 return -EINVAL; 1752 1753 cpus_read_lock(); 1754 mutex_lock(&rdtgroup_mutex); 1755 1756 rdt_last_cmd_clear(); 1757 1758 buf[nbytes - 1] = '\0'; 1759 1760 ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); 1761 1762 mutex_unlock(&rdtgroup_mutex); 1763 cpus_read_unlock(); 1764 1765 return ret ?: nbytes; 1766 } 1767 1768 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, 1769 char *buf, size_t nbytes, 1770 loff_t off) 1771 { 1772 struct rdt_resource *r = of->kn->parent->priv; 1773 int ret; 1774 1775 /* Valid input requires a trailing newline */ 1776 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1777 return -EINVAL; 1778 1779 cpus_read_lock(); 1780 mutex_lock(&rdtgroup_mutex); 1781 1782 rdt_last_cmd_clear(); 1783 1784 buf[nbytes - 1] = '\0'; 1785 1786 ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); 1787 1788 mutex_unlock(&rdtgroup_mutex); 1789 cpus_read_unlock(); 1790 1791 return ret ?: nbytes; 1792 } 1793 1794 /* rdtgroup information files for one cache resource. 
*/ 1795 static struct rftype res_common_files[] = { 1796 { 1797 .name = "last_cmd_status", 1798 .mode = 0444, 1799 .kf_ops = &rdtgroup_kf_single_ops, 1800 .seq_show = rdt_last_cmd_status_show, 1801 .fflags = RFTYPE_TOP_INFO, 1802 }, 1803 { 1804 .name = "num_closids", 1805 .mode = 0444, 1806 .kf_ops = &rdtgroup_kf_single_ops, 1807 .seq_show = rdt_num_closids_show, 1808 .fflags = RFTYPE_CTRL_INFO, 1809 }, 1810 { 1811 .name = "mon_features", 1812 .mode = 0444, 1813 .kf_ops = &rdtgroup_kf_single_ops, 1814 .seq_show = rdt_mon_features_show, 1815 .fflags = RFTYPE_MON_INFO, 1816 }, 1817 { 1818 .name = "num_rmids", 1819 .mode = 0444, 1820 .kf_ops = &rdtgroup_kf_single_ops, 1821 .seq_show = rdt_num_rmids_show, 1822 .fflags = RFTYPE_MON_INFO, 1823 }, 1824 { 1825 .name = "cbm_mask", 1826 .mode = 0444, 1827 .kf_ops = &rdtgroup_kf_single_ops, 1828 .seq_show = rdt_default_ctrl_show, 1829 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1830 }, 1831 { 1832 .name = "min_cbm_bits", 1833 .mode = 0444, 1834 .kf_ops = &rdtgroup_kf_single_ops, 1835 .seq_show = rdt_min_cbm_bits_show, 1836 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1837 }, 1838 { 1839 .name = "shareable_bits", 1840 .mode = 0444, 1841 .kf_ops = &rdtgroup_kf_single_ops, 1842 .seq_show = rdt_shareable_bits_show, 1843 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1844 }, 1845 { 1846 .name = "bit_usage", 1847 .mode = 0444, 1848 .kf_ops = &rdtgroup_kf_single_ops, 1849 .seq_show = rdt_bit_usage_show, 1850 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1851 }, 1852 { 1853 .name = "min_bandwidth", 1854 .mode = 0444, 1855 .kf_ops = &rdtgroup_kf_single_ops, 1856 .seq_show = rdt_min_bw_show, 1857 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1858 }, 1859 { 1860 .name = "bandwidth_gran", 1861 .mode = 0444, 1862 .kf_ops = &rdtgroup_kf_single_ops, 1863 .seq_show = rdt_bw_gran_show, 1864 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1865 }, 1866 { 1867 .name = "delay_linear", 1868 .mode = 0444, 1869 .kf_ops = &rdtgroup_kf_single_ops, 1870 .seq_show = rdt_delay_linear_show, 1871 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1872 }, 1873 /* 1874 * Platform specific which (if any) capabilities are provided by 1875 * thread_throttle_mode. Defer "fflags" initialization to platform 1876 * discovery. 
1877 */ 1878 { 1879 .name = "thread_throttle_mode", 1880 .mode = 0444, 1881 .kf_ops = &rdtgroup_kf_single_ops, 1882 .seq_show = rdt_thread_throttle_mode_show, 1883 }, 1884 { 1885 .name = "max_threshold_occupancy", 1886 .mode = 0644, 1887 .kf_ops = &rdtgroup_kf_single_ops, 1888 .write = max_threshold_occ_write, 1889 .seq_show = max_threshold_occ_show, 1890 .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, 1891 }, 1892 { 1893 .name = "mbm_total_bytes_config", 1894 .mode = 0644, 1895 .kf_ops = &rdtgroup_kf_single_ops, 1896 .seq_show = mbm_total_bytes_config_show, 1897 .write = mbm_total_bytes_config_write, 1898 }, 1899 { 1900 .name = "mbm_local_bytes_config", 1901 .mode = 0644, 1902 .kf_ops = &rdtgroup_kf_single_ops, 1903 .seq_show = mbm_local_bytes_config_show, 1904 .write = mbm_local_bytes_config_write, 1905 }, 1906 { 1907 .name = "cpus", 1908 .mode = 0644, 1909 .kf_ops = &rdtgroup_kf_single_ops, 1910 .write = rdtgroup_cpus_write, 1911 .seq_show = rdtgroup_cpus_show, 1912 .fflags = RFTYPE_BASE, 1913 }, 1914 { 1915 .name = "cpus_list", 1916 .mode = 0644, 1917 .kf_ops = &rdtgroup_kf_single_ops, 1918 .write = rdtgroup_cpus_write, 1919 .seq_show = rdtgroup_cpus_show, 1920 .flags = RFTYPE_FLAGS_CPUS_LIST, 1921 .fflags = RFTYPE_BASE, 1922 }, 1923 { 1924 .name = "tasks", 1925 .mode = 0644, 1926 .kf_ops = &rdtgroup_kf_single_ops, 1927 .write = rdtgroup_tasks_write, 1928 .seq_show = rdtgroup_tasks_show, 1929 .fflags = RFTYPE_BASE, 1930 }, 1931 { 1932 .name = "mon_hw_id", 1933 .mode = 0444, 1934 .kf_ops = &rdtgroup_kf_single_ops, 1935 .seq_show = rdtgroup_rmid_show, 1936 .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, 1937 }, 1938 { 1939 .name = "schemata", 1940 .mode = 0644, 1941 .kf_ops = &rdtgroup_kf_single_ops, 1942 .write = rdtgroup_schemata_write, 1943 .seq_show = rdtgroup_schemata_show, 1944 .fflags = RFTYPE_CTRL_BASE, 1945 }, 1946 { 1947 .name = "mode", 1948 .mode = 0644, 1949 .kf_ops = &rdtgroup_kf_single_ops, 1950 .write = rdtgroup_mode_write, 1951 .seq_show = rdtgroup_mode_show, 1952 .fflags = RFTYPE_CTRL_BASE, 1953 }, 1954 { 1955 .name = "size", 1956 .mode = 0444, 1957 .kf_ops = &rdtgroup_kf_single_ops, 1958 .seq_show = rdtgroup_size_show, 1959 .fflags = RFTYPE_CTRL_BASE, 1960 }, 1961 { 1962 .name = "sparse_masks", 1963 .mode = 0444, 1964 .kf_ops = &rdtgroup_kf_single_ops, 1965 .seq_show = rdt_has_sparse_bitmasks_show, 1966 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1967 }, 1968 { 1969 .name = "ctrl_hw_id", 1970 .mode = 0444, 1971 .kf_ops = &rdtgroup_kf_single_ops, 1972 .seq_show = rdtgroup_closid_show, 1973 .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, 1974 }, 1975 1976 }; 1977 1978 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) 1979 { 1980 struct rftype *rfts, *rft; 1981 int ret, len; 1982 1983 rfts = res_common_files; 1984 len = ARRAY_SIZE(res_common_files); 1985 1986 lockdep_assert_held(&rdtgroup_mutex); 1987 1988 if (resctrl_debug) 1989 fflags |= RFTYPE_DEBUG; 1990 1991 for (rft = rfts; rft < rfts + len; rft++) { 1992 if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { 1993 ret = rdtgroup_add_file(kn, rft); 1994 if (ret) 1995 goto error; 1996 } 1997 } 1998 1999 return 0; 2000 error: 2001 pr_warn("Failed to add %s, err=%d\n", rft->name, ret); 2002 while (--rft >= rfts) { 2003 if ((fflags & rft->fflags) == rft->fflags) 2004 kernfs_remove_by_name(kn, rft->name); 2005 } 2006 return ret; 2007 } 2008 2009 static struct rftype *rdtgroup_get_rftype_by_name(const char *name) 2010 { 2011 struct rftype *rfts, *rft; 2012 int len; 2013 2014 rfts = res_common_files; 
2015 len = ARRAY_SIZE(res_common_files); 2016 2017 for (rft = rfts; rft < rfts + len; rft++) { 2018 if (!strcmp(rft->name, name)) 2019 return rft; 2020 } 2021 2022 return NULL; 2023 } 2024 2025 void __init thread_throttle_mode_init(void) 2026 { 2027 struct rftype *rft; 2028 2029 rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); 2030 if (!rft) 2031 return; 2032 2033 rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; 2034 } 2035 2036 void __init mbm_config_rftype_init(const char *config) 2037 { 2038 struct rftype *rft; 2039 2040 rft = rdtgroup_get_rftype_by_name(config); 2041 if (rft) 2042 rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; 2043 } 2044 2045 /** 2046 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file 2047 * @r: The resource group with which the file is associated. 2048 * @name: Name of the file 2049 * 2050 * The permissions of named resctrl file, directory, or link are modified 2051 * to not allow read, write, or execute by any user. 2052 * 2053 * WARNING: This function is intended to communicate to the user that the 2054 * resctrl file has been locked down - that it is not relevant to the 2055 * particular state the system finds itself in. It should not be relied 2056 * on to protect from user access because after the file's permissions 2057 * are restricted the user can still change the permissions using chmod 2058 * from the command line. 2059 * 2060 * Return: 0 on success, <0 on failure. 2061 */ 2062 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) 2063 { 2064 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2065 struct kernfs_node *kn; 2066 int ret = 0; 2067 2068 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2069 if (!kn) 2070 return -ENOENT; 2071 2072 switch (kernfs_type(kn)) { 2073 case KERNFS_DIR: 2074 iattr.ia_mode = S_IFDIR; 2075 break; 2076 case KERNFS_FILE: 2077 iattr.ia_mode = S_IFREG; 2078 break; 2079 case KERNFS_LINK: 2080 iattr.ia_mode = S_IFLNK; 2081 break; 2082 } 2083 2084 ret = kernfs_setattr(kn, &iattr); 2085 kernfs_put(kn); 2086 return ret; 2087 } 2088 2089 /** 2090 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file 2091 * @r: The resource group with which the file is associated. 2092 * @name: Name of the file 2093 * @mask: Mask of permissions that should be restored 2094 * 2095 * Restore the permissions of the named file. If @name is a directory the 2096 * permissions of its parent will be used. 2097 * 2098 * Return: 0 on success, <0 on failure. 
2099 */ 2100 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, 2101 umode_t mask) 2102 { 2103 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2104 struct kernfs_node *kn, *parent; 2105 struct rftype *rfts, *rft; 2106 int ret, len; 2107 2108 rfts = res_common_files; 2109 len = ARRAY_SIZE(res_common_files); 2110 2111 for (rft = rfts; rft < rfts + len; rft++) { 2112 if (!strcmp(rft->name, name)) 2113 iattr.ia_mode = rft->mode & mask; 2114 } 2115 2116 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2117 if (!kn) 2118 return -ENOENT; 2119 2120 switch (kernfs_type(kn)) { 2121 case KERNFS_DIR: 2122 parent = kernfs_get_parent(kn); 2123 if (parent) { 2124 iattr.ia_mode |= parent->mode; 2125 kernfs_put(parent); 2126 } 2127 iattr.ia_mode |= S_IFDIR; 2128 break; 2129 case KERNFS_FILE: 2130 iattr.ia_mode |= S_IFREG; 2131 break; 2132 case KERNFS_LINK: 2133 iattr.ia_mode |= S_IFLNK; 2134 break; 2135 } 2136 2137 ret = kernfs_setattr(kn, &iattr); 2138 kernfs_put(kn); 2139 return ret; 2140 } 2141 2142 static int rdtgroup_mkdir_info_resdir(void *priv, char *name, 2143 unsigned long fflags) 2144 { 2145 struct kernfs_node *kn_subdir; 2146 int ret; 2147 2148 kn_subdir = kernfs_create_dir(kn_info, name, 2149 kn_info->mode, priv); 2150 if (IS_ERR(kn_subdir)) 2151 return PTR_ERR(kn_subdir); 2152 2153 ret = rdtgroup_kn_set_ugid(kn_subdir); 2154 if (ret) 2155 return ret; 2156 2157 ret = rdtgroup_add_files(kn_subdir, fflags); 2158 if (!ret) 2159 kernfs_activate(kn_subdir); 2160 2161 return ret; 2162 } 2163 2164 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) 2165 { 2166 struct resctrl_schema *s; 2167 struct rdt_resource *r; 2168 unsigned long fflags; 2169 char name[32]; 2170 int ret; 2171 2172 /* create the directory */ 2173 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); 2174 if (IS_ERR(kn_info)) 2175 return PTR_ERR(kn_info); 2176 2177 ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); 2178 if (ret) 2179 goto out_destroy; 2180 2181 /* loop over enabled controls, these are all alloc_capable */ 2182 list_for_each_entry(s, &resctrl_schema_all, list) { 2183 r = s->res; 2184 fflags = r->fflags | RFTYPE_CTRL_INFO; 2185 ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); 2186 if (ret) 2187 goto out_destroy; 2188 } 2189 2190 for_each_mon_capable_rdt_resource(r) { 2191 fflags = r->fflags | RFTYPE_MON_INFO; 2192 sprintf(name, "%s_MON", r->name); 2193 ret = rdtgroup_mkdir_info_resdir(r, name, fflags); 2194 if (ret) 2195 goto out_destroy; 2196 } 2197 2198 ret = rdtgroup_kn_set_ugid(kn_info); 2199 if (ret) 2200 goto out_destroy; 2201 2202 kernfs_activate(kn_info); 2203 2204 return 0; 2205 2206 out_destroy: 2207 kernfs_remove(kn_info); 2208 return ret; 2209 } 2210 2211 static int 2212 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, 2213 char *name, struct kernfs_node **dest_kn) 2214 { 2215 struct kernfs_node *kn; 2216 int ret; 2217 2218 /* create the directory */ 2219 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 2220 if (IS_ERR(kn)) 2221 return PTR_ERR(kn); 2222 2223 if (dest_kn) 2224 *dest_kn = kn; 2225 2226 ret = rdtgroup_kn_set_ugid(kn); 2227 if (ret) 2228 goto out_destroy; 2229 2230 kernfs_activate(kn); 2231 2232 return 0; 2233 2234 out_destroy: 2235 kernfs_remove(kn); 2236 return ret; 2237 } 2238 2239 static void l3_qos_cfg_update(void *arg) 2240 { 2241 bool *enable = arg; 2242 2243 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? 
L3_QOS_CDP_ENABLE : 0ULL); 2244 } 2245 2246 static void l2_qos_cfg_update(void *arg) 2247 { 2248 bool *enable = arg; 2249 2250 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); 2251 } 2252 2253 static inline bool is_mba_linear(void) 2254 { 2255 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; 2256 } 2257 2258 static int set_cache_qos_cfg(int level, bool enable) 2259 { 2260 void (*update)(void *arg); 2261 struct rdt_resource *r_l; 2262 cpumask_var_t cpu_mask; 2263 struct rdt_domain *d; 2264 int cpu; 2265 2266 /* Walking r->domains, ensure it can't race with cpuhp */ 2267 lockdep_assert_cpus_held(); 2268 2269 if (level == RDT_RESOURCE_L3) 2270 update = l3_qos_cfg_update; 2271 else if (level == RDT_RESOURCE_L2) 2272 update = l2_qos_cfg_update; 2273 else 2274 return -EINVAL; 2275 2276 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 2277 return -ENOMEM; 2278 2279 r_l = &rdt_resources_all[level].r_resctrl; 2280 list_for_each_entry(d, &r_l->domains, list) { 2281 if (r_l->cache.arch_has_per_cpu_cfg) 2282 /* Pick all the CPUs in the domain instance */ 2283 for_each_cpu(cpu, &d->cpu_mask) 2284 cpumask_set_cpu(cpu, cpu_mask); 2285 else 2286 /* Pick one CPU from each domain instance to update MSR */ 2287 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); 2288 } 2289 2290 /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ 2291 on_each_cpu_mask(cpu_mask, update, &enable, 1); 2292 2293 free_cpumask_var(cpu_mask); 2294 2295 return 0; 2296 } 2297 2298 /* Restore the qos cfg state when a domain comes online */ 2299 void rdt_domain_reconfigure_cdp(struct rdt_resource *r) 2300 { 2301 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2302 2303 if (!r->cdp_capable) 2304 return; 2305 2306 if (r->rid == RDT_RESOURCE_L2) 2307 l2_qos_cfg_update(&hw_res->cdp_enabled); 2308 2309 if (r->rid == RDT_RESOURCE_L3) 2310 l3_qos_cfg_update(&hw_res->cdp_enabled); 2311 } 2312 2313 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) 2314 { 2315 u32 num_closid = resctrl_arch_get_num_closid(r); 2316 int cpu = cpumask_any(&d->cpu_mask); 2317 int i; 2318 2319 d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), 2320 GFP_KERNEL, cpu_to_node(cpu)); 2321 if (!d->mbps_val) 2322 return -ENOMEM; 2323 2324 for (i = 0; i < num_closid; i++) 2325 d->mbps_val[i] = MBA_MAX_MBPS; 2326 2327 return 0; 2328 } 2329 2330 static void mba_sc_domain_destroy(struct rdt_resource *r, 2331 struct rdt_domain *d) 2332 { 2333 kfree(d->mbps_val); 2334 d->mbps_val = NULL; 2335 } 2336 2337 /* 2338 * MBA software controller is supported only if 2339 * MBM is supported and MBA is in linear scale. 2340 */ 2341 static bool supports_mba_mbps(void) 2342 { 2343 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2344 2345 return (is_mbm_local_enabled() && 2346 r->alloc_capable && is_mba_linear()); 2347 } 2348 2349 /* 2350 * Enable or disable the MBA software controller 2351 * which helps user specify bandwidth in MBps. 
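 *
 * Illustrative example (values are made up): when the filesystem is mounted
 * with the "mba_MBps" option, a schemata line such as
 *
 *	MB:0=2048;1=4096
 *
 * is treated as a per domain bandwidth target in MBps rather than as the
 * hardware delay/percentage value, and the per-CLOSID targets live in
 * d->mbps_val[], which set_mba_sc() below resets to MBA_MAX_MBPS
 * (i.e. unthrottled).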
2352 */ 2353 static int set_mba_sc(bool mba_sc) 2354 { 2355 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2356 u32 num_closid = resctrl_arch_get_num_closid(r); 2357 struct rdt_domain *d; 2358 int i; 2359 2360 if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) 2361 return -EINVAL; 2362 2363 r->membw.mba_sc = mba_sc; 2364 2365 list_for_each_entry(d, &r->domains, list) { 2366 for (i = 0; i < num_closid; i++) 2367 d->mbps_val[i] = MBA_MAX_MBPS; 2368 } 2369 2370 return 0; 2371 } 2372 2373 static int cdp_enable(int level) 2374 { 2375 struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; 2376 int ret; 2377 2378 if (!r_l->alloc_capable) 2379 return -EINVAL; 2380 2381 ret = set_cache_qos_cfg(level, true); 2382 if (!ret) 2383 rdt_resources_all[level].cdp_enabled = true; 2384 2385 return ret; 2386 } 2387 2388 static void cdp_disable(int level) 2389 { 2390 struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; 2391 2392 if (r_hw->cdp_enabled) { 2393 set_cache_qos_cfg(level, false); 2394 r_hw->cdp_enabled = false; 2395 } 2396 } 2397 2398 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) 2399 { 2400 struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; 2401 2402 if (!hw_res->r_resctrl.cdp_capable) 2403 return -EINVAL; 2404 2405 if (enable) 2406 return cdp_enable(l); 2407 2408 cdp_disable(l); 2409 2410 return 0; 2411 } 2412 2413 /* 2414 * We don't allow rdtgroup directories to be created anywhere 2415 * except the root directory. Thus when looking for the rdtgroup 2416 * structure for a kernfs node we are either looking at a directory, 2417 * in which case the rdtgroup structure is pointed at by the "priv" 2418 * field, otherwise we have a file, and need only look to the parent 2419 * to find the rdtgroup. 2420 */ 2421 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) 2422 { 2423 if (kernfs_type(kn) == KERNFS_DIR) { 2424 /* 2425 * All the resource directories use "kn->priv" 2426 * to point to the "struct rdtgroup" for the 2427 * resource. "info" and its subdirectories don't 2428 * have rdtgroup structures, so return NULL here. 2429 */ 2430 if (kn == kn_info || kn->parent == kn_info) 2431 return NULL; 2432 else 2433 return kn->priv; 2434 } else { 2435 return kn->parent->priv; 2436 } 2437 } 2438 2439 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2440 { 2441 atomic_inc(&rdtgrp->waitcount); 2442 kernfs_break_active_protection(kn); 2443 } 2444 2445 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2446 { 2447 if (atomic_dec_and_test(&rdtgrp->waitcount) && 2448 (rdtgrp->flags & RDT_DELETED)) { 2449 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2450 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2451 rdtgroup_pseudo_lock_remove(rdtgrp); 2452 kernfs_unbreak_active_protection(kn); 2453 rdtgroup_remove(rdtgrp); 2454 } else { 2455 kernfs_unbreak_active_protection(kn); 2456 } 2457 } 2458 2459 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) 2460 { 2461 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2462 2463 if (!rdtgrp) 2464 return NULL; 2465 2466 rdtgroup_kn_get(rdtgrp, kn); 2467 2468 cpus_read_lock(); 2469 mutex_lock(&rdtgroup_mutex); 2470 2471 /* Was this group deleted while we waited? 
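	 * If so, return NULL. Note that rdtgroup_mutex, the cpus read lock
	 * and the waitcount reference taken in rdtgroup_kn_get() are still
	 * held; callers are expected to call rdtgroup_kn_unlock() in all
	 * cases, which releases them.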
*/ 2472 if (rdtgrp->flags & RDT_DELETED) 2473 return NULL; 2474 2475 return rdtgrp; 2476 } 2477 2478 void rdtgroup_kn_unlock(struct kernfs_node *kn) 2479 { 2480 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2481 2482 if (!rdtgrp) 2483 return; 2484 2485 mutex_unlock(&rdtgroup_mutex); 2486 cpus_read_unlock(); 2487 2488 rdtgroup_kn_put(rdtgrp, kn); 2489 } 2490 2491 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 2492 struct rdtgroup *prgrp, 2493 struct kernfs_node **mon_data_kn); 2494 2495 static void rdt_disable_ctx(void) 2496 { 2497 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2498 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2499 set_mba_sc(false); 2500 2501 resctrl_debug = false; 2502 } 2503 2504 static int rdt_enable_ctx(struct rdt_fs_context *ctx) 2505 { 2506 int ret = 0; 2507 2508 if (ctx->enable_cdpl2) { 2509 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); 2510 if (ret) 2511 goto out_done; 2512 } 2513 2514 if (ctx->enable_cdpl3) { 2515 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); 2516 if (ret) 2517 goto out_cdpl2; 2518 } 2519 2520 if (ctx->enable_mba_mbps) { 2521 ret = set_mba_sc(true); 2522 if (ret) 2523 goto out_cdpl3; 2524 } 2525 2526 if (ctx->enable_debug) 2527 resctrl_debug = true; 2528 2529 return 0; 2530 2531 out_cdpl3: 2532 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2533 out_cdpl2: 2534 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2535 out_done: 2536 return ret; 2537 } 2538 2539 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) 2540 { 2541 struct resctrl_schema *s; 2542 const char *suffix = ""; 2543 int ret, cl; 2544 2545 s = kzalloc(sizeof(*s), GFP_KERNEL); 2546 if (!s) 2547 return -ENOMEM; 2548 2549 s->res = r; 2550 s->num_closid = resctrl_arch_get_num_closid(r); 2551 if (resctrl_arch_get_cdp_enabled(r->rid)) 2552 s->num_closid /= 2; 2553 2554 s->conf_type = type; 2555 switch (type) { 2556 case CDP_CODE: 2557 suffix = "CODE"; 2558 break; 2559 case CDP_DATA: 2560 suffix = "DATA"; 2561 break; 2562 case CDP_NONE: 2563 suffix = ""; 2564 break; 2565 } 2566 2567 ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); 2568 if (ret >= sizeof(s->name)) { 2569 kfree(s); 2570 return -EINVAL; 2571 } 2572 2573 cl = strlen(s->name); 2574 2575 /* 2576 * If CDP is supported by this resource, but not enabled, 2577 * include the suffix. This ensures the tabular format of the 2578 * schemata file does not change between mounts of the filesystem. 
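	 * For example, on a CDP capable system the L3 entry is printed as
	 * "L3" while CDP is disabled and as "L3CODE"/"L3DATA" once it is
	 * enabled; reserving the four extra characters for the suffix keeps
	 * the columns aligned in both cases.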
2579 */ 2580 if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) 2581 cl += 4; 2582 2583 if (cl > max_name_width) 2584 max_name_width = cl; 2585 2586 INIT_LIST_HEAD(&s->list); 2587 list_add(&s->list, &resctrl_schema_all); 2588 2589 return 0; 2590 } 2591 2592 static int schemata_list_create(void) 2593 { 2594 struct rdt_resource *r; 2595 int ret = 0; 2596 2597 for_each_alloc_capable_rdt_resource(r) { 2598 if (resctrl_arch_get_cdp_enabled(r->rid)) { 2599 ret = schemata_list_add(r, CDP_CODE); 2600 if (ret) 2601 break; 2602 2603 ret = schemata_list_add(r, CDP_DATA); 2604 } else { 2605 ret = schemata_list_add(r, CDP_NONE); 2606 } 2607 2608 if (ret) 2609 break; 2610 } 2611 2612 return ret; 2613 } 2614 2615 static void schemata_list_destroy(void) 2616 { 2617 struct resctrl_schema *s, *tmp; 2618 2619 list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { 2620 list_del(&s->list); 2621 kfree(s); 2622 } 2623 } 2624 2625 static int rdt_get_tree(struct fs_context *fc) 2626 { 2627 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2628 unsigned long flags = RFTYPE_CTRL_BASE; 2629 struct rdt_domain *dom; 2630 struct rdt_resource *r; 2631 int ret; 2632 2633 cpus_read_lock(); 2634 mutex_lock(&rdtgroup_mutex); 2635 /* 2636 * resctrl file system can only be mounted once. 2637 */ 2638 if (resctrl_mounted) { 2639 ret = -EBUSY; 2640 goto out; 2641 } 2642 2643 ret = rdtgroup_setup_root(ctx); 2644 if (ret) 2645 goto out; 2646 2647 ret = rdt_enable_ctx(ctx); 2648 if (ret) 2649 goto out_root; 2650 2651 ret = schemata_list_create(); 2652 if (ret) { 2653 schemata_list_destroy(); 2654 goto out_ctx; 2655 } 2656 2657 closid_init(); 2658 2659 if (resctrl_arch_mon_capable()) 2660 flags |= RFTYPE_MON; 2661 2662 ret = rdtgroup_add_files(rdtgroup_default.kn, flags); 2663 if (ret) 2664 goto out_schemata_free; 2665 2666 kernfs_activate(rdtgroup_default.kn); 2667 2668 ret = rdtgroup_create_info_dir(rdtgroup_default.kn); 2669 if (ret < 0) 2670 goto out_schemata_free; 2671 2672 if (resctrl_arch_mon_capable()) { 2673 ret = mongroup_create_dir(rdtgroup_default.kn, 2674 &rdtgroup_default, "mon_groups", 2675 &kn_mongrp); 2676 if (ret < 0) 2677 goto out_info; 2678 2679 ret = mkdir_mondata_all(rdtgroup_default.kn, 2680 &rdtgroup_default, &kn_mondata); 2681 if (ret < 0) 2682 goto out_mongrp; 2683 rdtgroup_default.mon.mon_data_kn = kn_mondata; 2684 } 2685 2686 ret = rdt_pseudo_lock_init(); 2687 if (ret) 2688 goto out_mondata; 2689 2690 ret = kernfs_get_tree(fc); 2691 if (ret < 0) 2692 goto out_psl; 2693 2694 if (resctrl_arch_alloc_capable()) 2695 resctrl_arch_enable_alloc(); 2696 if (resctrl_arch_mon_capable()) 2697 resctrl_arch_enable_mon(); 2698 2699 if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) 2700 resctrl_mounted = true; 2701 2702 if (is_mbm_enabled()) { 2703 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 2704 list_for_each_entry(dom, &r->domains, list) 2705 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, 2706 RESCTRL_PICK_ANY_CPU); 2707 } 2708 2709 goto out; 2710 2711 out_psl: 2712 rdt_pseudo_lock_release(); 2713 out_mondata: 2714 if (resctrl_arch_mon_capable()) 2715 kernfs_remove(kn_mondata); 2716 out_mongrp: 2717 if (resctrl_arch_mon_capable()) 2718 kernfs_remove(kn_mongrp); 2719 out_info: 2720 kernfs_remove(kn_info); 2721 out_schemata_free: 2722 schemata_list_destroy(); 2723 out_ctx: 2724 rdt_disable_ctx(); 2725 out_root: 2726 rdtgroup_destroy_root(); 2727 out: 2728 rdt_last_cmd_clear(); 2729 mutex_unlock(&rdtgroup_mutex); 2730 cpus_read_unlock(); 2731 return ret; 2732 } 2733 2734 enum 
rdt_param { 2735 Opt_cdp, 2736 Opt_cdpl2, 2737 Opt_mba_mbps, 2738 Opt_debug, 2739 nr__rdt_params 2740 }; 2741 2742 static const struct fs_parameter_spec rdt_fs_parameters[] = { 2743 fsparam_flag("cdp", Opt_cdp), 2744 fsparam_flag("cdpl2", Opt_cdpl2), 2745 fsparam_flag("mba_MBps", Opt_mba_mbps), 2746 fsparam_flag("debug", Opt_debug), 2747 {} 2748 }; 2749 2750 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) 2751 { 2752 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2753 struct fs_parse_result result; 2754 int opt; 2755 2756 opt = fs_parse(fc, rdt_fs_parameters, param, &result); 2757 if (opt < 0) 2758 return opt; 2759 2760 switch (opt) { 2761 case Opt_cdp: 2762 ctx->enable_cdpl3 = true; 2763 return 0; 2764 case Opt_cdpl2: 2765 ctx->enable_cdpl2 = true; 2766 return 0; 2767 case Opt_mba_mbps: 2768 if (!supports_mba_mbps()) 2769 return -EINVAL; 2770 ctx->enable_mba_mbps = true; 2771 return 0; 2772 case Opt_debug: 2773 ctx->enable_debug = true; 2774 return 0; 2775 } 2776 2777 return -EINVAL; 2778 } 2779 2780 static void rdt_fs_context_free(struct fs_context *fc) 2781 { 2782 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2783 2784 kernfs_free_fs_context(fc); 2785 kfree(ctx); 2786 } 2787 2788 static const struct fs_context_operations rdt_fs_context_ops = { 2789 .free = rdt_fs_context_free, 2790 .parse_param = rdt_parse_param, 2791 .get_tree = rdt_get_tree, 2792 }; 2793 2794 static int rdt_init_fs_context(struct fs_context *fc) 2795 { 2796 struct rdt_fs_context *ctx; 2797 2798 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); 2799 if (!ctx) 2800 return -ENOMEM; 2801 2802 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; 2803 fc->fs_private = &ctx->kfc; 2804 fc->ops = &rdt_fs_context_ops; 2805 put_user_ns(fc->user_ns); 2806 fc->user_ns = get_user_ns(&init_user_ns); 2807 fc->global = true; 2808 return 0; 2809 } 2810 2811 static int reset_all_ctrls(struct rdt_resource *r) 2812 { 2813 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2814 struct rdt_hw_domain *hw_dom; 2815 struct msr_param msr_param; 2816 cpumask_var_t cpu_mask; 2817 struct rdt_domain *d; 2818 int i; 2819 2820 /* Walking r->domains, ensure it can't race with cpuhp */ 2821 lockdep_assert_cpus_held(); 2822 2823 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 2824 return -ENOMEM; 2825 2826 msr_param.res = r; 2827 msr_param.low = 0; 2828 msr_param.high = hw_res->num_closid; 2829 2830 /* 2831 * Disable resource control for this resource by setting all 2832 * CBMs in all domains to the maximum mask value. Pick one CPU 2833 * from each domain to update the MSRs below. 2834 */ 2835 list_for_each_entry(d, &r->domains, list) { 2836 hw_dom = resctrl_to_arch_dom(d); 2837 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); 2838 2839 for (i = 0; i < hw_res->num_closid; i++) 2840 hw_dom->ctrl_val[i] = r->default_ctrl; 2841 } 2842 2843 /* Update CBM on all the CPUs in cpu_mask */ 2844 on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); 2845 2846 free_cpumask_var(cpu_mask); 2847 2848 return 0; 2849 } 2850 2851 /* 2852 * Move tasks from one to the other group. If @from is NULL, then all tasks 2853 * in the systems are moved unconditionally (used for teardown). 2854 * 2855 * If @mask is not NULL the cpus on which moved tasks are running are set 2856 * in that mask so the update smp function call is restricted to affected 2857 * cpus. 
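 *
 * For example, rdtgroup_rmdir_mon() passes its tmpmask here and later hands
 * the same mask to update_closid_rmid(), so only CPUs that were running an
 * affected task (plus the group's own CPUs) receive the update IPI.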
2858 */ 2859 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, 2860 struct cpumask *mask) 2861 { 2862 struct task_struct *p, *t; 2863 2864 read_lock(&tasklist_lock); 2865 for_each_process_thread(p, t) { 2866 if (!from || is_closid_match(t, from) || 2867 is_rmid_match(t, from)) { 2868 resctrl_arch_set_closid_rmid(t, to->closid, 2869 to->mon.rmid); 2870 2871 /* 2872 * Order the closid/rmid stores above before the loads 2873 * in task_curr(). This pairs with the full barrier 2874 * between the rq->curr update and resctrl_sched_in() 2875 * during context switch. 2876 */ 2877 smp_mb(); 2878 2879 /* 2880 * If the task is on a CPU, set the CPU in the mask. 2881 * The detection is inaccurate as tasks might move or 2882 * schedule before the smp function call takes place. 2883 * In such a case the function call is pointless, but 2884 * there is no other side effect. 2885 */ 2886 if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) 2887 cpumask_set_cpu(task_cpu(t), mask); 2888 } 2889 } 2890 read_unlock(&tasklist_lock); 2891 } 2892 2893 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) 2894 { 2895 struct rdtgroup *sentry, *stmp; 2896 struct list_head *head; 2897 2898 head = &rdtgrp->mon.crdtgrp_list; 2899 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { 2900 free_rmid(sentry->closid, sentry->mon.rmid); 2901 list_del(&sentry->mon.crdtgrp_list); 2902 2903 if (atomic_read(&sentry->waitcount) != 0) 2904 sentry->flags = RDT_DELETED; 2905 else 2906 rdtgroup_remove(sentry); 2907 } 2908 } 2909 2910 /* 2911 * Forcibly remove all of subdirectories under root. 2912 */ 2913 static void rmdir_all_sub(void) 2914 { 2915 struct rdtgroup *rdtgrp, *tmp; 2916 2917 /* Move all tasks to the default resource group */ 2918 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); 2919 2920 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { 2921 /* Free any child rmids */ 2922 free_all_child_rdtgrp(rdtgrp); 2923 2924 /* Remove each rdtgroup other than root */ 2925 if (rdtgrp == &rdtgroup_default) 2926 continue; 2927 2928 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2929 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2930 rdtgroup_pseudo_lock_remove(rdtgrp); 2931 2932 /* 2933 * Give any CPUs back to the default group. We cannot copy 2934 * cpu_online_mask because a CPU might have executed the 2935 * offline callback already, but is still marked online. 2936 */ 2937 cpumask_or(&rdtgroup_default.cpu_mask, 2938 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 2939 2940 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 2941 2942 kernfs_remove(rdtgrp->kn); 2943 list_del(&rdtgrp->rdtgroup_list); 2944 2945 if (atomic_read(&rdtgrp->waitcount) != 0) 2946 rdtgrp->flags = RDT_DELETED; 2947 else 2948 rdtgroup_remove(rdtgrp); 2949 } 2950 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ 2951 update_closid_rmid(cpu_online_mask, &rdtgroup_default); 2952 2953 kernfs_remove(kn_info); 2954 kernfs_remove(kn_mongrp); 2955 kernfs_remove(kn_mondata); 2956 } 2957 2958 static void rdt_kill_sb(struct super_block *sb) 2959 { 2960 struct rdt_resource *r; 2961 2962 cpus_read_lock(); 2963 mutex_lock(&rdtgroup_mutex); 2964 2965 rdt_disable_ctx(); 2966 2967 /*Put everything back to default values. 
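	 * Control values of all alloc capable resources are reset, every
	 * task and CPU is returned to the default group, and the directory
	 * tree created under the mount point is removed before the
	 * superblock is killed.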
*/ 2968 for_each_alloc_capable_rdt_resource(r) 2969 reset_all_ctrls(r); 2970 rmdir_all_sub(); 2971 rdt_pseudo_lock_release(); 2972 rdtgroup_default.mode = RDT_MODE_SHAREABLE; 2973 schemata_list_destroy(); 2974 rdtgroup_destroy_root(); 2975 if (resctrl_arch_alloc_capable()) 2976 resctrl_arch_disable_alloc(); 2977 if (resctrl_arch_mon_capable()) 2978 resctrl_arch_disable_mon(); 2979 resctrl_mounted = false; 2980 kernfs_kill_sb(sb); 2981 mutex_unlock(&rdtgroup_mutex); 2982 cpus_read_unlock(); 2983 } 2984 2985 static struct file_system_type rdt_fs_type = { 2986 .name = "resctrl", 2987 .init_fs_context = rdt_init_fs_context, 2988 .parameters = rdt_fs_parameters, 2989 .kill_sb = rdt_kill_sb, 2990 }; 2991 2992 static int mon_addfile(struct kernfs_node *parent_kn, const char *name, 2993 void *priv) 2994 { 2995 struct kernfs_node *kn; 2996 int ret = 0; 2997 2998 kn = __kernfs_create_file(parent_kn, name, 0444, 2999 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 3000 &kf_mondata_ops, priv, NULL, NULL); 3001 if (IS_ERR(kn)) 3002 return PTR_ERR(kn); 3003 3004 ret = rdtgroup_kn_set_ugid(kn); 3005 if (ret) { 3006 kernfs_remove(kn); 3007 return ret; 3008 } 3009 3010 return ret; 3011 } 3012 3013 /* 3014 * Remove all subdirectories of mon_data of ctrl_mon groups 3015 * and monitor groups with given domain id. 3016 */ 3017 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3018 unsigned int dom_id) 3019 { 3020 struct rdtgroup *prgrp, *crgrp; 3021 char name[32]; 3022 3023 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3024 sprintf(name, "mon_%s_%02d", r->name, dom_id); 3025 kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); 3026 3027 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) 3028 kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); 3029 } 3030 } 3031 3032 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, 3033 struct rdt_domain *d, 3034 struct rdt_resource *r, struct rdtgroup *prgrp) 3035 { 3036 union mon_data_bits priv; 3037 struct kernfs_node *kn; 3038 struct mon_evt *mevt; 3039 struct rmid_read rr; 3040 char name[32]; 3041 int ret; 3042 3043 sprintf(name, "mon_%s_%02d", r->name, d->id); 3044 /* create the directory */ 3045 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 3046 if (IS_ERR(kn)) 3047 return PTR_ERR(kn); 3048 3049 ret = rdtgroup_kn_set_ugid(kn); 3050 if (ret) 3051 goto out_destroy; 3052 3053 if (WARN_ON(list_empty(&r->evt_list))) { 3054 ret = -EPERM; 3055 goto out_destroy; 3056 } 3057 3058 priv.u.rid = r->rid; 3059 priv.u.domid = d->id; 3060 list_for_each_entry(mevt, &r->evt_list, list) { 3061 priv.u.evtid = mevt->evtid; 3062 ret = mon_addfile(kn, mevt->name, priv.priv); 3063 if (ret) 3064 goto out_destroy; 3065 3066 if (is_mbm_event(mevt->evtid)) 3067 mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); 3068 } 3069 kernfs_activate(kn); 3070 return 0; 3071 3072 out_destroy: 3073 kernfs_remove(kn); 3074 return ret; 3075 } 3076 3077 /* 3078 * Add all subdirectories of mon_data for "ctrl_mon" groups 3079 * and "monitor" groups with given domain id. 
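 *
 * For example, when L3 domain 1 comes online, a directory named
 * "mon_L3_01" (the name format used by mkdir_mondata_subdir()) appears
 * under the mon_data directory of the default group, of every other
 * ctrl_mon group and of every mon group.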
3080 */ 3081 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3082 struct rdt_domain *d) 3083 { 3084 struct kernfs_node *parent_kn; 3085 struct rdtgroup *prgrp, *crgrp; 3086 struct list_head *head; 3087 3088 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3089 parent_kn = prgrp->mon.mon_data_kn; 3090 mkdir_mondata_subdir(parent_kn, d, r, prgrp); 3091 3092 head = &prgrp->mon.crdtgrp_list; 3093 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 3094 parent_kn = crgrp->mon.mon_data_kn; 3095 mkdir_mondata_subdir(parent_kn, d, r, crgrp); 3096 } 3097 } 3098 } 3099 3100 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, 3101 struct rdt_resource *r, 3102 struct rdtgroup *prgrp) 3103 { 3104 struct rdt_domain *dom; 3105 int ret; 3106 3107 /* Walking r->domains, ensure it can't race with cpuhp */ 3108 lockdep_assert_cpus_held(); 3109 3110 list_for_each_entry(dom, &r->domains, list) { 3111 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); 3112 if (ret) 3113 return ret; 3114 } 3115 3116 return 0; 3117 } 3118 3119 /* 3120 * This creates a directory mon_data which contains the monitored data. 3121 * 3122 * mon_data has one directory for each domain which are named 3123 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data 3124 * with L3 domain looks as below: 3125 * ./mon_data: 3126 * mon_L3_00 3127 * mon_L3_01 3128 * mon_L3_02 3129 * ... 3130 * 3131 * Each domain directory has one file per event: 3132 * ./mon_L3_00/: 3133 * llc_occupancy 3134 * 3135 */ 3136 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 3137 struct rdtgroup *prgrp, 3138 struct kernfs_node **dest_kn) 3139 { 3140 struct rdt_resource *r; 3141 struct kernfs_node *kn; 3142 int ret; 3143 3144 /* 3145 * Create the mon_data directory first. 3146 */ 3147 ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); 3148 if (ret) 3149 return ret; 3150 3151 if (dest_kn) 3152 *dest_kn = kn; 3153 3154 /* 3155 * Create the subdirectories for each domain. Note that all events 3156 * in a domain like L3 are grouped into a resource whose domain is L3 3157 */ 3158 for_each_mon_capable_rdt_resource(r) { 3159 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); 3160 if (ret) 3161 goto out_destroy; 3162 } 3163 3164 return 0; 3165 3166 out_destroy: 3167 kernfs_remove(kn); 3168 return ret; 3169 } 3170 3171 /** 3172 * cbm_ensure_valid - Enforce validity on provided CBM 3173 * @_val: Candidate CBM 3174 * @r: RDT resource to which the CBM belongs 3175 * 3176 * The provided CBM represents all cache portions available for use. This 3177 * may be represented by a bitmap that does not consist of contiguous ones 3178 * and thus be an invalid CBM. 3179 * Here the provided CBM is forced to be a valid CBM by only considering 3180 * the first set of contiguous bits as valid and clearing all bits. 3181 * The intention here is to provide a valid default CBM with which a new 3182 * resource group is initialized. The user can follow this with a 3183 * modification to the CBM if the default does not satisfy the 3184 * requirements. 
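 *
 * Worked example, assuming a 16 bit CBM: a candidate of 0xf0f0 has its
 * first (least significant) run of set bits at bits 4-7 and its next zero
 * bit at bit 8, so bits 8-15 are cleared and the function returns 0x00f0.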
3185 */ 3186 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) 3187 { 3188 unsigned int cbm_len = r->cache.cbm_len; 3189 unsigned long first_bit, zero_bit; 3190 unsigned long val = _val; 3191 3192 if (!val) 3193 return 0; 3194 3195 first_bit = find_first_bit(&val, cbm_len); 3196 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); 3197 3198 /* Clear any remaining bits to ensure contiguous region */ 3199 bitmap_clear(&val, zero_bit, cbm_len - zero_bit); 3200 return (u32)val; 3201 } 3202 3203 /* 3204 * Initialize cache resources per RDT domain 3205 * 3206 * Set the RDT domain up to start off with all usable allocations. That is, 3207 * all shareable and unused bits. All-zero CBM is invalid. 3208 */ 3209 static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, 3210 u32 closid) 3211 { 3212 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); 3213 enum resctrl_conf_type t = s->conf_type; 3214 struct resctrl_staged_config *cfg; 3215 struct rdt_resource *r = s->res; 3216 u32 used_b = 0, unused_b = 0; 3217 unsigned long tmp_cbm; 3218 enum rdtgrp_mode mode; 3219 u32 peer_ctl, ctrl_val; 3220 int i; 3221 3222 cfg = &d->staged_config[t]; 3223 cfg->have_new_ctrl = false; 3224 cfg->new_ctrl = r->cache.shareable_bits; 3225 used_b = r->cache.shareable_bits; 3226 for (i = 0; i < closids_supported(); i++) { 3227 if (closid_allocated(i) && i != closid) { 3228 mode = rdtgroup_mode_by_closid(i); 3229 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 3230 /* 3231 * ctrl values for locksetup aren't relevant 3232 * until the schemata is written, and the mode 3233 * becomes RDT_MODE_PSEUDO_LOCKED. 3234 */ 3235 continue; 3236 /* 3237 * If CDP is active include peer domain's 3238 * usage to ensure there is no overlap 3239 * with an exclusive group. 3240 */ 3241 if (resctrl_arch_get_cdp_enabled(r->rid)) 3242 peer_ctl = resctrl_arch_get_config(r, d, i, 3243 peer_type); 3244 else 3245 peer_ctl = 0; 3246 ctrl_val = resctrl_arch_get_config(r, d, i, 3247 s->conf_type); 3248 used_b |= ctrl_val | peer_ctl; 3249 if (mode == RDT_MODE_SHAREABLE) 3250 cfg->new_ctrl |= ctrl_val | peer_ctl; 3251 } 3252 } 3253 if (d->plr && d->plr->cbm > 0) 3254 used_b |= d->plr->cbm; 3255 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); 3256 unused_b &= BIT_MASK(r->cache.cbm_len) - 1; 3257 cfg->new_ctrl |= unused_b; 3258 /* 3259 * Force the initial CBM to be valid, user can 3260 * modify the CBM based on system availability. 3261 */ 3262 cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); 3263 /* 3264 * Assign the u32 CBM to an unsigned long to ensure that 3265 * bitmap_weight() does not access out-of-bound memory. 3266 */ 3267 tmp_cbm = cfg->new_ctrl; 3268 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { 3269 rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); 3270 return -ENOSPC; 3271 } 3272 cfg->have_new_ctrl = true; 3273 3274 return 0; 3275 } 3276 3277 /* 3278 * Initialize cache resources with default values. 3279 * 3280 * A new RDT group is being created on an allocation capable (CAT) 3281 * supporting system. Set this group up to start off with all usable 3282 * allocations. 3283 * 3284 * If there are no more shareable bits available on any domain then 3285 * the entire allocation will fail. 
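 *
 * Worked example with made-up values: for an 8 bit CBM with shareable_bits
 * of 0x03 and a single existing exclusive group owning 0xf0, the bits
 * already in use are 0xf3 and the unused bits are 0x0c, so the new group
 * starts with cbm_ensure_valid(0x03 | 0x0c) = 0x0f in that domain.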
3286 */ 3287 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) 3288 { 3289 struct rdt_domain *d; 3290 int ret; 3291 3292 list_for_each_entry(d, &s->res->domains, list) { 3293 ret = __init_one_rdt_domain(d, s, closid); 3294 if (ret < 0) 3295 return ret; 3296 } 3297 3298 return 0; 3299 } 3300 3301 /* Initialize MBA resource with default values. */ 3302 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) 3303 { 3304 struct resctrl_staged_config *cfg; 3305 struct rdt_domain *d; 3306 3307 list_for_each_entry(d, &r->domains, list) { 3308 if (is_mba_sc(r)) { 3309 d->mbps_val[closid] = MBA_MAX_MBPS; 3310 continue; 3311 } 3312 3313 cfg = &d->staged_config[CDP_NONE]; 3314 cfg->new_ctrl = r->default_ctrl; 3315 cfg->have_new_ctrl = true; 3316 } 3317 } 3318 3319 /* Initialize the RDT group's allocations. */ 3320 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) 3321 { 3322 struct resctrl_schema *s; 3323 struct rdt_resource *r; 3324 int ret = 0; 3325 3326 rdt_staged_configs_clear(); 3327 3328 list_for_each_entry(s, &resctrl_schema_all, list) { 3329 r = s->res; 3330 if (r->rid == RDT_RESOURCE_MBA || 3331 r->rid == RDT_RESOURCE_SMBA) { 3332 rdtgroup_init_mba(r, rdtgrp->closid); 3333 if (is_mba_sc(r)) 3334 continue; 3335 } else { 3336 ret = rdtgroup_init_cat(s, rdtgrp->closid); 3337 if (ret < 0) 3338 goto out; 3339 } 3340 3341 ret = resctrl_arch_update_domains(r, rdtgrp->closid); 3342 if (ret < 0) { 3343 rdt_last_cmd_puts("Failed to initialize allocations\n"); 3344 goto out; 3345 } 3346 3347 } 3348 3349 rdtgrp->mode = RDT_MODE_SHAREABLE; 3350 3351 out: 3352 rdt_staged_configs_clear(); 3353 return ret; 3354 } 3355 3356 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) 3357 { 3358 int ret; 3359 3360 if (!resctrl_arch_mon_capable()) 3361 return 0; 3362 3363 ret = alloc_rmid(rdtgrp->closid); 3364 if (ret < 0) { 3365 rdt_last_cmd_puts("Out of RMIDs\n"); 3366 return ret; 3367 } 3368 rdtgrp->mon.rmid = ret; 3369 3370 ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); 3371 if (ret) { 3372 rdt_last_cmd_puts("kernfs subdir error\n"); 3373 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3374 return ret; 3375 } 3376 3377 return 0; 3378 } 3379 3380 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) 3381 { 3382 if (resctrl_arch_mon_capable()) 3383 free_rmid(rgrp->closid, rgrp->mon.rmid); 3384 } 3385 3386 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, 3387 const char *name, umode_t mode, 3388 enum rdt_group_type rtype, struct rdtgroup **r) 3389 { 3390 struct rdtgroup *prdtgrp, *rdtgrp; 3391 unsigned long files = 0; 3392 struct kernfs_node *kn; 3393 int ret; 3394 3395 prdtgrp = rdtgroup_kn_lock_live(parent_kn); 3396 if (!prdtgrp) { 3397 ret = -ENODEV; 3398 goto out_unlock; 3399 } 3400 3401 if (rtype == RDTMON_GROUP && 3402 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3403 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { 3404 ret = -EINVAL; 3405 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 3406 goto out_unlock; 3407 } 3408 3409 /* allocate the rdtgroup. 
*/ 3410 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); 3411 if (!rdtgrp) { 3412 ret = -ENOSPC; 3413 rdt_last_cmd_puts("Kernel out of memory\n"); 3414 goto out_unlock; 3415 } 3416 *r = rdtgrp; 3417 rdtgrp->mon.parent = prdtgrp; 3418 rdtgrp->type = rtype; 3419 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); 3420 3421 /* kernfs creates the directory for rdtgrp */ 3422 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); 3423 if (IS_ERR(kn)) { 3424 ret = PTR_ERR(kn); 3425 rdt_last_cmd_puts("kernfs create error\n"); 3426 goto out_free_rgrp; 3427 } 3428 rdtgrp->kn = kn; 3429 3430 /* 3431 * kernfs_remove() will drop the reference count on "kn" which 3432 * will free it. But we still need it to stick around for the 3433 * rdtgroup_kn_unlock(kn) call. Take one extra reference here, 3434 * which will be dropped by kernfs_put() in rdtgroup_remove(). 3435 */ 3436 kernfs_get(kn); 3437 3438 ret = rdtgroup_kn_set_ugid(kn); 3439 if (ret) { 3440 rdt_last_cmd_puts("kernfs perm error\n"); 3441 goto out_destroy; 3442 } 3443 3444 if (rtype == RDTCTRL_GROUP) { 3445 files = RFTYPE_BASE | RFTYPE_CTRL; 3446 if (resctrl_arch_mon_capable()) 3447 files |= RFTYPE_MON; 3448 } else { 3449 files = RFTYPE_BASE | RFTYPE_MON; 3450 } 3451 3452 ret = rdtgroup_add_files(kn, files); 3453 if (ret) { 3454 rdt_last_cmd_puts("kernfs fill error\n"); 3455 goto out_destroy; 3456 } 3457 3458 /* 3459 * The caller unlocks the parent_kn upon success. 3460 */ 3461 return 0; 3462 3463 out_destroy: 3464 kernfs_put(rdtgrp->kn); 3465 kernfs_remove(rdtgrp->kn); 3466 out_free_rgrp: 3467 kfree(rdtgrp); 3468 out_unlock: 3469 rdtgroup_kn_unlock(parent_kn); 3470 return ret; 3471 } 3472 3473 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) 3474 { 3475 kernfs_remove(rgrp->kn); 3476 rdtgroup_remove(rgrp); 3477 } 3478 3479 /* 3480 * Create a monitor group under "mon_groups" directory of a control 3481 * and monitor group(ctrl_mon). This is a resource group 3482 * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 3483 */ 3484 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, 3485 const char *name, umode_t mode) 3486 { 3487 struct rdtgroup *rdtgrp, *prgrp; 3488 int ret; 3489 3490 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); 3491 if (ret) 3492 return ret; 3493 3494 prgrp = rdtgrp->mon.parent; 3495 rdtgrp->closid = prgrp->closid; 3496 3497 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3498 if (ret) { 3499 mkdir_rdt_prepare_clean(rdtgrp); 3500 goto out_unlock; 3501 } 3502 3503 kernfs_activate(rdtgrp->kn); 3504 3505 /* 3506 * Add the rdtgrp to the list of rdtgrps the parent 3507 * ctrl_mon group has to track. 3508 */ 3509 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); 3510 3511 out_unlock: 3512 rdtgroup_kn_unlock(parent_kn); 3513 return ret; 3514 } 3515 3516 /* 3517 * These are rdtgroups created under the root directory. Can be used 3518 * to allocate and monitor resources. 
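 *
 * For example, assuming the usual /sys/fs/resctrl mount point,
 * "mkdir /sys/fs/resctrl/grp1" ends up here: a new CLOSID is allocated,
 * the group is populated with its base and control files (schemata, mode,
 * tasks, cpus, ...) and, on monitoring capable systems, gets its own
 * mon_groups and mon_data subdirectories.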
3519 */ 3520 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, 3521 const char *name, umode_t mode) 3522 { 3523 struct rdtgroup *rdtgrp; 3524 struct kernfs_node *kn; 3525 u32 closid; 3526 int ret; 3527 3528 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); 3529 if (ret) 3530 return ret; 3531 3532 kn = rdtgrp->kn; 3533 ret = closid_alloc(); 3534 if (ret < 0) { 3535 rdt_last_cmd_puts("Out of CLOSIDs\n"); 3536 goto out_common_fail; 3537 } 3538 closid = ret; 3539 ret = 0; 3540 3541 rdtgrp->closid = closid; 3542 3543 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3544 if (ret) 3545 goto out_closid_free; 3546 3547 kernfs_activate(rdtgrp->kn); 3548 3549 ret = rdtgroup_init_alloc(rdtgrp); 3550 if (ret < 0) 3551 goto out_rmid_free; 3552 3553 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 3554 3555 if (resctrl_arch_mon_capable()) { 3556 /* 3557 * Create an empty mon_groups directory to hold the subset 3558 * of tasks and cpus to monitor. 3559 */ 3560 ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); 3561 if (ret) { 3562 rdt_last_cmd_puts("kernfs subdir error\n"); 3563 goto out_del_list; 3564 } 3565 } 3566 3567 goto out_unlock; 3568 3569 out_del_list: 3570 list_del(&rdtgrp->rdtgroup_list); 3571 out_rmid_free: 3572 mkdir_rdt_prepare_rmid_free(rdtgrp); 3573 out_closid_free: 3574 closid_free(closid); 3575 out_common_fail: 3576 mkdir_rdt_prepare_clean(rdtgrp); 3577 out_unlock: 3578 rdtgroup_kn_unlock(parent_kn); 3579 return ret; 3580 } 3581 3582 /* 3583 * We allow creating mon groups only with in a directory called "mon_groups" 3584 * which is present in every ctrl_mon group. Check if this is a valid 3585 * "mon_groups" directory. 3586 * 3587 * 1. The directory should be named "mon_groups". 3588 * 2. The mon group itself should "not" be named "mon_groups". 3589 * This makes sure "mon_groups" directory always has a ctrl_mon group 3590 * as parent. 3591 */ 3592 static bool is_mon_groups(struct kernfs_node *kn, const char *name) 3593 { 3594 return (!strcmp(kn->name, "mon_groups") && 3595 strcmp(name, "mon_groups")); 3596 } 3597 3598 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, 3599 umode_t mode) 3600 { 3601 /* Do not accept '\n' to avoid unparsable situation. */ 3602 if (strchr(name, '\n')) 3603 return -EINVAL; 3604 3605 /* 3606 * If the parent directory is the root directory and RDT 3607 * allocation is supported, add a control and monitoring 3608 * subdirectory 3609 */ 3610 if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) 3611 return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); 3612 3613 /* 3614 * If RDT monitoring is supported and the parent directory is a valid 3615 * "mon_groups" directory, add a monitoring subdirectory. 3616 */ 3617 if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) 3618 return rdtgroup_mkdir_mon(parent_kn, name, mode); 3619 3620 return -EPERM; 3621 } 3622 3623 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3624 { 3625 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3626 int cpu; 3627 3628 /* Give any tasks back to the parent group */ 3629 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); 3630 3631 /* Update per cpu rmid of the moved CPUs first */ 3632 for_each_cpu(cpu, &rdtgrp->cpu_mask) 3633 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; 3634 /* 3635 * Update the MSR on moved CPUs and CPUs which have moved 3636 * task running on them. 
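	 * tmpmask already contains the CPUs on which moved tasks were
	 * running (filled in by rdt_move_group_tasks() above); OR-ing in
	 * the group's own CPU mask below also covers CPUs whose per-CPU
	 * default RMID was just rewritten.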
3637 */ 3638 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3639 update_closid_rmid(tmpmask, NULL); 3640 3641 rdtgrp->flags = RDT_DELETED; 3642 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3643 3644 /* 3645 * Remove the rdtgrp from the parent ctrl_mon group's list 3646 */ 3647 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3648 list_del(&rdtgrp->mon.crdtgrp_list); 3649 3650 kernfs_remove(rdtgrp->kn); 3651 3652 return 0; 3653 } 3654 3655 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) 3656 { 3657 rdtgrp->flags = RDT_DELETED; 3658 list_del(&rdtgrp->rdtgroup_list); 3659 3660 kernfs_remove(rdtgrp->kn); 3661 return 0; 3662 } 3663 3664 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3665 { 3666 int cpu; 3667 3668 /* Give any tasks back to the default group */ 3669 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); 3670 3671 /* Give any CPUs back to the default group */ 3672 cpumask_or(&rdtgroup_default.cpu_mask, 3673 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 3674 3675 /* Update per cpu closid and rmid of the moved CPUs first */ 3676 for_each_cpu(cpu, &rdtgrp->cpu_mask) { 3677 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; 3678 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; 3679 } 3680 3681 /* 3682 * Update the MSR on moved CPUs and CPUs which have moved 3683 * task running on them. 3684 */ 3685 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3686 update_closid_rmid(tmpmask, NULL); 3687 3688 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3689 closid_free(rdtgrp->closid); 3690 3691 rdtgroup_ctrl_remove(rdtgrp); 3692 3693 /* 3694 * Free all the child monitor group rmids. 3695 */ 3696 free_all_child_rdtgrp(rdtgrp); 3697 3698 return 0; 3699 } 3700 3701 static int rdtgroup_rmdir(struct kernfs_node *kn) 3702 { 3703 struct kernfs_node *parent_kn = kn->parent; 3704 struct rdtgroup *rdtgrp; 3705 cpumask_var_t tmpmask; 3706 int ret = 0; 3707 3708 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 3709 return -ENOMEM; 3710 3711 rdtgrp = rdtgroup_kn_lock_live(kn); 3712 if (!rdtgrp) { 3713 ret = -EPERM; 3714 goto out; 3715 } 3716 3717 /* 3718 * If the rdtgroup is a ctrl_mon group and parent directory 3719 * is the root directory, remove the ctrl_mon group. 3720 * 3721 * If the rdtgroup is a mon group and parent directory 3722 * is a valid "mon_groups" directory, remove the mon group. 3723 */ 3724 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && 3725 rdtgrp != &rdtgroup_default) { 3726 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3727 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 3728 ret = rdtgroup_ctrl_remove(rdtgrp); 3729 } else { 3730 ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); 3731 } 3732 } else if (rdtgrp->type == RDTMON_GROUP && 3733 is_mon_groups(parent_kn, kn->name)) { 3734 ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); 3735 } else { 3736 ret = -EPERM; 3737 } 3738 3739 out: 3740 rdtgroup_kn_unlock(kn); 3741 free_cpumask_var(tmpmask); 3742 return ret; 3743 } 3744 3745 /** 3746 * mongrp_reparent() - replace parent CTRL_MON group of a MON group 3747 * @rdtgrp: the MON group whose parent should be replaced 3748 * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp 3749 * @cpus: cpumask provided by the caller for use during this call 3750 * 3751 * Replaces the parent CTRL_MON group for a MON group, resulting in all member 3752 * tasks' CLOSID immediately changing to that of the new parent group. 3753 * Monitoring data for the group is unaffected by this operation. 
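 *
 * For example, assuming the usual /sys/fs/resctrl mount point,
 *
 *	mv /sys/fs/resctrl/g1/mon_groups/m1 /sys/fs/resctrl/g2/mon_groups/
 *
 * reaches this function via rdtgroup_rename(): the tasks in m1 immediately
 * start using g2's CLOSID while keeping their existing RMID, so the
 * monitoring data remains continuous.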
3754 */ 3755 static void mongrp_reparent(struct rdtgroup *rdtgrp, 3756 struct rdtgroup *new_prdtgrp, 3757 cpumask_var_t cpus) 3758 { 3759 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3760 3761 WARN_ON(rdtgrp->type != RDTMON_GROUP); 3762 WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); 3763 3764 /* Nothing to do when simply renaming a MON group. */ 3765 if (prdtgrp == new_prdtgrp) 3766 return; 3767 3768 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3769 list_move_tail(&rdtgrp->mon.crdtgrp_list, 3770 &new_prdtgrp->mon.crdtgrp_list); 3771 3772 rdtgrp->mon.parent = new_prdtgrp; 3773 rdtgrp->closid = new_prdtgrp->closid; 3774 3775 /* Propagate updated closid to all tasks in this group. */ 3776 rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); 3777 3778 update_closid_rmid(cpus, NULL); 3779 } 3780 3781 static int rdtgroup_rename(struct kernfs_node *kn, 3782 struct kernfs_node *new_parent, const char *new_name) 3783 { 3784 struct rdtgroup *new_prdtgrp; 3785 struct rdtgroup *rdtgrp; 3786 cpumask_var_t tmpmask; 3787 int ret; 3788 3789 rdtgrp = kernfs_to_rdtgroup(kn); 3790 new_prdtgrp = kernfs_to_rdtgroup(new_parent); 3791 if (!rdtgrp || !new_prdtgrp) 3792 return -ENOENT; 3793 3794 /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ 3795 rdtgroup_kn_get(rdtgrp, kn); 3796 rdtgroup_kn_get(new_prdtgrp, new_parent); 3797 3798 mutex_lock(&rdtgroup_mutex); 3799 3800 rdt_last_cmd_clear(); 3801 3802 /* 3803 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if 3804 * either kernfs_node is a file. 3805 */ 3806 if (kernfs_type(kn) != KERNFS_DIR || 3807 kernfs_type(new_parent) != KERNFS_DIR) { 3808 rdt_last_cmd_puts("Source and destination must be directories"); 3809 ret = -EPERM; 3810 goto out; 3811 } 3812 3813 if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { 3814 ret = -ENOENT; 3815 goto out; 3816 } 3817 3818 if (rdtgrp->type != RDTMON_GROUP || !kn->parent || 3819 !is_mon_groups(kn->parent, kn->name)) { 3820 rdt_last_cmd_puts("Source must be a MON group\n"); 3821 ret = -EPERM; 3822 goto out; 3823 } 3824 3825 if (!is_mon_groups(new_parent, new_name)) { 3826 rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); 3827 ret = -EPERM; 3828 goto out; 3829 } 3830 3831 /* 3832 * If the MON group is monitoring CPUs, the CPUs must be assigned to the 3833 * current parent CTRL_MON group and therefore cannot be assigned to 3834 * the new parent, making the move illegal. 3835 */ 3836 if (!cpumask_empty(&rdtgrp->cpu_mask) && 3837 rdtgrp->mon.parent != new_prdtgrp) { 3838 rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); 3839 ret = -EPERM; 3840 goto out; 3841 } 3842 3843 /* 3844 * Allocate the cpumask for use in mongrp_reparent() to avoid the 3845 * possibility of failing to allocate it after kernfs_rename() has 3846 * succeeded. 3847 */ 3848 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { 3849 ret = -ENOMEM; 3850 goto out; 3851 } 3852 3853 /* 3854 * Perform all input validation and allocations needed to ensure 3855 * mongrp_reparent() will succeed before calling kernfs_rename(), 3856 * otherwise it would be necessary to revert this call if 3857 * mongrp_reparent() failed. 
3858 */ 3859 ret = kernfs_rename(kn, new_parent, new_name); 3860 if (!ret) 3861 mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); 3862 3863 free_cpumask_var(tmpmask); 3864 3865 out: 3866 mutex_unlock(&rdtgroup_mutex); 3867 rdtgroup_kn_put(rdtgrp, kn); 3868 rdtgroup_kn_put(new_prdtgrp, new_parent); 3869 return ret; 3870 } 3871 3872 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) 3873 { 3874 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) 3875 seq_puts(seq, ",cdp"); 3876 3877 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) 3878 seq_puts(seq, ",cdpl2"); 3879 3880 if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) 3881 seq_puts(seq, ",mba_MBps"); 3882 3883 if (resctrl_debug) 3884 seq_puts(seq, ",debug"); 3885 3886 return 0; 3887 } 3888 3889 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { 3890 .mkdir = rdtgroup_mkdir, 3891 .rmdir = rdtgroup_rmdir, 3892 .rename = rdtgroup_rename, 3893 .show_options = rdtgroup_show_options, 3894 }; 3895 3896 static int rdtgroup_setup_root(struct rdt_fs_context *ctx) 3897 { 3898 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, 3899 KERNFS_ROOT_CREATE_DEACTIVATED | 3900 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, 3901 &rdtgroup_default); 3902 if (IS_ERR(rdt_root)) 3903 return PTR_ERR(rdt_root); 3904 3905 ctx->kfc.root = rdt_root; 3906 rdtgroup_default.kn = kernfs_root_to_node(rdt_root); 3907 3908 return 0; 3909 } 3910 3911 static void rdtgroup_destroy_root(void) 3912 { 3913 kernfs_destroy_root(rdt_root); 3914 rdtgroup_default.kn = NULL; 3915 } 3916 3917 static void __init rdtgroup_setup_default(void) 3918 { 3919 mutex_lock(&rdtgroup_mutex); 3920 3921 rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; 3922 rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; 3923 rdtgroup_default.type = RDTCTRL_GROUP; 3924 INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); 3925 3926 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); 3927 3928 mutex_unlock(&rdtgroup_mutex); 3929 } 3930 3931 static void domain_destroy_mon_state(struct rdt_domain *d) 3932 { 3933 bitmap_free(d->rmid_busy_llc); 3934 kfree(d->mbm_total); 3935 kfree(d->mbm_local); 3936 } 3937 3938 void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) 3939 { 3940 mutex_lock(&rdtgroup_mutex); 3941 3942 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) 3943 mba_sc_domain_destroy(r, d); 3944 3945 if (!r->mon_capable) 3946 goto out_unlock; 3947 3948 /* 3949 * If resctrl is mounted, remove all the 3950 * per domain monitor data directories. 3951 */ 3952 if (resctrl_mounted && resctrl_arch_mon_capable()) 3953 rmdir_mondata_subdir_allrdtgrp(r, d->id); 3954 3955 if (is_mbm_enabled()) 3956 cancel_delayed_work(&d->mbm_over); 3957 if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { 3958 /* 3959 * When a package is going down, forcefully 3960 * decrement rmid->ebusy. There is no way to know 3961 * that the L3 was flushed and hence may lead to 3962 * incorrect counts in rare scenarios, but leaving 3963 * the RMID as busy creates RMID leaks if the 3964 * package never comes back. 
3965 */ 3966 __check_limbo(d, true); 3967 cancel_delayed_work(&d->cqm_limbo); 3968 } 3969 3970 domain_destroy_mon_state(d); 3971 3972 out_unlock: 3973 mutex_unlock(&rdtgroup_mutex); 3974 } 3975 3976 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) 3977 { 3978 u32 idx_limit = resctrl_arch_system_num_rmid_idx(); 3979 size_t tsize; 3980 3981 if (is_llc_occupancy_enabled()) { 3982 d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); 3983 if (!d->rmid_busy_llc) 3984 return -ENOMEM; 3985 } 3986 if (is_mbm_total_enabled()) { 3987 tsize = sizeof(*d->mbm_total); 3988 d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); 3989 if (!d->mbm_total) { 3990 bitmap_free(d->rmid_busy_llc); 3991 return -ENOMEM; 3992 } 3993 } 3994 if (is_mbm_local_enabled()) { 3995 tsize = sizeof(*d->mbm_local); 3996 d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); 3997 if (!d->mbm_local) { 3998 bitmap_free(d->rmid_busy_llc); 3999 kfree(d->mbm_total); 4000 return -ENOMEM; 4001 } 4002 } 4003 4004 return 0; 4005 } 4006 4007 int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) 4008 { 4009 int err = 0; 4010 4011 mutex_lock(&rdtgroup_mutex); 4012 4013 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { 4014 /* RDT_RESOURCE_MBA is never mon_capable */ 4015 err = mba_sc_domain_allocate(r, d); 4016 goto out_unlock; 4017 } 4018 4019 if (!r->mon_capable) 4020 goto out_unlock; 4021 4022 err = domain_setup_mon_state(r, d); 4023 if (err) 4024 goto out_unlock; 4025 4026 if (is_mbm_enabled()) { 4027 INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); 4028 mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, 4029 RESCTRL_PICK_ANY_CPU); 4030 } 4031 4032 if (is_llc_occupancy_enabled()) 4033 INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); 4034 4035 /* 4036 * If the filesystem is not mounted then only the default resource group 4037 * exists. Creation of its directories is deferred until mount time 4038 * by rdt_get_tree() calling mkdir_mondata_all(). 4039 * If resctrl is mounted, add per domain monitor data directories. 4040 */ 4041 if (resctrl_mounted && resctrl_arch_mon_capable()) 4042 mkdir_mondata_subdir_allrdtgrp(r, d); 4043 4044 out_unlock: 4045 mutex_unlock(&rdtgroup_mutex); 4046 4047 return err; 4048 } 4049 4050 void resctrl_online_cpu(unsigned int cpu) 4051 { 4052 mutex_lock(&rdtgroup_mutex); 4053 /* The CPU is set in default rdtgroup after online. 
*/ 4054 cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); 4055 mutex_unlock(&rdtgroup_mutex); 4056 } 4057 4058 static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) 4059 { 4060 struct rdtgroup *cr; 4061 4062 list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { 4063 if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) 4064 break; 4065 } 4066 } 4067 4068 void resctrl_offline_cpu(unsigned int cpu) 4069 { 4070 struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 4071 struct rdtgroup *rdtgrp; 4072 struct rdt_domain *d; 4073 4074 mutex_lock(&rdtgroup_mutex); 4075 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { 4076 if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { 4077 clear_childcpus(rdtgrp, cpu); 4078 break; 4079 } 4080 } 4081 4082 if (!l3->mon_capable) 4083 goto out_unlock; 4084 4085 d = get_domain_from_cpu(cpu, l3); 4086 if (d) { 4087 if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { 4088 cancel_delayed_work(&d->mbm_over); 4089 mbm_setup_overflow_handler(d, 0, cpu); 4090 } 4091 if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && 4092 has_busy_rmid(d)) { 4093 cancel_delayed_work(&d->cqm_limbo); 4094 cqm_setup_limbo_handler(d, 0, cpu); 4095 } 4096 } 4097 4098 out_unlock: 4099 mutex_unlock(&rdtgroup_mutex); 4100 } 4101 4102 /* 4103 * rdtgroup_init - rdtgroup initialization 4104 * 4105 * Setup resctrl file system including set up root, create mount point, 4106 * register rdtgroup filesystem, and initialize files under root directory. 4107 * 4108 * Return: 0 on success or -errno 4109 */ 4110 int __init rdtgroup_init(void) 4111 { 4112 int ret = 0; 4113 4114 seq_buf_init(&last_cmd_status, last_cmd_status_buf, 4115 sizeof(last_cmd_status_buf)); 4116 4117 rdtgroup_setup_default(); 4118 4119 ret = sysfs_create_mount_point(fs_kobj, "resctrl"); 4120 if (ret) 4121 return ret; 4122 4123 ret = register_filesystem(&rdt_fs_type); 4124 if (ret) 4125 goto cleanup_mountpoint; 4126 4127 /* 4128 * Adding the resctrl debugfs directory here may not be ideal since 4129 * it would let the resctrl debugfs directory appear on the debugfs 4130 * filesystem before the resctrl filesystem is mounted. 4131 * It may also be ok since that would enable debugging of RDT before 4132 * resctrl is mounted. 4133 * The reason why the debugfs directory is created here and not in 4134 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and 4135 * during the debugfs directory creation also &sb->s_type->i_mutex_key 4136 * (the lockdep class of inode->i_rwsem). Other filesystem 4137 * interactions (eg. SyS_getdents) have the lock ordering: 4138 * &sb->s_type->i_mutex_key --> &mm->mmap_lock 4139 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex 4140 * is taken, thus creating dependency: 4141 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause 4142 * issues considering the other two lock dependencies. 4143 * By creating the debugfs directory here we avoid a dependency 4144 * that may cause deadlock (even though file operations cannot 4145 * occur until the filesystem is mounted, but I do not know how to 4146 * tell lockdep that). 4147 */ 4148 debugfs_resctrl = debugfs_create_dir("resctrl", NULL); 4149 4150 return 0; 4151 4152 cleanup_mountpoint: 4153 sysfs_remove_mount_point(fs_kobj, "resctrl"); 4154 4155 return ret; 4156 } 4157 4158 void __exit rdtgroup_exit(void) 4159 { 4160 debugfs_remove_recursive(debugfs_resctrl); 4161 unregister_filesystem(&rdt_fs_type); 4162 sysfs_remove_mount_point(fs_kobj, "resctrl"); 4163 } 4164