// SPDX-License-Identifier: GPL-2.0-only
/*
 * User interface for Resource Allocation in Resource Director Technology (RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
#include <linux/user_namespace.h>

#include <uapi/linux/magic.h>

#include <asm/resctrl.h>
#include "internal.h"

DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);

/* list of entries for the schemata file */
LIST_HEAD(resctrl_schema_all);

/* The filesystem can only be mounted once. */
bool resctrl_mounted;

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;

/* Kernel fs node for "mon_groups" directory under root */
static struct kernfs_node *kn_mongrp;

/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;

static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];

static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
static void rdtgroup_destroy_root(void);

struct dentry *debugfs_resctrl;

static bool resctrl_debug;

void rdt_last_cmd_clear(void)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_clear(&last_cmd_status);
}

void rdt_last_cmd_puts(const char *s)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_puts(&last_cmd_status, s);
}

void rdt_last_cmd_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_vprintf(&last_cmd_status, fmt, ap);
	va_end(ap);
}

void rdt_staged_configs_clear(void)
{
	struct rdt_resource *r;
	struct rdt_domain *dom;

	lockdep_assert_held(&rdtgroup_mutex);

	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(dom, &r->domains, list)
			memset(dom->staged_config, 0, sizeof(dom->staged_config));
	}
}

/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set current's closid to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
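 *
 * For illustration (example values, not part of the original comment): on a
 * system where every schema supports at least 8 CLOSIDs, closid_init() below
 * sets closid_free_map to 0xff and then reserves RESCTRL_RESERVED_CLOSID for
 * the default group, leaving 0xfe, i.e. CLOSIDs 1-7 free for new resource
 * groups.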
 */
static unsigned long closid_free_map;
static int closid_free_map_len;

int closids_supported(void)
{
	return closid_free_map_len;
}

static void closid_init(void)
{
	struct resctrl_schema *s;
	u32 rdt_min_closid = 32;

	/* Compute rdt_min_closid across all resources */
	list_for_each_entry(s, &resctrl_schema_all, list)
		rdt_min_closid = min(rdt_min_closid, s->num_closid);

	closid_free_map = BIT_MASK(rdt_min_closid) - 1;

	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
	closid_free_map_len = rdt_min_closid;
}

static int closid_alloc(void)
{
	int cleanest_closid;
	u32 closid;

	lockdep_assert_held(&rdtgroup_mutex);

	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		cleanest_closid = resctrl_find_cleanest_closid();
		if (cleanest_closid < 0)
			return cleanest_closid;
		closid = cleanest_closid;
	} else {
		closid = ffs(closid_free_map);
		if (closid == 0)
			return -ENOSPC;
		closid--;
	}
	__clear_bit(closid, &closid_free_map);

	return closid;
}

void closid_free(int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	__set_bit(closid, &closid_free_map);
}

/**
 * closid_allocated - test if provided closid is in use
 * @closid: closid to be tested
 *
 * Return: true if @closid is currently associated with a resource group,
 * false if @closid is free
 */
bool closid_allocated(unsigned int closid)
{
	lockdep_assert_held(&rdtgroup_mutex);

	return !test_bit(closid, &closid_free_map);
}

/**
 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 * @closid: closid of the resource group
 *
 * Each resource group is associated with a @closid. Here the mode
 * of a resource group can be queried by searching for it using its closid.
 *
 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 */
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
{
	struct rdtgroup *rdtgrp;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->closid == closid)
			return rdtgrp->mode;
	}

	return RDT_NUM_MODES;
}

static const char * const rdt_mode_str[] = {
	[RDT_MODE_SHAREABLE]		= "shareable",
	[RDT_MODE_EXCLUSIVE]		= "exclusive",
	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
};

/**
 * rdtgroup_mode_str - Return the string representation of mode
 * @mode: the resource group mode as &enum rdtgroup_mode
 *
 * Return: string representation of valid mode, "unknown" otherwise
 */
static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
{
	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
		return "unknown";

	return rdt_mode_str[mode];
}

/* set uid and gid of rdtgroup dirs and files to that of the creator */
static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
				.ia_uid = current_fsuid(),
				.ia_gid = current_fsgid(), };

	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return 0;

	return kernfs_setattr(kn, &iattr);
}

static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
{
	struct kernfs_node *kn;
	int ret;

	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
				  0, rft->kf_ops, rft, NULL, NULL);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	return 0;
}

static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	if (rft->seq_show)
		return rft->seq_show(of, m, arg);
	return 0;
}

static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (rft->write)
		return rft->write(of, buf, nbytes, off);

	return -EINVAL;
}

static const struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};

static const struct kernfs_ops kf_mondata_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.seq_show		= rdtgroup_mondata_show,
};

static bool is_cpu_list(struct kernfs_open_file *of)
{
	struct rftype *rft = of->kn->priv;

	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
}

static int rdtgroup_cpus_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	struct cpumask *mask;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp) {
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
			if (!rdtgrp->plr->d) {
				rdt_last_cmd_clear();
				rdt_last_cmd_puts("Cache domain offline\n");
				ret = -ENODEV;
			} else {
				mask = &rdtgrp->plr->d->cpu_mask;
				seq_printf(s, is_cpu_list(of) ?
					   "%*pbl\n" : "%*pb\n",
					   cpumask_pr_args(mask));
			}
		} else {
			seq_printf(s, is_cpu_list(of) ?
				   "%*pbl\n" : "%*pb\n",
				   cpumask_pr_args(&rdtgrp->cpu_mask));
		}
	} else {
		ret = -ENOENT;
	}
	rdtgroup_kn_unlock(of->kn);

	return ret;
}

/*
 * This is safe against resctrl_sched_in() called from __switch_to()
 * because __switch_to() is executed with interrupts disabled. A local call
 * from update_closid_rmid() is protected against __switch_to() because
 * preemption is disabled.
 */
static void update_cpu_closid_rmid(void *info)
{
	struct rdtgroup *r = info;

	if (r) {
		this_cpu_write(pqr_state.default_closid, r->closid);
		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
	}

	/*
	 * We cannot unconditionally write the MSR because the current
	 * executing task might have its own closid selected. Just reuse
	 * the context switch code.
	 */
	resctrl_sched_in(current);
}

/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 *
 * Per task closids/rmids must have been set up before calling this function.
 */
static void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
}

static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			  cpumask_var_t tmpmask)
{
	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
	struct list_head *head;

	/* Check whether cpus belong to parent ctrl group */
	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Give any dropped cpus to parent rdtgroup */
		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, prgrp);
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu rmid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
			if (crgrp == rdtgrp)
				continue;
			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
				       tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}

static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
{
	struct rdtgroup *crgrp;

	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
	/* update the child mon group masks as well */
	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
}

static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *r, *crgrp;
	struct list_head *head;

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Give any dropped cpus to rdtgroup_default */
cpumask_or(&rdtgroup_default.cpu_mask, 441 &rdtgroup_default.cpu_mask, tmpmask); 442 update_closid_rmid(tmpmask, &rdtgroup_default); 443 } 444 445 /* 446 * If we added cpus, remove them from previous group and 447 * the prev group's child groups that owned them 448 * and update per-cpu closid/rmid. 449 */ 450 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); 451 if (!cpumask_empty(tmpmask)) { 452 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { 453 if (r == rdtgrp) 454 continue; 455 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); 456 if (!cpumask_empty(tmpmask1)) 457 cpumask_rdtgrp_clear(r, tmpmask1); 458 } 459 update_closid_rmid(tmpmask, rdtgrp); 460 } 461 462 /* Done pushing/pulling - update this group with new mask */ 463 cpumask_copy(&rdtgrp->cpu_mask, newmask); 464 465 /* 466 * Clear child mon group masks since there is a new parent mask 467 * now and update the rmid for the cpus the child lost. 468 */ 469 head = &rdtgrp->mon.crdtgrp_list; 470 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 471 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); 472 update_closid_rmid(tmpmask, rdtgrp); 473 cpumask_clear(&crgrp->cpu_mask); 474 } 475 476 return 0; 477 } 478 479 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, 480 char *buf, size_t nbytes, loff_t off) 481 { 482 cpumask_var_t tmpmask, newmask, tmpmask1; 483 struct rdtgroup *rdtgrp; 484 int ret; 485 486 if (!buf) 487 return -EINVAL; 488 489 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 490 return -ENOMEM; 491 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { 492 free_cpumask_var(tmpmask); 493 return -ENOMEM; 494 } 495 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { 496 free_cpumask_var(tmpmask); 497 free_cpumask_var(newmask); 498 return -ENOMEM; 499 } 500 501 rdtgrp = rdtgroup_kn_lock_live(of->kn); 502 if (!rdtgrp) { 503 ret = -ENOENT; 504 goto unlock; 505 } 506 507 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 508 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 509 ret = -EINVAL; 510 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 511 goto unlock; 512 } 513 514 if (is_cpu_list(of)) 515 ret = cpulist_parse(buf, newmask); 516 else 517 ret = cpumask_parse(buf, newmask); 518 519 if (ret) { 520 rdt_last_cmd_puts("Bad CPU list/mask\n"); 521 goto unlock; 522 } 523 524 /* check that user didn't specify any offline cpus */ 525 cpumask_andnot(tmpmask, newmask, cpu_online_mask); 526 if (!cpumask_empty(tmpmask)) { 527 ret = -EINVAL; 528 rdt_last_cmd_puts("Can only assign online CPUs\n"); 529 goto unlock; 530 } 531 532 if (rdtgrp->type == RDTCTRL_GROUP) 533 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); 534 else if (rdtgrp->type == RDTMON_GROUP) 535 ret = cpus_mon_write(rdtgrp, newmask, tmpmask); 536 else 537 ret = -EINVAL; 538 539 unlock: 540 rdtgroup_kn_unlock(of->kn); 541 free_cpumask_var(tmpmask); 542 free_cpumask_var(newmask); 543 free_cpumask_var(tmpmask1); 544 545 return ret ?: nbytes; 546 } 547 548 /** 549 * rdtgroup_remove - the helper to remove resource group safely 550 * @rdtgrp: resource group to remove 551 * 552 * On resource group creation via a mkdir, an extra kernfs_node reference is 553 * taken to ensure that the rdtgroup structure remains accessible for the 554 * rdtgroup_kn_unlock() calls where it is removed. 555 * 556 * Drop the extra reference here, then free the rdtgroup structure. 
557 * 558 * Return: void 559 */ 560 static void rdtgroup_remove(struct rdtgroup *rdtgrp) 561 { 562 kernfs_put(rdtgrp->kn); 563 kfree(rdtgrp); 564 } 565 566 static void _update_task_closid_rmid(void *task) 567 { 568 /* 569 * If the task is still current on this CPU, update PQR_ASSOC MSR. 570 * Otherwise, the MSR is updated when the task is scheduled in. 571 */ 572 if (task == current) 573 resctrl_sched_in(task); 574 } 575 576 static void update_task_closid_rmid(struct task_struct *t) 577 { 578 if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) 579 smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); 580 else 581 _update_task_closid_rmid(t); 582 } 583 584 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp) 585 { 586 u32 closid, rmid = rdtgrp->mon.rmid; 587 588 if (rdtgrp->type == RDTCTRL_GROUP) 589 closid = rdtgrp->closid; 590 else if (rdtgrp->type == RDTMON_GROUP) 591 closid = rdtgrp->mon.parent->closid; 592 else 593 return false; 594 595 return resctrl_arch_match_closid(tsk, closid) && 596 resctrl_arch_match_rmid(tsk, closid, rmid); 597 } 598 599 static int __rdtgroup_move_task(struct task_struct *tsk, 600 struct rdtgroup *rdtgrp) 601 { 602 /* If the task is already in rdtgrp, no need to move the task. */ 603 if (task_in_rdtgroup(tsk, rdtgrp)) 604 return 0; 605 606 /* 607 * Set the task's closid/rmid before the PQR_ASSOC MSR can be 608 * updated by them. 609 * 610 * For ctrl_mon groups, move both closid and rmid. 611 * For monitor groups, can move the tasks only from 612 * their parent CTRL group. 613 */ 614 if (rdtgrp->type == RDTMON_GROUP && 615 !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) { 616 rdt_last_cmd_puts("Can't move task to different control group\n"); 617 return -EINVAL; 618 } 619 620 if (rdtgrp->type == RDTMON_GROUP) 621 resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid, 622 rdtgrp->mon.rmid); 623 else 624 resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid, 625 rdtgrp->mon.rmid); 626 627 /* 628 * Ensure the task's closid and rmid are written before determining if 629 * the task is current that will decide if it will be interrupted. 630 * This pairs with the full barrier between the rq->curr update and 631 * resctrl_sched_in() during context switch. 632 */ 633 smp_mb(); 634 635 /* 636 * By now, the task's closid and rmid are set. If the task is current 637 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource 638 * group go into effect. If the task is not current, the MSR will be 639 * updated when the task is scheduled in. 
640 */ 641 update_task_closid_rmid(tsk); 642 643 return 0; 644 } 645 646 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) 647 { 648 return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) && 649 resctrl_arch_match_closid(t, r->closid)); 650 } 651 652 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) 653 { 654 return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) && 655 resctrl_arch_match_rmid(t, r->mon.parent->closid, 656 r->mon.rmid)); 657 } 658 659 /** 660 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group 661 * @r: Resource group 662 * 663 * Return: 1 if tasks have been assigned to @r, 0 otherwise 664 */ 665 int rdtgroup_tasks_assigned(struct rdtgroup *r) 666 { 667 struct task_struct *p, *t; 668 int ret = 0; 669 670 lockdep_assert_held(&rdtgroup_mutex); 671 672 rcu_read_lock(); 673 for_each_process_thread(p, t) { 674 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 675 ret = 1; 676 break; 677 } 678 } 679 rcu_read_unlock(); 680 681 return ret; 682 } 683 684 static int rdtgroup_task_write_permission(struct task_struct *task, 685 struct kernfs_open_file *of) 686 { 687 const struct cred *tcred = get_task_cred(task); 688 const struct cred *cred = current_cred(); 689 int ret = 0; 690 691 /* 692 * Even if we're attaching all tasks in the thread group, we only 693 * need to check permissions on one of them. 694 */ 695 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && 696 !uid_eq(cred->euid, tcred->uid) && 697 !uid_eq(cred->euid, tcred->suid)) { 698 rdt_last_cmd_printf("No permission to move task %d\n", task->pid); 699 ret = -EPERM; 700 } 701 702 put_cred(tcred); 703 return ret; 704 } 705 706 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, 707 struct kernfs_open_file *of) 708 { 709 struct task_struct *tsk; 710 int ret; 711 712 rcu_read_lock(); 713 if (pid) { 714 tsk = find_task_by_vpid(pid); 715 if (!tsk) { 716 rcu_read_unlock(); 717 rdt_last_cmd_printf("No task %d\n", pid); 718 return -ESRCH; 719 } 720 } else { 721 tsk = current; 722 } 723 724 get_task_struct(tsk); 725 rcu_read_unlock(); 726 727 ret = rdtgroup_task_write_permission(tsk, of); 728 if (!ret) 729 ret = __rdtgroup_move_task(tsk, rdtgrp); 730 731 put_task_struct(tsk); 732 return ret; 733 } 734 735 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, 736 char *buf, size_t nbytes, loff_t off) 737 { 738 struct rdtgroup *rdtgrp; 739 char *pid_str; 740 int ret = 0; 741 pid_t pid; 742 743 rdtgrp = rdtgroup_kn_lock_live(of->kn); 744 if (!rdtgrp) { 745 rdtgroup_kn_unlock(of->kn); 746 return -ENOENT; 747 } 748 rdt_last_cmd_clear(); 749 750 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || 751 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 752 ret = -EINVAL; 753 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 754 goto unlock; 755 } 756 757 while (buf && buf[0] != '\0' && buf[0] != '\n') { 758 pid_str = strim(strsep(&buf, ",")); 759 760 if (kstrtoint(pid_str, 0, &pid)) { 761 rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); 762 ret = -EINVAL; 763 break; 764 } 765 766 if (pid < 0) { 767 rdt_last_cmd_printf("Invalid pid %d\n", pid); 768 ret = -EINVAL; 769 break; 770 } 771 772 ret = rdtgroup_move_task(pid, rdtgrp, of); 773 if (ret) { 774 rdt_last_cmd_printf("Error while processing task %d\n", pid); 775 break; 776 } 777 } 778 779 unlock: 780 rdtgroup_kn_unlock(of->kn); 781 782 return ret ?: nbytes; 783 } 784 785 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) 786 { 787 struct task_struct *p, *t; 788 
pid_t pid; 789 790 rcu_read_lock(); 791 for_each_process_thread(p, t) { 792 if (is_closid_match(t, r) || is_rmid_match(t, r)) { 793 pid = task_pid_vnr(t); 794 if (pid) 795 seq_printf(s, "%d\n", pid); 796 } 797 } 798 rcu_read_unlock(); 799 } 800 801 static int rdtgroup_tasks_show(struct kernfs_open_file *of, 802 struct seq_file *s, void *v) 803 { 804 struct rdtgroup *rdtgrp; 805 int ret = 0; 806 807 rdtgrp = rdtgroup_kn_lock_live(of->kn); 808 if (rdtgrp) 809 show_rdt_tasks(rdtgrp, s); 810 else 811 ret = -ENOENT; 812 rdtgroup_kn_unlock(of->kn); 813 814 return ret; 815 } 816 817 static int rdtgroup_closid_show(struct kernfs_open_file *of, 818 struct seq_file *s, void *v) 819 { 820 struct rdtgroup *rdtgrp; 821 int ret = 0; 822 823 rdtgrp = rdtgroup_kn_lock_live(of->kn); 824 if (rdtgrp) 825 seq_printf(s, "%u\n", rdtgrp->closid); 826 else 827 ret = -ENOENT; 828 rdtgroup_kn_unlock(of->kn); 829 830 return ret; 831 } 832 833 static int rdtgroup_rmid_show(struct kernfs_open_file *of, 834 struct seq_file *s, void *v) 835 { 836 struct rdtgroup *rdtgrp; 837 int ret = 0; 838 839 rdtgrp = rdtgroup_kn_lock_live(of->kn); 840 if (rdtgrp) 841 seq_printf(s, "%u\n", rdtgrp->mon.rmid); 842 else 843 ret = -ENOENT; 844 rdtgroup_kn_unlock(of->kn); 845 846 return ret; 847 } 848 849 #ifdef CONFIG_PROC_CPU_RESCTRL 850 851 /* 852 * A task can only be part of one resctrl control group and of one monitor 853 * group which is associated to that control group. 854 * 855 * 1) res: 856 * mon: 857 * 858 * resctrl is not available. 859 * 860 * 2) res:/ 861 * mon: 862 * 863 * Task is part of the root resctrl control group, and it is not associated 864 * to any monitor group. 865 * 866 * 3) res:/ 867 * mon:mon0 868 * 869 * Task is part of the root resctrl control group and monitor group mon0. 870 * 871 * 4) res:group0 872 * mon: 873 * 874 * Task is part of resctrl control group group0, and it is not associated 875 * to any monitor group. 876 * 877 * 5) res:group0 878 * mon:mon1 879 * 880 * Task is part of resctrl control group group0 and monitor group mon1. 881 */ 882 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, 883 struct pid *pid, struct task_struct *tsk) 884 { 885 struct rdtgroup *rdtg; 886 int ret = 0; 887 888 mutex_lock(&rdtgroup_mutex); 889 890 /* Return empty if resctrl has not been mounted. */ 891 if (!resctrl_mounted) { 892 seq_puts(s, "res:\nmon:\n"); 893 goto unlock; 894 } 895 896 list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { 897 struct rdtgroup *crg; 898 899 /* 900 * Task information is only relevant for shareable 901 * and exclusive groups. 902 */ 903 if (rdtg->mode != RDT_MODE_SHAREABLE && 904 rdtg->mode != RDT_MODE_EXCLUSIVE) 905 continue; 906 907 if (!resctrl_arch_match_closid(tsk, rdtg->closid)) 908 continue; 909 910 seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", 911 rdtg->kn->name); 912 seq_puts(s, "mon:"); 913 list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, 914 mon.crdtgrp_list) { 915 if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, 916 crg->mon.rmid)) 917 continue; 918 seq_printf(s, "%s", crg->kn->name); 919 break; 920 } 921 seq_putc(s, '\n'); 922 goto unlock; 923 } 924 /* 925 * The above search should succeed. Otherwise return 926 * with an error. 
927 */ 928 ret = -ENOENT; 929 unlock: 930 mutex_unlock(&rdtgroup_mutex); 931 932 return ret; 933 } 934 #endif 935 936 static int rdt_last_cmd_status_show(struct kernfs_open_file *of, 937 struct seq_file *seq, void *v) 938 { 939 int len; 940 941 mutex_lock(&rdtgroup_mutex); 942 len = seq_buf_used(&last_cmd_status); 943 if (len) 944 seq_printf(seq, "%.*s", len, last_cmd_status_buf); 945 else 946 seq_puts(seq, "ok\n"); 947 mutex_unlock(&rdtgroup_mutex); 948 return 0; 949 } 950 951 static int rdt_num_closids_show(struct kernfs_open_file *of, 952 struct seq_file *seq, void *v) 953 { 954 struct resctrl_schema *s = of->kn->parent->priv; 955 956 seq_printf(seq, "%u\n", s->num_closid); 957 return 0; 958 } 959 960 static int rdt_default_ctrl_show(struct kernfs_open_file *of, 961 struct seq_file *seq, void *v) 962 { 963 struct resctrl_schema *s = of->kn->parent->priv; 964 struct rdt_resource *r = s->res; 965 966 seq_printf(seq, "%x\n", r->default_ctrl); 967 return 0; 968 } 969 970 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, 971 struct seq_file *seq, void *v) 972 { 973 struct resctrl_schema *s = of->kn->parent->priv; 974 struct rdt_resource *r = s->res; 975 976 seq_printf(seq, "%u\n", r->cache.min_cbm_bits); 977 return 0; 978 } 979 980 static int rdt_shareable_bits_show(struct kernfs_open_file *of, 981 struct seq_file *seq, void *v) 982 { 983 struct resctrl_schema *s = of->kn->parent->priv; 984 struct rdt_resource *r = s->res; 985 986 seq_printf(seq, "%x\n", r->cache.shareable_bits); 987 return 0; 988 } 989 990 /* 991 * rdt_bit_usage_show - Display current usage of resources 992 * 993 * A domain is a shared resource that can now be allocated differently. Here 994 * we display the current regions of the domain as an annotated bitmask. 995 * For each domain of this resource its allocation bitmask 996 * is annotated as below to indicate the current usage of the corresponding bit: 997 * 0 - currently unused 998 * X - currently available for sharing and used by software and hardware 999 * H - currently used by hardware only but available for software use 1000 * S - currently used and shareable by software only 1001 * E - currently used exclusively by one resource group 1002 * P - currently pseudo-locked by one resource group 1003 */ 1004 static int rdt_bit_usage_show(struct kernfs_open_file *of, 1005 struct seq_file *seq, void *v) 1006 { 1007 struct resctrl_schema *s = of->kn->parent->priv; 1008 /* 1009 * Use unsigned long even though only 32 bits are used to ensure 1010 * test_bit() is used safely. 
1011 */ 1012 unsigned long sw_shareable = 0, hw_shareable = 0; 1013 unsigned long exclusive = 0, pseudo_locked = 0; 1014 struct rdt_resource *r = s->res; 1015 struct rdt_domain *dom; 1016 int i, hwb, swb, excl, psl; 1017 enum rdtgrp_mode mode; 1018 bool sep = false; 1019 u32 ctrl_val; 1020 1021 cpus_read_lock(); 1022 mutex_lock(&rdtgroup_mutex); 1023 hw_shareable = r->cache.shareable_bits; 1024 list_for_each_entry(dom, &r->domains, list) { 1025 if (sep) 1026 seq_putc(seq, ';'); 1027 sw_shareable = 0; 1028 exclusive = 0; 1029 seq_printf(seq, "%d=", dom->id); 1030 for (i = 0; i < closids_supported(); i++) { 1031 if (!closid_allocated(i)) 1032 continue; 1033 ctrl_val = resctrl_arch_get_config(r, dom, i, 1034 s->conf_type); 1035 mode = rdtgroup_mode_by_closid(i); 1036 switch (mode) { 1037 case RDT_MODE_SHAREABLE: 1038 sw_shareable |= ctrl_val; 1039 break; 1040 case RDT_MODE_EXCLUSIVE: 1041 exclusive |= ctrl_val; 1042 break; 1043 case RDT_MODE_PSEUDO_LOCKSETUP: 1044 /* 1045 * RDT_MODE_PSEUDO_LOCKSETUP is possible 1046 * here but not included since the CBM 1047 * associated with this CLOSID in this mode 1048 * is not initialized and no task or cpu can be 1049 * assigned this CLOSID. 1050 */ 1051 break; 1052 case RDT_MODE_PSEUDO_LOCKED: 1053 case RDT_NUM_MODES: 1054 WARN(1, 1055 "invalid mode for closid %d\n", i); 1056 break; 1057 } 1058 } 1059 for (i = r->cache.cbm_len - 1; i >= 0; i--) { 1060 pseudo_locked = dom->plr ? dom->plr->cbm : 0; 1061 hwb = test_bit(i, &hw_shareable); 1062 swb = test_bit(i, &sw_shareable); 1063 excl = test_bit(i, &exclusive); 1064 psl = test_bit(i, &pseudo_locked); 1065 if (hwb && swb) 1066 seq_putc(seq, 'X'); 1067 else if (hwb && !swb) 1068 seq_putc(seq, 'H'); 1069 else if (!hwb && swb) 1070 seq_putc(seq, 'S'); 1071 else if (excl) 1072 seq_putc(seq, 'E'); 1073 else if (psl) 1074 seq_putc(seq, 'P'); 1075 else /* Unused bits remain */ 1076 seq_putc(seq, '0'); 1077 } 1078 sep = true; 1079 } 1080 seq_putc(seq, '\n'); 1081 mutex_unlock(&rdtgroup_mutex); 1082 cpus_read_unlock(); 1083 return 0; 1084 } 1085 1086 static int rdt_min_bw_show(struct kernfs_open_file *of, 1087 struct seq_file *seq, void *v) 1088 { 1089 struct resctrl_schema *s = of->kn->parent->priv; 1090 struct rdt_resource *r = s->res; 1091 1092 seq_printf(seq, "%u\n", r->membw.min_bw); 1093 return 0; 1094 } 1095 1096 static int rdt_num_rmids_show(struct kernfs_open_file *of, 1097 struct seq_file *seq, void *v) 1098 { 1099 struct rdt_resource *r = of->kn->parent->priv; 1100 1101 seq_printf(seq, "%d\n", r->num_rmid); 1102 1103 return 0; 1104 } 1105 1106 static int rdt_mon_features_show(struct kernfs_open_file *of, 1107 struct seq_file *seq, void *v) 1108 { 1109 struct rdt_resource *r = of->kn->parent->priv; 1110 struct mon_evt *mevt; 1111 1112 list_for_each_entry(mevt, &r->evt_list, list) { 1113 seq_printf(seq, "%s\n", mevt->name); 1114 if (mevt->configurable) 1115 seq_printf(seq, "%s_config\n", mevt->name); 1116 } 1117 1118 return 0; 1119 } 1120 1121 static int rdt_bw_gran_show(struct kernfs_open_file *of, 1122 struct seq_file *seq, void *v) 1123 { 1124 struct resctrl_schema *s = of->kn->parent->priv; 1125 struct rdt_resource *r = s->res; 1126 1127 seq_printf(seq, "%u\n", r->membw.bw_gran); 1128 return 0; 1129 } 1130 1131 static int rdt_delay_linear_show(struct kernfs_open_file *of, 1132 struct seq_file *seq, void *v) 1133 { 1134 struct resctrl_schema *s = of->kn->parent->priv; 1135 struct rdt_resource *r = s->res; 1136 1137 seq_printf(seq, "%u\n", r->membw.delay_linear); 1138 return 0; 1139 } 1140 1141 static 
int max_threshold_occ_show(struct kernfs_open_file *of, 1142 struct seq_file *seq, void *v) 1143 { 1144 seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); 1145 1146 return 0; 1147 } 1148 1149 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, 1150 struct seq_file *seq, void *v) 1151 { 1152 struct resctrl_schema *s = of->kn->parent->priv; 1153 struct rdt_resource *r = s->res; 1154 1155 if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) 1156 seq_puts(seq, "per-thread\n"); 1157 else 1158 seq_puts(seq, "max\n"); 1159 1160 return 0; 1161 } 1162 1163 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, 1164 char *buf, size_t nbytes, loff_t off) 1165 { 1166 unsigned int bytes; 1167 int ret; 1168 1169 ret = kstrtouint(buf, 0, &bytes); 1170 if (ret) 1171 return ret; 1172 1173 if (bytes > resctrl_rmid_realloc_limit) 1174 return -EINVAL; 1175 1176 resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); 1177 1178 return nbytes; 1179 } 1180 1181 /* 1182 * rdtgroup_mode_show - Display mode of this resource group 1183 */ 1184 static int rdtgroup_mode_show(struct kernfs_open_file *of, 1185 struct seq_file *s, void *v) 1186 { 1187 struct rdtgroup *rdtgrp; 1188 1189 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1190 if (!rdtgrp) { 1191 rdtgroup_kn_unlock(of->kn); 1192 return -ENOENT; 1193 } 1194 1195 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); 1196 1197 rdtgroup_kn_unlock(of->kn); 1198 return 0; 1199 } 1200 1201 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) 1202 { 1203 switch (my_type) { 1204 case CDP_CODE: 1205 return CDP_DATA; 1206 case CDP_DATA: 1207 return CDP_CODE; 1208 default: 1209 case CDP_NONE: 1210 return CDP_NONE; 1211 } 1212 } 1213 1214 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, 1215 struct seq_file *seq, void *v) 1216 { 1217 struct resctrl_schema *s = of->kn->parent->priv; 1218 struct rdt_resource *r = s->res; 1219 1220 seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); 1221 1222 return 0; 1223 } 1224 1225 /** 1226 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other 1227 * @r: Resource to which domain instance @d belongs. 1228 * @d: The domain instance for which @closid is being tested. 1229 * @cbm: Capacity bitmask being tested. 1230 * @closid: Intended closid for @cbm. 1231 * @type: CDP type of @r. 1232 * @exclusive: Only check if overlaps with exclusive resource groups 1233 * 1234 * Checks if provided @cbm intended to be used for @closid on domain 1235 * @d overlaps with any other closids or other hardware usage associated 1236 * with this domain. If @exclusive is true then only overlaps with 1237 * resource groups in exclusive mode will be considered. If @exclusive 1238 * is false then overlaps with any resource group or hardware entities 1239 * will be considered. 1240 * 1241 * @cbm is unsigned long, even if only 32 bits are used, to make the 1242 * bitmap functions work correctly. 1243 * 1244 * Return: false if CBM does not overlap, true if it does. 
 */
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
				    unsigned long cbm, int closid,
				    enum resctrl_conf_type type, bool exclusive)
{
	enum rdtgrp_mode mode;
	unsigned long ctrl_b;
	int i;

	/* Check for any overlap with regions used by hardware directly */
	if (!exclusive) {
		ctrl_b = r->cache.shareable_bits;
		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
			return true;
	}

	/* Check for overlap with other resource groups */
	for (i = 0; i < closids_supported(); i++) {
		ctrl_b = resctrl_arch_get_config(r, d, i, type);
		mode = rdtgroup_mode_by_closid(i);
		if (closid_allocated(i) && i != closid &&
		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
				if (exclusive) {
					if (mode == RDT_MODE_EXCLUSIVE)
						return true;
					continue;
				}
				return true;
			}
		}
	}

	return false;
}

/**
 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
 * @s: Schema for the resource to which domain instance @d belongs.
 * @d: The domain instance for which @closid is being tested.
 * @cbm: Capacity bitmask being tested.
 * @closid: Intended closid for @cbm.
 * @exclusive: Only check if overlaps with exclusive resource groups
 *
 * Resources that can be allocated using a CBM can use the CBM to control
 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
 * for overlap. Overlap test is not limited to the specific resource for
 * which the CBM is intended though - when dealing with CDP resources that
 * share the underlying hardware the overlap check should be performed on
 * the CDP resource sharing the hardware also.
 *
 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
 * overlap test.
 *
 * Return: true if CBM overlap detected, false if there is no overlap
 */
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
			   unsigned long cbm, int closid, bool exclusive)
{
	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
	struct rdt_resource *r = s->res;

	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
				    exclusive))
		return true;

	if (!resctrl_arch_get_cdp_enabled(r->rid))
		return false;
	return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
}

/**
 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
 * @rdtgrp: Resource group identified through its closid.
 *
 * An exclusive resource group implies that there should be no sharing of
 * its allocated resources. At the time this group is considered to be
 * exclusive this test can determine if its current schemata supports this
 * setting by testing for overlap with all other resource groups.
 *
 * Return: true if resource group can be exclusive, false if there is overlap
 * with allocations of other resource groups and thus this resource group
 * cannot be exclusive.
1328 */ 1329 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) 1330 { 1331 int closid = rdtgrp->closid; 1332 struct resctrl_schema *s; 1333 struct rdt_resource *r; 1334 bool has_cache = false; 1335 struct rdt_domain *d; 1336 u32 ctrl; 1337 1338 /* Walking r->domains, ensure it can't race with cpuhp */ 1339 lockdep_assert_cpus_held(); 1340 1341 list_for_each_entry(s, &resctrl_schema_all, list) { 1342 r = s->res; 1343 if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) 1344 continue; 1345 has_cache = true; 1346 list_for_each_entry(d, &r->domains, list) { 1347 ctrl = resctrl_arch_get_config(r, d, closid, 1348 s->conf_type); 1349 if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { 1350 rdt_last_cmd_puts("Schemata overlaps\n"); 1351 return false; 1352 } 1353 } 1354 } 1355 1356 if (!has_cache) { 1357 rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); 1358 return false; 1359 } 1360 1361 return true; 1362 } 1363 1364 /* 1365 * rdtgroup_mode_write - Modify the resource group's mode 1366 */ 1367 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, 1368 char *buf, size_t nbytes, loff_t off) 1369 { 1370 struct rdtgroup *rdtgrp; 1371 enum rdtgrp_mode mode; 1372 int ret = 0; 1373 1374 /* Valid input requires a trailing newline */ 1375 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1376 return -EINVAL; 1377 buf[nbytes - 1] = '\0'; 1378 1379 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1380 if (!rdtgrp) { 1381 rdtgroup_kn_unlock(of->kn); 1382 return -ENOENT; 1383 } 1384 1385 rdt_last_cmd_clear(); 1386 1387 mode = rdtgrp->mode; 1388 1389 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || 1390 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || 1391 (!strcmp(buf, "pseudo-locksetup") && 1392 mode == RDT_MODE_PSEUDO_LOCKSETUP) || 1393 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) 1394 goto out; 1395 1396 if (mode == RDT_MODE_PSEUDO_LOCKED) { 1397 rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); 1398 ret = -EINVAL; 1399 goto out; 1400 } 1401 1402 if (!strcmp(buf, "shareable")) { 1403 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1404 ret = rdtgroup_locksetup_exit(rdtgrp); 1405 if (ret) 1406 goto out; 1407 } 1408 rdtgrp->mode = RDT_MODE_SHAREABLE; 1409 } else if (!strcmp(buf, "exclusive")) { 1410 if (!rdtgroup_mode_test_exclusive(rdtgrp)) { 1411 ret = -EINVAL; 1412 goto out; 1413 } 1414 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1415 ret = rdtgroup_locksetup_exit(rdtgrp); 1416 if (ret) 1417 goto out; 1418 } 1419 rdtgrp->mode = RDT_MODE_EXCLUSIVE; 1420 } else if (!strcmp(buf, "pseudo-locksetup")) { 1421 ret = rdtgroup_locksetup_enter(rdtgrp); 1422 if (ret) 1423 goto out; 1424 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; 1425 } else { 1426 rdt_last_cmd_puts("Unknown or unsupported mode\n"); 1427 ret = -EINVAL; 1428 } 1429 1430 out: 1431 rdtgroup_kn_unlock(of->kn); 1432 return ret ?: nbytes; 1433 } 1434 1435 /** 1436 * rdtgroup_cbm_to_size - Translate CBM to size in bytes 1437 * @r: RDT resource to which @d belongs. 1438 * @d: RDT domain instance. 1439 * @cbm: bitmask for which the size should be computed. 1440 * 1441 * The bitmask provided associated with the RDT domain instance @d will be 1442 * translated into how many bytes it represents. The size in bytes is 1443 * computed by first dividing the total cache size by the CBM length to 1444 * determine how many bytes each bit in the bitmask represents. The result 1445 * is multiplied with the number of bits set in the bitmask. 
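 *
 * Worked example (illustrative numbers, not from the original comment): for a
 * 32 MB L3 cache with a 16 bit CBM, each bit represents 32 MB / 16 = 2 MB, so
 * a @cbm with four bits set translates to 4 * 2 MB = 8 MB.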
1446 * 1447 * @cbm is unsigned long, even if only 32 bits are used to make the 1448 * bitmap functions work correctly. 1449 */ 1450 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, 1451 struct rdt_domain *d, unsigned long cbm) 1452 { 1453 struct cpu_cacheinfo *ci; 1454 unsigned int size = 0; 1455 int num_b, i; 1456 1457 num_b = bitmap_weight(&cbm, r->cache.cbm_len); 1458 ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); 1459 for (i = 0; i < ci->num_leaves; i++) { 1460 if (ci->info_list[i].level == r->cache_level) { 1461 size = ci->info_list[i].size / r->cache.cbm_len * num_b; 1462 break; 1463 } 1464 } 1465 1466 return size; 1467 } 1468 1469 /* 1470 * rdtgroup_size_show - Display size in bytes of allocated regions 1471 * 1472 * The "size" file mirrors the layout of the "schemata" file, printing the 1473 * size in bytes of each region instead of the capacity bitmask. 1474 */ 1475 static int rdtgroup_size_show(struct kernfs_open_file *of, 1476 struct seq_file *s, void *v) 1477 { 1478 struct resctrl_schema *schema; 1479 enum resctrl_conf_type type; 1480 struct rdtgroup *rdtgrp; 1481 struct rdt_resource *r; 1482 struct rdt_domain *d; 1483 unsigned int size; 1484 int ret = 0; 1485 u32 closid; 1486 bool sep; 1487 u32 ctrl; 1488 1489 rdtgrp = rdtgroup_kn_lock_live(of->kn); 1490 if (!rdtgrp) { 1491 rdtgroup_kn_unlock(of->kn); 1492 return -ENOENT; 1493 } 1494 1495 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 1496 if (!rdtgrp->plr->d) { 1497 rdt_last_cmd_clear(); 1498 rdt_last_cmd_puts("Cache domain offline\n"); 1499 ret = -ENODEV; 1500 } else { 1501 seq_printf(s, "%*s:", max_name_width, 1502 rdtgrp->plr->s->name); 1503 size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, 1504 rdtgrp->plr->d, 1505 rdtgrp->plr->cbm); 1506 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); 1507 } 1508 goto out; 1509 } 1510 1511 closid = rdtgrp->closid; 1512 1513 list_for_each_entry(schema, &resctrl_schema_all, list) { 1514 r = schema->res; 1515 type = schema->conf_type; 1516 sep = false; 1517 seq_printf(s, "%*s:", max_name_width, schema->name); 1518 list_for_each_entry(d, &r->domains, list) { 1519 if (sep) 1520 seq_putc(s, ';'); 1521 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { 1522 size = 0; 1523 } else { 1524 if (is_mba_sc(r)) 1525 ctrl = d->mbps_val[closid]; 1526 else 1527 ctrl = resctrl_arch_get_config(r, d, 1528 closid, 1529 type); 1530 if (r->rid == RDT_RESOURCE_MBA || 1531 r->rid == RDT_RESOURCE_SMBA) 1532 size = ctrl; 1533 else 1534 size = rdtgroup_cbm_to_size(r, d, ctrl); 1535 } 1536 seq_printf(s, "%d=%u", d->id, size); 1537 sep = true; 1538 } 1539 seq_putc(s, '\n'); 1540 } 1541 1542 out: 1543 rdtgroup_kn_unlock(of->kn); 1544 1545 return ret; 1546 } 1547 1548 struct mon_config_info { 1549 u32 evtid; 1550 u32 mon_config; 1551 }; 1552 1553 #define INVALID_CONFIG_INDEX UINT_MAX 1554 1555 /** 1556 * mon_event_config_index_get - get the hardware index for the 1557 * configurable event 1558 * @evtid: event id. 
1559 * 1560 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID 1561 * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID 1562 * INVALID_CONFIG_INDEX for invalid evtid 1563 */ 1564 static inline unsigned int mon_event_config_index_get(u32 evtid) 1565 { 1566 switch (evtid) { 1567 case QOS_L3_MBM_TOTAL_EVENT_ID: 1568 return 0; 1569 case QOS_L3_MBM_LOCAL_EVENT_ID: 1570 return 1; 1571 default: 1572 /* Should never reach here */ 1573 return INVALID_CONFIG_INDEX; 1574 } 1575 } 1576 1577 static void mon_event_config_read(void *info) 1578 { 1579 struct mon_config_info *mon_info = info; 1580 unsigned int index; 1581 u64 msrval; 1582 1583 index = mon_event_config_index_get(mon_info->evtid); 1584 if (index == INVALID_CONFIG_INDEX) { 1585 pr_warn_once("Invalid event id %d\n", mon_info->evtid); 1586 return; 1587 } 1588 rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); 1589 1590 /* Report only the valid event configuration bits */ 1591 mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; 1592 } 1593 1594 static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info) 1595 { 1596 smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); 1597 } 1598 1599 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) 1600 { 1601 struct mon_config_info mon_info = {0}; 1602 struct rdt_domain *dom; 1603 bool sep = false; 1604 1605 cpus_read_lock(); 1606 mutex_lock(&rdtgroup_mutex); 1607 1608 list_for_each_entry(dom, &r->domains, list) { 1609 if (sep) 1610 seq_puts(s, ";"); 1611 1612 memset(&mon_info, 0, sizeof(struct mon_config_info)); 1613 mon_info.evtid = evtid; 1614 mondata_config_read(dom, &mon_info); 1615 1616 seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); 1617 sep = true; 1618 } 1619 seq_puts(s, "\n"); 1620 1621 mutex_unlock(&rdtgroup_mutex); 1622 cpus_read_unlock(); 1623 1624 return 0; 1625 } 1626 1627 static int mbm_total_bytes_config_show(struct kernfs_open_file *of, 1628 struct seq_file *seq, void *v) 1629 { 1630 struct rdt_resource *r = of->kn->parent->priv; 1631 1632 mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); 1633 1634 return 0; 1635 } 1636 1637 static int mbm_local_bytes_config_show(struct kernfs_open_file *of, 1638 struct seq_file *seq, void *v) 1639 { 1640 struct rdt_resource *r = of->kn->parent->priv; 1641 1642 mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); 1643 1644 return 0; 1645 } 1646 1647 static void mon_event_config_write(void *info) 1648 { 1649 struct mon_config_info *mon_info = info; 1650 unsigned int index; 1651 1652 index = mon_event_config_index_get(mon_info->evtid); 1653 if (index == INVALID_CONFIG_INDEX) { 1654 pr_warn_once("Invalid event id %d\n", mon_info->evtid); 1655 return; 1656 } 1657 wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); 1658 } 1659 1660 static void mbm_config_write_domain(struct rdt_resource *r, 1661 struct rdt_domain *d, u32 evtid, u32 val) 1662 { 1663 struct mon_config_info mon_info = {0}; 1664 1665 /* 1666 * Read the current config value first. If both are the same then 1667 * no need to write it again. 1668 */ 1669 mon_info.evtid = evtid; 1670 mondata_config_read(d, &mon_info); 1671 if (mon_info.mon_config == val) 1672 return; 1673 1674 mon_info.mon_config = val; 1675 1676 /* 1677 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the 1678 * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE 1679 * are scoped at the domain level. Writing any of these MSRs 1680 * on one CPU is observed by all the CPUs in the domain. 
1681 */ 1682 smp_call_function_any(&d->cpu_mask, mon_event_config_write, 1683 &mon_info, 1); 1684 1685 /* 1686 * When an Event Configuration is changed, the bandwidth counters 1687 * for all RMIDs and Events will be cleared by the hardware. The 1688 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for 1689 * every RMID on the next read to any event for every RMID. 1690 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) 1691 * cleared while it is tracked by the hardware. Clear the 1692 * mbm_local and mbm_total counts for all the RMIDs. 1693 */ 1694 resctrl_arch_reset_rmid_all(r, d); 1695 } 1696 1697 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) 1698 { 1699 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 1700 char *dom_str = NULL, *id_str; 1701 unsigned long dom_id, val; 1702 struct rdt_domain *d; 1703 1704 /* Walking r->domains, ensure it can't race with cpuhp */ 1705 lockdep_assert_cpus_held(); 1706 1707 next: 1708 if (!tok || tok[0] == '\0') 1709 return 0; 1710 1711 /* Start processing the strings for each domain */ 1712 dom_str = strim(strsep(&tok, ";")); 1713 id_str = strsep(&dom_str, "="); 1714 1715 if (!id_str || kstrtoul(id_str, 10, &dom_id)) { 1716 rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); 1717 return -EINVAL; 1718 } 1719 1720 if (!dom_str || kstrtoul(dom_str, 16, &val)) { 1721 rdt_last_cmd_puts("Non-numeric event configuration value\n"); 1722 return -EINVAL; 1723 } 1724 1725 /* Value from user cannot be more than the supported set of events */ 1726 if ((val & hw_res->mbm_cfg_mask) != val) { 1727 rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", 1728 hw_res->mbm_cfg_mask); 1729 return -EINVAL; 1730 } 1731 1732 list_for_each_entry(d, &r->domains, list) { 1733 if (d->id == dom_id) { 1734 mbm_config_write_domain(r, d, evtid, val); 1735 goto next; 1736 } 1737 } 1738 1739 return -EINVAL; 1740 } 1741 1742 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, 1743 char *buf, size_t nbytes, 1744 loff_t off) 1745 { 1746 struct rdt_resource *r = of->kn->parent->priv; 1747 int ret; 1748 1749 /* Valid input requires a trailing newline */ 1750 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1751 return -EINVAL; 1752 1753 cpus_read_lock(); 1754 mutex_lock(&rdtgroup_mutex); 1755 1756 rdt_last_cmd_clear(); 1757 1758 buf[nbytes - 1] = '\0'; 1759 1760 ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); 1761 1762 mutex_unlock(&rdtgroup_mutex); 1763 cpus_read_unlock(); 1764 1765 return ret ?: nbytes; 1766 } 1767 1768 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, 1769 char *buf, size_t nbytes, 1770 loff_t off) 1771 { 1772 struct rdt_resource *r = of->kn->parent->priv; 1773 int ret; 1774 1775 /* Valid input requires a trailing newline */ 1776 if (nbytes == 0 || buf[nbytes - 1] != '\n') 1777 return -EINVAL; 1778 1779 cpus_read_lock(); 1780 mutex_lock(&rdtgroup_mutex); 1781 1782 rdt_last_cmd_clear(); 1783 1784 buf[nbytes - 1] = '\0'; 1785 1786 ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); 1787 1788 mutex_unlock(&rdtgroup_mutex); 1789 cpus_read_unlock(); 1790 1791 return ret ?: nbytes; 1792 } 1793 1794 /* rdtgroup information files for one cache resource. 
*/ 1795 static struct rftype res_common_files[] = { 1796 { 1797 .name = "last_cmd_status", 1798 .mode = 0444, 1799 .kf_ops = &rdtgroup_kf_single_ops, 1800 .seq_show = rdt_last_cmd_status_show, 1801 .fflags = RFTYPE_TOP_INFO, 1802 }, 1803 { 1804 .name = "num_closids", 1805 .mode = 0444, 1806 .kf_ops = &rdtgroup_kf_single_ops, 1807 .seq_show = rdt_num_closids_show, 1808 .fflags = RFTYPE_CTRL_INFO, 1809 }, 1810 { 1811 .name = "mon_features", 1812 .mode = 0444, 1813 .kf_ops = &rdtgroup_kf_single_ops, 1814 .seq_show = rdt_mon_features_show, 1815 .fflags = RFTYPE_MON_INFO, 1816 }, 1817 { 1818 .name = "num_rmids", 1819 .mode = 0444, 1820 .kf_ops = &rdtgroup_kf_single_ops, 1821 .seq_show = rdt_num_rmids_show, 1822 .fflags = RFTYPE_MON_INFO, 1823 }, 1824 { 1825 .name = "cbm_mask", 1826 .mode = 0444, 1827 .kf_ops = &rdtgroup_kf_single_ops, 1828 .seq_show = rdt_default_ctrl_show, 1829 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1830 }, 1831 { 1832 .name = "min_cbm_bits", 1833 .mode = 0444, 1834 .kf_ops = &rdtgroup_kf_single_ops, 1835 .seq_show = rdt_min_cbm_bits_show, 1836 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1837 }, 1838 { 1839 .name = "shareable_bits", 1840 .mode = 0444, 1841 .kf_ops = &rdtgroup_kf_single_ops, 1842 .seq_show = rdt_shareable_bits_show, 1843 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1844 }, 1845 { 1846 .name = "bit_usage", 1847 .mode = 0444, 1848 .kf_ops = &rdtgroup_kf_single_ops, 1849 .seq_show = rdt_bit_usage_show, 1850 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1851 }, 1852 { 1853 .name = "min_bandwidth", 1854 .mode = 0444, 1855 .kf_ops = &rdtgroup_kf_single_ops, 1856 .seq_show = rdt_min_bw_show, 1857 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1858 }, 1859 { 1860 .name = "bandwidth_gran", 1861 .mode = 0444, 1862 .kf_ops = &rdtgroup_kf_single_ops, 1863 .seq_show = rdt_bw_gran_show, 1864 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1865 }, 1866 { 1867 .name = "delay_linear", 1868 .mode = 0444, 1869 .kf_ops = &rdtgroup_kf_single_ops, 1870 .seq_show = rdt_delay_linear_show, 1871 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, 1872 }, 1873 /* 1874 * Platform specific which (if any) capabilities are provided by 1875 * thread_throttle_mode. Defer "fflags" initialization to platform 1876 * discovery. 
1877 */ 1878 { 1879 .name = "thread_throttle_mode", 1880 .mode = 0444, 1881 .kf_ops = &rdtgroup_kf_single_ops, 1882 .seq_show = rdt_thread_throttle_mode_show, 1883 }, 1884 { 1885 .name = "max_threshold_occupancy", 1886 .mode = 0644, 1887 .kf_ops = &rdtgroup_kf_single_ops, 1888 .write = max_threshold_occ_write, 1889 .seq_show = max_threshold_occ_show, 1890 .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, 1891 }, 1892 { 1893 .name = "mbm_total_bytes_config", 1894 .mode = 0644, 1895 .kf_ops = &rdtgroup_kf_single_ops, 1896 .seq_show = mbm_total_bytes_config_show, 1897 .write = mbm_total_bytes_config_write, 1898 }, 1899 { 1900 .name = "mbm_local_bytes_config", 1901 .mode = 0644, 1902 .kf_ops = &rdtgroup_kf_single_ops, 1903 .seq_show = mbm_local_bytes_config_show, 1904 .write = mbm_local_bytes_config_write, 1905 }, 1906 { 1907 .name = "cpus", 1908 .mode = 0644, 1909 .kf_ops = &rdtgroup_kf_single_ops, 1910 .write = rdtgroup_cpus_write, 1911 .seq_show = rdtgroup_cpus_show, 1912 .fflags = RFTYPE_BASE, 1913 }, 1914 { 1915 .name = "cpus_list", 1916 .mode = 0644, 1917 .kf_ops = &rdtgroup_kf_single_ops, 1918 .write = rdtgroup_cpus_write, 1919 .seq_show = rdtgroup_cpus_show, 1920 .flags = RFTYPE_FLAGS_CPUS_LIST, 1921 .fflags = RFTYPE_BASE, 1922 }, 1923 { 1924 .name = "tasks", 1925 .mode = 0644, 1926 .kf_ops = &rdtgroup_kf_single_ops, 1927 .write = rdtgroup_tasks_write, 1928 .seq_show = rdtgroup_tasks_show, 1929 .fflags = RFTYPE_BASE, 1930 }, 1931 { 1932 .name = "mon_hw_id", 1933 .mode = 0444, 1934 .kf_ops = &rdtgroup_kf_single_ops, 1935 .seq_show = rdtgroup_rmid_show, 1936 .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, 1937 }, 1938 { 1939 .name = "schemata", 1940 .mode = 0644, 1941 .kf_ops = &rdtgroup_kf_single_ops, 1942 .write = rdtgroup_schemata_write, 1943 .seq_show = rdtgroup_schemata_show, 1944 .fflags = RFTYPE_CTRL_BASE, 1945 }, 1946 { 1947 .name = "mode", 1948 .mode = 0644, 1949 .kf_ops = &rdtgroup_kf_single_ops, 1950 .write = rdtgroup_mode_write, 1951 .seq_show = rdtgroup_mode_show, 1952 .fflags = RFTYPE_CTRL_BASE, 1953 }, 1954 { 1955 .name = "size", 1956 .mode = 0444, 1957 .kf_ops = &rdtgroup_kf_single_ops, 1958 .seq_show = rdtgroup_size_show, 1959 .fflags = RFTYPE_CTRL_BASE, 1960 }, 1961 { 1962 .name = "sparse_masks", 1963 .mode = 0444, 1964 .kf_ops = &rdtgroup_kf_single_ops, 1965 .seq_show = rdt_has_sparse_bitmasks_show, 1966 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, 1967 }, 1968 { 1969 .name = "ctrl_hw_id", 1970 .mode = 0444, 1971 .kf_ops = &rdtgroup_kf_single_ops, 1972 .seq_show = rdtgroup_closid_show, 1973 .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, 1974 }, 1975 1976 }; 1977 1978 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) 1979 { 1980 struct rftype *rfts, *rft; 1981 int ret, len; 1982 1983 rfts = res_common_files; 1984 len = ARRAY_SIZE(res_common_files); 1985 1986 lockdep_assert_held(&rdtgroup_mutex); 1987 1988 if (resctrl_debug) 1989 fflags |= RFTYPE_DEBUG; 1990 1991 for (rft = rfts; rft < rfts + len; rft++) { 1992 if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { 1993 ret = rdtgroup_add_file(kn, rft); 1994 if (ret) 1995 goto error; 1996 } 1997 } 1998 1999 return 0; 2000 error: 2001 pr_warn("Failed to add %s, err=%d\n", rft->name, ret); 2002 while (--rft >= rfts) { 2003 if ((fflags & rft->fflags) == rft->fflags) 2004 kernfs_remove_by_name(kn, rft->name); 2005 } 2006 return ret; 2007 } 2008 2009 static struct rftype *rdtgroup_get_rftype_by_name(const char *name) 2010 { 2011 struct rftype *rfts, *rft; 2012 int len; 2013 2014 rfts = res_common_files; 
2015 len = ARRAY_SIZE(res_common_files); 2016 2017 for (rft = rfts; rft < rfts + len; rft++) { 2018 if (!strcmp(rft->name, name)) 2019 return rft; 2020 } 2021 2022 return NULL; 2023 } 2024 2025 void __init thread_throttle_mode_init(void) 2026 { 2027 struct rftype *rft; 2028 2029 rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); 2030 if (!rft) 2031 return; 2032 2033 rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; 2034 } 2035 2036 void __init mbm_config_rftype_init(const char *config) 2037 { 2038 struct rftype *rft; 2039 2040 rft = rdtgroup_get_rftype_by_name(config); 2041 if (rft) 2042 rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; 2043 } 2044 2045 /** 2046 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file 2047 * @r: The resource group with which the file is associated. 2048 * @name: Name of the file 2049 * 2050 * The permissions of named resctrl file, directory, or link are modified 2051 * to not allow read, write, or execute by any user. 2052 * 2053 * WARNING: This function is intended to communicate to the user that the 2054 * resctrl file has been locked down - that it is not relevant to the 2055 * particular state the system finds itself in. It should not be relied 2056 * on to protect from user access because after the file's permissions 2057 * are restricted the user can still change the permissions using chmod 2058 * from the command line. 2059 * 2060 * Return: 0 on success, <0 on failure. 2061 */ 2062 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) 2063 { 2064 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2065 struct kernfs_node *kn; 2066 int ret = 0; 2067 2068 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2069 if (!kn) 2070 return -ENOENT; 2071 2072 switch (kernfs_type(kn)) { 2073 case KERNFS_DIR: 2074 iattr.ia_mode = S_IFDIR; 2075 break; 2076 case KERNFS_FILE: 2077 iattr.ia_mode = S_IFREG; 2078 break; 2079 case KERNFS_LINK: 2080 iattr.ia_mode = S_IFLNK; 2081 break; 2082 } 2083 2084 ret = kernfs_setattr(kn, &iattr); 2085 kernfs_put(kn); 2086 return ret; 2087 } 2088 2089 /** 2090 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file 2091 * @r: The resource group with which the file is associated. 2092 * @name: Name of the file 2093 * @mask: Mask of permissions that should be restored 2094 * 2095 * Restore the permissions of the named file. If @name is a directory the 2096 * permissions of its parent will be used. 2097 * 2098 * Return: 0 on success, <0 on failure. 
2099 */ 2100 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, 2101 umode_t mask) 2102 { 2103 struct iattr iattr = {.ia_valid = ATTR_MODE,}; 2104 struct kernfs_node *kn, *parent; 2105 struct rftype *rfts, *rft; 2106 int ret, len; 2107 2108 rfts = res_common_files; 2109 len = ARRAY_SIZE(res_common_files); 2110 2111 for (rft = rfts; rft < rfts + len; rft++) { 2112 if (!strcmp(rft->name, name)) 2113 iattr.ia_mode = rft->mode & mask; 2114 } 2115 2116 kn = kernfs_find_and_get_ns(r->kn, name, NULL); 2117 if (!kn) 2118 return -ENOENT; 2119 2120 switch (kernfs_type(kn)) { 2121 case KERNFS_DIR: 2122 parent = kernfs_get_parent(kn); 2123 if (parent) { 2124 iattr.ia_mode |= parent->mode; 2125 kernfs_put(parent); 2126 } 2127 iattr.ia_mode |= S_IFDIR; 2128 break; 2129 case KERNFS_FILE: 2130 iattr.ia_mode |= S_IFREG; 2131 break; 2132 case KERNFS_LINK: 2133 iattr.ia_mode |= S_IFLNK; 2134 break; 2135 } 2136 2137 ret = kernfs_setattr(kn, &iattr); 2138 kernfs_put(kn); 2139 return ret; 2140 } 2141 2142 static int rdtgroup_mkdir_info_resdir(void *priv, char *name, 2143 unsigned long fflags) 2144 { 2145 struct kernfs_node *kn_subdir; 2146 int ret; 2147 2148 kn_subdir = kernfs_create_dir(kn_info, name, 2149 kn_info->mode, priv); 2150 if (IS_ERR(kn_subdir)) 2151 return PTR_ERR(kn_subdir); 2152 2153 ret = rdtgroup_kn_set_ugid(kn_subdir); 2154 if (ret) 2155 return ret; 2156 2157 ret = rdtgroup_add_files(kn_subdir, fflags); 2158 if (!ret) 2159 kernfs_activate(kn_subdir); 2160 2161 return ret; 2162 } 2163 2164 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) 2165 { 2166 struct resctrl_schema *s; 2167 struct rdt_resource *r; 2168 unsigned long fflags; 2169 char name[32]; 2170 int ret; 2171 2172 /* create the directory */ 2173 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); 2174 if (IS_ERR(kn_info)) 2175 return PTR_ERR(kn_info); 2176 2177 ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); 2178 if (ret) 2179 goto out_destroy; 2180 2181 /* loop over enabled controls, these are all alloc_capable */ 2182 list_for_each_entry(s, &resctrl_schema_all, list) { 2183 r = s->res; 2184 fflags = r->fflags | RFTYPE_CTRL_INFO; 2185 ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); 2186 if (ret) 2187 goto out_destroy; 2188 } 2189 2190 for_each_mon_capable_rdt_resource(r) { 2191 fflags = r->fflags | RFTYPE_MON_INFO; 2192 sprintf(name, "%s_MON", r->name); 2193 ret = rdtgroup_mkdir_info_resdir(r, name, fflags); 2194 if (ret) 2195 goto out_destroy; 2196 } 2197 2198 ret = rdtgroup_kn_set_ugid(kn_info); 2199 if (ret) 2200 goto out_destroy; 2201 2202 kernfs_activate(kn_info); 2203 2204 return 0; 2205 2206 out_destroy: 2207 kernfs_remove(kn_info); 2208 return ret; 2209 } 2210 2211 static int 2212 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, 2213 char *name, struct kernfs_node **dest_kn) 2214 { 2215 struct kernfs_node *kn; 2216 int ret; 2217 2218 /* create the directory */ 2219 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 2220 if (IS_ERR(kn)) 2221 return PTR_ERR(kn); 2222 2223 if (dest_kn) 2224 *dest_kn = kn; 2225 2226 ret = rdtgroup_kn_set_ugid(kn); 2227 if (ret) 2228 goto out_destroy; 2229 2230 kernfs_activate(kn); 2231 2232 return 0; 2233 2234 out_destroy: 2235 kernfs_remove(kn); 2236 return ret; 2237 } 2238 2239 static void l3_qos_cfg_update(void *arg) 2240 { 2241 bool *enable = arg; 2242 2243 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? 
L3_QOS_CDP_ENABLE : 0ULL); 2244 } 2245 2246 static void l2_qos_cfg_update(void *arg) 2247 { 2248 bool *enable = arg; 2249 2250 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); 2251 } 2252 2253 static inline bool is_mba_linear(void) 2254 { 2255 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; 2256 } 2257 2258 static int set_cache_qos_cfg(int level, bool enable) 2259 { 2260 void (*update)(void *arg); 2261 struct rdt_resource *r_l; 2262 cpumask_var_t cpu_mask; 2263 struct rdt_domain *d; 2264 int cpu; 2265 2266 /* Walking r->domains, ensure it can't race with cpuhp */ 2267 lockdep_assert_cpus_held(); 2268 2269 if (level == RDT_RESOURCE_L3) 2270 update = l3_qos_cfg_update; 2271 else if (level == RDT_RESOURCE_L2) 2272 update = l2_qos_cfg_update; 2273 else 2274 return -EINVAL; 2275 2276 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) 2277 return -ENOMEM; 2278 2279 r_l = &rdt_resources_all[level].r_resctrl; 2280 list_for_each_entry(d, &r_l->domains, list) { 2281 if (r_l->cache.arch_has_per_cpu_cfg) 2282 /* Pick all the CPUs in the domain instance */ 2283 for_each_cpu(cpu, &d->cpu_mask) 2284 cpumask_set_cpu(cpu, cpu_mask); 2285 else 2286 /* Pick one CPU from each domain instance to update MSR */ 2287 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); 2288 } 2289 2290 /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ 2291 on_each_cpu_mask(cpu_mask, update, &enable, 1); 2292 2293 free_cpumask_var(cpu_mask); 2294 2295 return 0; 2296 } 2297 2298 /* Restore the qos cfg state when a domain comes online */ 2299 void rdt_domain_reconfigure_cdp(struct rdt_resource *r) 2300 { 2301 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2302 2303 if (!r->cdp_capable) 2304 return; 2305 2306 if (r->rid == RDT_RESOURCE_L2) 2307 l2_qos_cfg_update(&hw_res->cdp_enabled); 2308 2309 if (r->rid == RDT_RESOURCE_L3) 2310 l3_qos_cfg_update(&hw_res->cdp_enabled); 2311 } 2312 2313 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) 2314 { 2315 u32 num_closid = resctrl_arch_get_num_closid(r); 2316 int cpu = cpumask_any(&d->cpu_mask); 2317 int i; 2318 2319 d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), 2320 GFP_KERNEL, cpu_to_node(cpu)); 2321 if (!d->mbps_val) 2322 return -ENOMEM; 2323 2324 for (i = 0; i < num_closid; i++) 2325 d->mbps_val[i] = MBA_MAX_MBPS; 2326 2327 return 0; 2328 } 2329 2330 static void mba_sc_domain_destroy(struct rdt_resource *r, 2331 struct rdt_domain *d) 2332 { 2333 kfree(d->mbps_val); 2334 d->mbps_val = NULL; 2335 } 2336 2337 /* 2338 * MBA software controller is supported only if 2339 * MBM is supported and MBA is in linear scale. 2340 */ 2341 static bool supports_mba_mbps(void) 2342 { 2343 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2344 2345 return (is_mbm_local_enabled() && 2346 r->alloc_capable && is_mba_linear()); 2347 } 2348 2349 /* 2350 * Enable or disable the MBA software controller 2351 * which helps user specify bandwidth in MBps. 
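 *
 * Rough illustration (schemata values are examples only): when mounted
 * with the "mba_MBps" option, an MB line such as "MB:0=2048" requests
 * roughly 2048 MBps for domain 0; the controller then uses the MBM local
 * bandwidth counts to adjust the hardware delay values, instead of the
 * user programming delay percentages directly.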
2352 */ 2353 static int set_mba_sc(bool mba_sc) 2354 { 2355 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; 2356 u32 num_closid = resctrl_arch_get_num_closid(r); 2357 struct rdt_domain *d; 2358 int i; 2359 2360 if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) 2361 return -EINVAL; 2362 2363 r->membw.mba_sc = mba_sc; 2364 2365 list_for_each_entry(d, &r->domains, list) { 2366 for (i = 0; i < num_closid; i++) 2367 d->mbps_val[i] = MBA_MAX_MBPS; 2368 } 2369 2370 return 0; 2371 } 2372 2373 static int cdp_enable(int level) 2374 { 2375 struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; 2376 int ret; 2377 2378 if (!r_l->alloc_capable) 2379 return -EINVAL; 2380 2381 ret = set_cache_qos_cfg(level, true); 2382 if (!ret) 2383 rdt_resources_all[level].cdp_enabled = true; 2384 2385 return ret; 2386 } 2387 2388 static void cdp_disable(int level) 2389 { 2390 struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; 2391 2392 if (r_hw->cdp_enabled) { 2393 set_cache_qos_cfg(level, false); 2394 r_hw->cdp_enabled = false; 2395 } 2396 } 2397 2398 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) 2399 { 2400 struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; 2401 2402 if (!hw_res->r_resctrl.cdp_capable) 2403 return -EINVAL; 2404 2405 if (enable) 2406 return cdp_enable(l); 2407 2408 cdp_disable(l); 2409 2410 return 0; 2411 } 2412 2413 /* 2414 * We don't allow rdtgroup directories to be created anywhere 2415 * except the root directory. Thus when looking for the rdtgroup 2416 * structure for a kernfs node we are either looking at a directory, 2417 * in which case the rdtgroup structure is pointed at by the "priv" 2418 * field, otherwise we have a file, and need only look to the parent 2419 * to find the rdtgroup. 2420 */ 2421 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) 2422 { 2423 if (kernfs_type(kn) == KERNFS_DIR) { 2424 /* 2425 * All the resource directories use "kn->priv" 2426 * to point to the "struct rdtgroup" for the 2427 * resource. "info" and its subdirectories don't 2428 * have rdtgroup structures, so return NULL here. 2429 */ 2430 if (kn == kn_info || kn->parent == kn_info) 2431 return NULL; 2432 else 2433 return kn->priv; 2434 } else { 2435 return kn->parent->priv; 2436 } 2437 } 2438 2439 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2440 { 2441 atomic_inc(&rdtgrp->waitcount); 2442 kernfs_break_active_protection(kn); 2443 } 2444 2445 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) 2446 { 2447 if (atomic_dec_and_test(&rdtgrp->waitcount) && 2448 (rdtgrp->flags & RDT_DELETED)) { 2449 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2450 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2451 rdtgroup_pseudo_lock_remove(rdtgrp); 2452 kernfs_unbreak_active_protection(kn); 2453 rdtgroup_remove(rdtgrp); 2454 } else { 2455 kernfs_unbreak_active_protection(kn); 2456 } 2457 } 2458 2459 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) 2460 { 2461 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2462 2463 if (!rdtgrp) 2464 return NULL; 2465 2466 rdtgroup_kn_get(rdtgrp, kn); 2467 2468 cpus_read_lock(); 2469 mutex_lock(&rdtgroup_mutex); 2470 2471 /* Was this group deleted while we waited? 
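 * Note that cpus_read_lock() and rdtgroup_mutex stay held even when
 * NULL is returned below: callers pair every rdtgroup_kn_lock_live()
 * with rdtgroup_kn_unlock(), which drops both locks and the reference
 * taken by rdtgroup_kn_get() above.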
*/ 2472 if (rdtgrp->flags & RDT_DELETED) 2473 return NULL; 2474 2475 return rdtgrp; 2476 } 2477 2478 void rdtgroup_kn_unlock(struct kernfs_node *kn) 2479 { 2480 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); 2481 2482 if (!rdtgrp) 2483 return; 2484 2485 mutex_unlock(&rdtgroup_mutex); 2486 cpus_read_unlock(); 2487 2488 rdtgroup_kn_put(rdtgrp, kn); 2489 } 2490 2491 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 2492 struct rdtgroup *prgrp, 2493 struct kernfs_node **mon_data_kn); 2494 2495 static void rdt_disable_ctx(void) 2496 { 2497 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2498 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2499 set_mba_sc(false); 2500 2501 resctrl_debug = false; 2502 } 2503 2504 static int rdt_enable_ctx(struct rdt_fs_context *ctx) 2505 { 2506 int ret = 0; 2507 2508 if (ctx->enable_cdpl2) { 2509 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); 2510 if (ret) 2511 goto out_done; 2512 } 2513 2514 if (ctx->enable_cdpl3) { 2515 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); 2516 if (ret) 2517 goto out_cdpl2; 2518 } 2519 2520 if (ctx->enable_mba_mbps) { 2521 ret = set_mba_sc(true); 2522 if (ret) 2523 goto out_cdpl3; 2524 } 2525 2526 if (ctx->enable_debug) 2527 resctrl_debug = true; 2528 2529 return 0; 2530 2531 out_cdpl3: 2532 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); 2533 out_cdpl2: 2534 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); 2535 out_done: 2536 return ret; 2537 } 2538 2539 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) 2540 { 2541 struct resctrl_schema *s; 2542 const char *suffix = ""; 2543 int ret, cl; 2544 2545 s = kzalloc(sizeof(*s), GFP_KERNEL); 2546 if (!s) 2547 return -ENOMEM; 2548 2549 s->res = r; 2550 s->num_closid = resctrl_arch_get_num_closid(r); 2551 if (resctrl_arch_get_cdp_enabled(r->rid)) 2552 s->num_closid /= 2; 2553 2554 s->conf_type = type; 2555 switch (type) { 2556 case CDP_CODE: 2557 suffix = "CODE"; 2558 break; 2559 case CDP_DATA: 2560 suffix = "DATA"; 2561 break; 2562 case CDP_NONE: 2563 suffix = ""; 2564 break; 2565 } 2566 2567 ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); 2568 if (ret >= sizeof(s->name)) { 2569 kfree(s); 2570 return -EINVAL; 2571 } 2572 2573 cl = strlen(s->name); 2574 2575 /* 2576 * If CDP is supported by this resource, but not enabled, 2577 * include the suffix. This ensures the tabular format of the 2578 * schemata file does not change between mounts of the filesystem. 
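 *
 * For example, a CDP capable L3 resource shows up as "L3" with CDP off
 * and as "L3CODE"/"L3DATA" with CDP on; reserving four extra characters
 * for the "CODE"/"DATA" suffix keeps the column width the same either
 * way.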
2579 */ 2580 if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) 2581 cl += 4; 2582 2583 if (cl > max_name_width) 2584 max_name_width = cl; 2585 2586 INIT_LIST_HEAD(&s->list); 2587 list_add(&s->list, &resctrl_schema_all); 2588 2589 return 0; 2590 } 2591 2592 static int schemata_list_create(void) 2593 { 2594 struct rdt_resource *r; 2595 int ret = 0; 2596 2597 for_each_alloc_capable_rdt_resource(r) { 2598 if (resctrl_arch_get_cdp_enabled(r->rid)) { 2599 ret = schemata_list_add(r, CDP_CODE); 2600 if (ret) 2601 break; 2602 2603 ret = schemata_list_add(r, CDP_DATA); 2604 } else { 2605 ret = schemata_list_add(r, CDP_NONE); 2606 } 2607 2608 if (ret) 2609 break; 2610 } 2611 2612 return ret; 2613 } 2614 2615 static void schemata_list_destroy(void) 2616 { 2617 struct resctrl_schema *s, *tmp; 2618 2619 list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { 2620 list_del(&s->list); 2621 kfree(s); 2622 } 2623 } 2624 2625 static int rdt_get_tree(struct fs_context *fc) 2626 { 2627 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2628 unsigned long flags = RFTYPE_CTRL_BASE; 2629 struct rdt_domain *dom; 2630 struct rdt_resource *r; 2631 int ret; 2632 2633 cpus_read_lock(); 2634 mutex_lock(&rdtgroup_mutex); 2635 /* 2636 * resctrl file system can only be mounted once. 2637 */ 2638 if (resctrl_mounted) { 2639 ret = -EBUSY; 2640 goto out; 2641 } 2642 2643 ret = rdtgroup_setup_root(ctx); 2644 if (ret) 2645 goto out; 2646 2647 ret = rdt_enable_ctx(ctx); 2648 if (ret) 2649 goto out_root; 2650 2651 ret = schemata_list_create(); 2652 if (ret) { 2653 schemata_list_destroy(); 2654 goto out_ctx; 2655 } 2656 2657 closid_init(); 2658 2659 if (resctrl_arch_mon_capable()) 2660 flags |= RFTYPE_MON; 2661 2662 ret = rdtgroup_add_files(rdtgroup_default.kn, flags); 2663 if (ret) 2664 goto out_schemata_free; 2665 2666 kernfs_activate(rdtgroup_default.kn); 2667 2668 ret = rdtgroup_create_info_dir(rdtgroup_default.kn); 2669 if (ret < 0) 2670 goto out_schemata_free; 2671 2672 if (resctrl_arch_mon_capable()) { 2673 ret = mongroup_create_dir(rdtgroup_default.kn, 2674 &rdtgroup_default, "mon_groups", 2675 &kn_mongrp); 2676 if (ret < 0) 2677 goto out_info; 2678 2679 ret = mkdir_mondata_all(rdtgroup_default.kn, 2680 &rdtgroup_default, &kn_mondata); 2681 if (ret < 0) 2682 goto out_mongrp; 2683 rdtgroup_default.mon.mon_data_kn = kn_mondata; 2684 } 2685 2686 ret = rdt_pseudo_lock_init(); 2687 if (ret) 2688 goto out_mondata; 2689 2690 ret = kernfs_get_tree(fc); 2691 if (ret < 0) 2692 goto out_psl; 2693 2694 if (resctrl_arch_alloc_capable()) 2695 resctrl_arch_enable_alloc(); 2696 if (resctrl_arch_mon_capable()) 2697 resctrl_arch_enable_mon(); 2698 2699 if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) 2700 resctrl_mounted = true; 2701 2702 if (is_mbm_enabled()) { 2703 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 2704 list_for_each_entry(dom, &r->domains, list) 2705 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, 2706 RESCTRL_PICK_ANY_CPU); 2707 } 2708 2709 goto out; 2710 2711 out_psl: 2712 rdt_pseudo_lock_release(); 2713 out_mondata: 2714 if (resctrl_arch_mon_capable()) 2715 kernfs_remove(kn_mondata); 2716 out_mongrp: 2717 if (resctrl_arch_mon_capable()) 2718 kernfs_remove(kn_mongrp); 2719 out_info: 2720 kernfs_remove(kn_info); 2721 out_schemata_free: 2722 schemata_list_destroy(); 2723 out_ctx: 2724 rdt_disable_ctx(); 2725 out_root: 2726 rdtgroup_destroy_root(); 2727 out: 2728 rdt_last_cmd_clear(); 2729 mutex_unlock(&rdtgroup_mutex); 2730 cpus_read_unlock(); 2731 return ret; 2732 } 2733 2734 enum 
rdt_param { 2735 Opt_cdp, 2736 Opt_cdpl2, 2737 Opt_mba_mbps, 2738 Opt_debug, 2739 nr__rdt_params 2740 }; 2741 2742 static const struct fs_parameter_spec rdt_fs_parameters[] = { 2743 fsparam_flag("cdp", Opt_cdp), 2744 fsparam_flag("cdpl2", Opt_cdpl2), 2745 fsparam_flag("mba_MBps", Opt_mba_mbps), 2746 fsparam_flag("debug", Opt_debug), 2747 {} 2748 }; 2749 2750 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) 2751 { 2752 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2753 struct fs_parse_result result; 2754 int opt; 2755 2756 opt = fs_parse(fc, rdt_fs_parameters, param, &result); 2757 if (opt < 0) 2758 return opt; 2759 2760 switch (opt) { 2761 case Opt_cdp: 2762 ctx->enable_cdpl3 = true; 2763 return 0; 2764 case Opt_cdpl2: 2765 ctx->enable_cdpl2 = true; 2766 return 0; 2767 case Opt_mba_mbps: 2768 if (!supports_mba_mbps()) 2769 return -EINVAL; 2770 ctx->enable_mba_mbps = true; 2771 return 0; 2772 case Opt_debug: 2773 ctx->enable_debug = true; 2774 return 0; 2775 } 2776 2777 return -EINVAL; 2778 } 2779 2780 static void rdt_fs_context_free(struct fs_context *fc) 2781 { 2782 struct rdt_fs_context *ctx = rdt_fc2context(fc); 2783 2784 kernfs_free_fs_context(fc); 2785 kfree(ctx); 2786 } 2787 2788 static const struct fs_context_operations rdt_fs_context_ops = { 2789 .free = rdt_fs_context_free, 2790 .parse_param = rdt_parse_param, 2791 .get_tree = rdt_get_tree, 2792 }; 2793 2794 static int rdt_init_fs_context(struct fs_context *fc) 2795 { 2796 struct rdt_fs_context *ctx; 2797 2798 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); 2799 if (!ctx) 2800 return -ENOMEM; 2801 2802 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; 2803 fc->fs_private = &ctx->kfc; 2804 fc->ops = &rdt_fs_context_ops; 2805 put_user_ns(fc->user_ns); 2806 fc->user_ns = get_user_ns(&init_user_ns); 2807 fc->global = true; 2808 return 0; 2809 } 2810 2811 static int reset_all_ctrls(struct rdt_resource *r) 2812 { 2813 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 2814 struct rdt_hw_domain *hw_dom; 2815 struct msr_param msr_param; 2816 struct rdt_domain *d; 2817 int i; 2818 2819 /* Walking r->domains, ensure it can't race with cpuhp */ 2820 lockdep_assert_cpus_held(); 2821 2822 msr_param.res = r; 2823 msr_param.low = 0; 2824 msr_param.high = hw_res->num_closid; 2825 2826 /* 2827 * Disable resource control for this resource by setting all 2828 * CBMs in all domains to the maximum mask value. Pick one CPU 2829 * from each domain to update the MSRs below. 2830 */ 2831 list_for_each_entry(d, &r->domains, list) { 2832 hw_dom = resctrl_to_arch_dom(d); 2833 2834 for (i = 0; i < hw_res->num_closid; i++) 2835 hw_dom->ctrl_val[i] = r->default_ctrl; 2836 msr_param.dom = d; 2837 smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); 2838 } 2839 2840 return 0; 2841 } 2842 2843 /* 2844 * Move tasks from one to the other group. If @from is NULL, then all tasks 2845 * in the systems are moved unconditionally (used for teardown). 2846 * 2847 * If @mask is not NULL the cpus on which moved tasks are running are set 2848 * in that mask so the update smp function call is restricted to affected 2849 * cpus. 
2850 */ 2851 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, 2852 struct cpumask *mask) 2853 { 2854 struct task_struct *p, *t; 2855 2856 read_lock(&tasklist_lock); 2857 for_each_process_thread(p, t) { 2858 if (!from || is_closid_match(t, from) || 2859 is_rmid_match(t, from)) { 2860 resctrl_arch_set_closid_rmid(t, to->closid, 2861 to->mon.rmid); 2862 2863 /* 2864 * Order the closid/rmid stores above before the loads 2865 * in task_curr(). This pairs with the full barrier 2866 * between the rq->curr update and resctrl_sched_in() 2867 * during context switch. 2868 */ 2869 smp_mb(); 2870 2871 /* 2872 * If the task is on a CPU, set the CPU in the mask. 2873 * The detection is inaccurate as tasks might move or 2874 * schedule before the smp function call takes place. 2875 * In such a case the function call is pointless, but 2876 * there is no other side effect. 2877 */ 2878 if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) 2879 cpumask_set_cpu(task_cpu(t), mask); 2880 } 2881 } 2882 read_unlock(&tasklist_lock); 2883 } 2884 2885 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) 2886 { 2887 struct rdtgroup *sentry, *stmp; 2888 struct list_head *head; 2889 2890 head = &rdtgrp->mon.crdtgrp_list; 2891 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { 2892 free_rmid(sentry->closid, sentry->mon.rmid); 2893 list_del(&sentry->mon.crdtgrp_list); 2894 2895 if (atomic_read(&sentry->waitcount) != 0) 2896 sentry->flags = RDT_DELETED; 2897 else 2898 rdtgroup_remove(sentry); 2899 } 2900 } 2901 2902 /* 2903 * Forcibly remove all of subdirectories under root. 2904 */ 2905 static void rmdir_all_sub(void) 2906 { 2907 struct rdtgroup *rdtgrp, *tmp; 2908 2909 /* Move all tasks to the default resource group */ 2910 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); 2911 2912 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { 2913 /* Free any child rmids */ 2914 free_all_child_rdtgrp(rdtgrp); 2915 2916 /* Remove each rdtgroup other than root */ 2917 if (rdtgrp == &rdtgroup_default) 2918 continue; 2919 2920 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 2921 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) 2922 rdtgroup_pseudo_lock_remove(rdtgrp); 2923 2924 /* 2925 * Give any CPUs back to the default group. We cannot copy 2926 * cpu_online_mask because a CPU might have executed the 2927 * offline callback already, but is still marked online. 2928 */ 2929 cpumask_or(&rdtgroup_default.cpu_mask, 2930 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 2931 2932 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 2933 2934 kernfs_remove(rdtgrp->kn); 2935 list_del(&rdtgrp->rdtgroup_list); 2936 2937 if (atomic_read(&rdtgrp->waitcount) != 0) 2938 rdtgrp->flags = RDT_DELETED; 2939 else 2940 rdtgroup_remove(rdtgrp); 2941 } 2942 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ 2943 update_closid_rmid(cpu_online_mask, &rdtgroup_default); 2944 2945 kernfs_remove(kn_info); 2946 kernfs_remove(kn_mongrp); 2947 kernfs_remove(kn_mondata); 2948 } 2949 2950 static void rdt_kill_sb(struct super_block *sb) 2951 { 2952 struct rdt_resource *r; 2953 2954 cpus_read_lock(); 2955 mutex_lock(&rdtgroup_mutex); 2956 2957 rdt_disable_ctx(); 2958 2959 /*Put everything back to default values. 
*/ 2960 for_each_alloc_capable_rdt_resource(r) 2961 reset_all_ctrls(r); 2962 rmdir_all_sub(); 2963 rdt_pseudo_lock_release(); 2964 rdtgroup_default.mode = RDT_MODE_SHAREABLE; 2965 schemata_list_destroy(); 2966 rdtgroup_destroy_root(); 2967 if (resctrl_arch_alloc_capable()) 2968 resctrl_arch_disable_alloc(); 2969 if (resctrl_arch_mon_capable()) 2970 resctrl_arch_disable_mon(); 2971 resctrl_mounted = false; 2972 kernfs_kill_sb(sb); 2973 mutex_unlock(&rdtgroup_mutex); 2974 cpus_read_unlock(); 2975 } 2976 2977 static struct file_system_type rdt_fs_type = { 2978 .name = "resctrl", 2979 .init_fs_context = rdt_init_fs_context, 2980 .parameters = rdt_fs_parameters, 2981 .kill_sb = rdt_kill_sb, 2982 }; 2983 2984 static int mon_addfile(struct kernfs_node *parent_kn, const char *name, 2985 void *priv) 2986 { 2987 struct kernfs_node *kn; 2988 int ret = 0; 2989 2990 kn = __kernfs_create_file(parent_kn, name, 0444, 2991 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 2992 &kf_mondata_ops, priv, NULL, NULL); 2993 if (IS_ERR(kn)) 2994 return PTR_ERR(kn); 2995 2996 ret = rdtgroup_kn_set_ugid(kn); 2997 if (ret) { 2998 kernfs_remove(kn); 2999 return ret; 3000 } 3001 3002 return ret; 3003 } 3004 3005 /* 3006 * Remove all subdirectories of mon_data of ctrl_mon groups 3007 * and monitor groups with given domain id. 3008 */ 3009 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3010 unsigned int dom_id) 3011 { 3012 struct rdtgroup *prgrp, *crgrp; 3013 char name[32]; 3014 3015 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3016 sprintf(name, "mon_%s_%02d", r->name, dom_id); 3017 kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); 3018 3019 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) 3020 kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); 3021 } 3022 } 3023 3024 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, 3025 struct rdt_domain *d, 3026 struct rdt_resource *r, struct rdtgroup *prgrp) 3027 { 3028 union mon_data_bits priv; 3029 struct kernfs_node *kn; 3030 struct mon_evt *mevt; 3031 struct rmid_read rr; 3032 char name[32]; 3033 int ret; 3034 3035 sprintf(name, "mon_%s_%02d", r->name, d->id); 3036 /* create the directory */ 3037 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); 3038 if (IS_ERR(kn)) 3039 return PTR_ERR(kn); 3040 3041 ret = rdtgroup_kn_set_ugid(kn); 3042 if (ret) 3043 goto out_destroy; 3044 3045 if (WARN_ON(list_empty(&r->evt_list))) { 3046 ret = -EPERM; 3047 goto out_destroy; 3048 } 3049 3050 priv.u.rid = r->rid; 3051 priv.u.domid = d->id; 3052 list_for_each_entry(mevt, &r->evt_list, list) { 3053 priv.u.evtid = mevt->evtid; 3054 ret = mon_addfile(kn, mevt->name, priv.priv); 3055 if (ret) 3056 goto out_destroy; 3057 3058 if (is_mbm_event(mevt->evtid)) 3059 mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); 3060 } 3061 kernfs_activate(kn); 3062 return 0; 3063 3064 out_destroy: 3065 kernfs_remove(kn); 3066 return ret; 3067 } 3068 3069 /* 3070 * Add all subdirectories of mon_data for "ctrl_mon" groups 3071 * and "monitor" groups with given domain id. 
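 *
 * For example, when an L3 domain with id 2 comes online, a "mon_L3_02"
 * directory containing one file per monitor event is created under the
 * mon_data directory of every control and monitor group.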
3072 */ 3073 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, 3074 struct rdt_domain *d) 3075 { 3076 struct kernfs_node *parent_kn; 3077 struct rdtgroup *prgrp, *crgrp; 3078 struct list_head *head; 3079 3080 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { 3081 parent_kn = prgrp->mon.mon_data_kn; 3082 mkdir_mondata_subdir(parent_kn, d, r, prgrp); 3083 3084 head = &prgrp->mon.crdtgrp_list; 3085 list_for_each_entry(crgrp, head, mon.crdtgrp_list) { 3086 parent_kn = crgrp->mon.mon_data_kn; 3087 mkdir_mondata_subdir(parent_kn, d, r, crgrp); 3088 } 3089 } 3090 } 3091 3092 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, 3093 struct rdt_resource *r, 3094 struct rdtgroup *prgrp) 3095 { 3096 struct rdt_domain *dom; 3097 int ret; 3098 3099 /* Walking r->domains, ensure it can't race with cpuhp */ 3100 lockdep_assert_cpus_held(); 3101 3102 list_for_each_entry(dom, &r->domains, list) { 3103 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); 3104 if (ret) 3105 return ret; 3106 } 3107 3108 return 0; 3109 } 3110 3111 /* 3112 * This creates a directory mon_data which contains the monitored data. 3113 * 3114 * mon_data has one directory for each domain, named 3115 * in the format mon_<domain_name>_<domain_id>. For example, mon_data 3116 * for an L3 resource looks like this: 3117 * ./mon_data: 3118 * mon_L3_00 3119 * mon_L3_01 3120 * mon_L3_02 3121 * ... 3122 * 3123 * Each domain directory has one file per event: 3124 * ./mon_L3_00/: 3125 * llc_occupancy 3126 * 3127 */ 3128 static int mkdir_mondata_all(struct kernfs_node *parent_kn, 3129 struct rdtgroup *prgrp, 3130 struct kernfs_node **dest_kn) 3131 { 3132 struct rdt_resource *r; 3133 struct kernfs_node *kn; 3134 int ret; 3135 3136 /* 3137 * Create the mon_data directory first. 3138 */ 3139 ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); 3140 if (ret) 3141 return ret; 3142 3143 if (dest_kn) 3144 *dest_kn = kn; 3145 3146 /* 3147 * Create the subdirectories for each domain. Note that all events 3148 * in a domain like L3 are grouped into a resource whose domain is L3. 3149 */ 3150 for_each_mon_capable_rdt_resource(r) { 3151 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); 3152 if (ret) 3153 goto out_destroy; 3154 } 3155 3156 return 0; 3157 3158 out_destroy: 3159 kernfs_remove(kn); 3160 return ret; 3161 } 3162 3163 /** 3164 * cbm_ensure_valid - Enforce validity on provided CBM 3165 * @_val: Candidate CBM 3166 * @r: RDT resource to which the CBM belongs 3167 * 3168 * The provided CBM represents all cache portions available for use. This 3169 * may be represented by a bitmap that does not consist of contiguous ones 3170 * and thus be an invalid CBM. 3171 * Here the provided CBM is forced to be a valid CBM by keeping only 3172 * the first run of contiguous set bits and clearing all other bits. 3173 * The intention here is to provide a valid default CBM with which a new 3174 * resource group is initialized. The user can follow this with a 3175 * modification to the CBM if the default does not satisfy the 3176 * requirements.
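 *
 * Worked example: a non-contiguous candidate such as 0x3c3
 * (0b1111000011) keeps only the run of ones starting at its first set
 * bit and becomes 0x3.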
3177 */ 3178 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) 3179 { 3180 unsigned int cbm_len = r->cache.cbm_len; 3181 unsigned long first_bit, zero_bit; 3182 unsigned long val = _val; 3183 3184 if (!val) 3185 return 0; 3186 3187 first_bit = find_first_bit(&val, cbm_len); 3188 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); 3189 3190 /* Clear any remaining bits to ensure contiguous region */ 3191 bitmap_clear(&val, zero_bit, cbm_len - zero_bit); 3192 return (u32)val; 3193 } 3194 3195 /* 3196 * Initialize cache resources per RDT domain 3197 * 3198 * Set the RDT domain up to start off with all usable allocations. That is, 3199 * all shareable and unused bits. All-zero CBM is invalid. 3200 */ 3201 static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, 3202 u32 closid) 3203 { 3204 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); 3205 enum resctrl_conf_type t = s->conf_type; 3206 struct resctrl_staged_config *cfg; 3207 struct rdt_resource *r = s->res; 3208 u32 used_b = 0, unused_b = 0; 3209 unsigned long tmp_cbm; 3210 enum rdtgrp_mode mode; 3211 u32 peer_ctl, ctrl_val; 3212 int i; 3213 3214 cfg = &d->staged_config[t]; 3215 cfg->have_new_ctrl = false; 3216 cfg->new_ctrl = r->cache.shareable_bits; 3217 used_b = r->cache.shareable_bits; 3218 for (i = 0; i < closids_supported(); i++) { 3219 if (closid_allocated(i) && i != closid) { 3220 mode = rdtgroup_mode_by_closid(i); 3221 if (mode == RDT_MODE_PSEUDO_LOCKSETUP) 3222 /* 3223 * ctrl values for locksetup aren't relevant 3224 * until the schemata is written, and the mode 3225 * becomes RDT_MODE_PSEUDO_LOCKED. 3226 */ 3227 continue; 3228 /* 3229 * If CDP is active include peer domain's 3230 * usage to ensure there is no overlap 3231 * with an exclusive group. 3232 */ 3233 if (resctrl_arch_get_cdp_enabled(r->rid)) 3234 peer_ctl = resctrl_arch_get_config(r, d, i, 3235 peer_type); 3236 else 3237 peer_ctl = 0; 3238 ctrl_val = resctrl_arch_get_config(r, d, i, 3239 s->conf_type); 3240 used_b |= ctrl_val | peer_ctl; 3241 if (mode == RDT_MODE_SHAREABLE) 3242 cfg->new_ctrl |= ctrl_val | peer_ctl; 3243 } 3244 } 3245 if (d->plr && d->plr->cbm > 0) 3246 used_b |= d->plr->cbm; 3247 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); 3248 unused_b &= BIT_MASK(r->cache.cbm_len) - 1; 3249 cfg->new_ctrl |= unused_b; 3250 /* 3251 * Force the initial CBM to be valid, user can 3252 * modify the CBM based on system availability. 3253 */ 3254 cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); 3255 /* 3256 * Assign the u32 CBM to an unsigned long to ensure that 3257 * bitmap_weight() does not access out-of-bound memory. 3258 */ 3259 tmp_cbm = cfg->new_ctrl; 3260 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { 3261 rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); 3262 return -ENOSPC; 3263 } 3264 cfg->have_new_ctrl = true; 3265 3266 return 0; 3267 } 3268 3269 /* 3270 * Initialize cache resources with default values. 3271 * 3272 * A new RDT group is being created on an allocation capable (CAT) 3273 * supporting system. Set this group up to start off with all usable 3274 * allocations. 3275 * 3276 * If there are no more shareable bits available on any domain then 3277 * the entire allocation will fail. 
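 *
 * Illustration (assuming an 8-bit CBM): with shareable_bits of 0xc0 and
 * one existing exclusive group owning 0x0f, a new group starts from
 * 0xc0 | 0x30 = 0xf0, i.e. the shareable bits plus everything that no
 * other group has claimed, before cbm_ensure_valid() forces the result
 * to be contiguous.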
3278 */ 3279 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) 3280 { 3281 struct rdt_domain *d; 3282 int ret; 3283 3284 list_for_each_entry(d, &s->res->domains, list) { 3285 ret = __init_one_rdt_domain(d, s, closid); 3286 if (ret < 0) 3287 return ret; 3288 } 3289 3290 return 0; 3291 } 3292 3293 /* Initialize MBA resource with default values. */ 3294 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) 3295 { 3296 struct resctrl_staged_config *cfg; 3297 struct rdt_domain *d; 3298 3299 list_for_each_entry(d, &r->domains, list) { 3300 if (is_mba_sc(r)) { 3301 d->mbps_val[closid] = MBA_MAX_MBPS; 3302 continue; 3303 } 3304 3305 cfg = &d->staged_config[CDP_NONE]; 3306 cfg->new_ctrl = r->default_ctrl; 3307 cfg->have_new_ctrl = true; 3308 } 3309 } 3310 3311 /* Initialize the RDT group's allocations. */ 3312 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) 3313 { 3314 struct resctrl_schema *s; 3315 struct rdt_resource *r; 3316 int ret = 0; 3317 3318 rdt_staged_configs_clear(); 3319 3320 list_for_each_entry(s, &resctrl_schema_all, list) { 3321 r = s->res; 3322 if (r->rid == RDT_RESOURCE_MBA || 3323 r->rid == RDT_RESOURCE_SMBA) { 3324 rdtgroup_init_mba(r, rdtgrp->closid); 3325 if (is_mba_sc(r)) 3326 continue; 3327 } else { 3328 ret = rdtgroup_init_cat(s, rdtgrp->closid); 3329 if (ret < 0) 3330 goto out; 3331 } 3332 3333 ret = resctrl_arch_update_domains(r, rdtgrp->closid); 3334 if (ret < 0) { 3335 rdt_last_cmd_puts("Failed to initialize allocations\n"); 3336 goto out; 3337 } 3338 3339 } 3340 3341 rdtgrp->mode = RDT_MODE_SHAREABLE; 3342 3343 out: 3344 rdt_staged_configs_clear(); 3345 return ret; 3346 } 3347 3348 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) 3349 { 3350 int ret; 3351 3352 if (!resctrl_arch_mon_capable()) 3353 return 0; 3354 3355 ret = alloc_rmid(rdtgrp->closid); 3356 if (ret < 0) { 3357 rdt_last_cmd_puts("Out of RMIDs\n"); 3358 return ret; 3359 } 3360 rdtgrp->mon.rmid = ret; 3361 3362 ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); 3363 if (ret) { 3364 rdt_last_cmd_puts("kernfs subdir error\n"); 3365 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3366 return ret; 3367 } 3368 3369 return 0; 3370 } 3371 3372 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) 3373 { 3374 if (resctrl_arch_mon_capable()) 3375 free_rmid(rgrp->closid, rgrp->mon.rmid); 3376 } 3377 3378 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, 3379 const char *name, umode_t mode, 3380 enum rdt_group_type rtype, struct rdtgroup **r) 3381 { 3382 struct rdtgroup *prdtgrp, *rdtgrp; 3383 unsigned long files = 0; 3384 struct kernfs_node *kn; 3385 int ret; 3386 3387 prdtgrp = rdtgroup_kn_lock_live(parent_kn); 3388 if (!prdtgrp) { 3389 ret = -ENODEV; 3390 goto out_unlock; 3391 } 3392 3393 if (rtype == RDTMON_GROUP && 3394 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3395 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { 3396 ret = -EINVAL; 3397 rdt_last_cmd_puts("Pseudo-locking in progress\n"); 3398 goto out_unlock; 3399 } 3400 3401 /* allocate the rdtgroup. 
*/ 3402 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); 3403 if (!rdtgrp) { 3404 ret = -ENOSPC; 3405 rdt_last_cmd_puts("Kernel out of memory\n"); 3406 goto out_unlock; 3407 } 3408 *r = rdtgrp; 3409 rdtgrp->mon.parent = prdtgrp; 3410 rdtgrp->type = rtype; 3411 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); 3412 3413 /* kernfs creates the directory for rdtgrp */ 3414 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); 3415 if (IS_ERR(kn)) { 3416 ret = PTR_ERR(kn); 3417 rdt_last_cmd_puts("kernfs create error\n"); 3418 goto out_free_rgrp; 3419 } 3420 rdtgrp->kn = kn; 3421 3422 /* 3423 * kernfs_remove() will drop the reference count on "kn" which 3424 * will free it. But we still need it to stick around for the 3425 * rdtgroup_kn_unlock(kn) call. Take one extra reference here, 3426 * which will be dropped by kernfs_put() in rdtgroup_remove(). 3427 */ 3428 kernfs_get(kn); 3429 3430 ret = rdtgroup_kn_set_ugid(kn); 3431 if (ret) { 3432 rdt_last_cmd_puts("kernfs perm error\n"); 3433 goto out_destroy; 3434 } 3435 3436 if (rtype == RDTCTRL_GROUP) { 3437 files = RFTYPE_BASE | RFTYPE_CTRL; 3438 if (resctrl_arch_mon_capable()) 3439 files |= RFTYPE_MON; 3440 } else { 3441 files = RFTYPE_BASE | RFTYPE_MON; 3442 } 3443 3444 ret = rdtgroup_add_files(kn, files); 3445 if (ret) { 3446 rdt_last_cmd_puts("kernfs fill error\n"); 3447 goto out_destroy; 3448 } 3449 3450 /* 3451 * The caller unlocks the parent_kn upon success. 3452 */ 3453 return 0; 3454 3455 out_destroy: 3456 kernfs_put(rdtgrp->kn); 3457 kernfs_remove(rdtgrp->kn); 3458 out_free_rgrp: 3459 kfree(rdtgrp); 3460 out_unlock: 3461 rdtgroup_kn_unlock(parent_kn); 3462 return ret; 3463 } 3464 3465 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) 3466 { 3467 kernfs_remove(rgrp->kn); 3468 rdtgroup_remove(rgrp); 3469 } 3470 3471 /* 3472 * Create a monitor group under "mon_groups" directory of a control 3473 * and monitor group(ctrl_mon). This is a resource group 3474 * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 3475 */ 3476 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, 3477 const char *name, umode_t mode) 3478 { 3479 struct rdtgroup *rdtgrp, *prgrp; 3480 int ret; 3481 3482 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); 3483 if (ret) 3484 return ret; 3485 3486 prgrp = rdtgrp->mon.parent; 3487 rdtgrp->closid = prgrp->closid; 3488 3489 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3490 if (ret) { 3491 mkdir_rdt_prepare_clean(rdtgrp); 3492 goto out_unlock; 3493 } 3494 3495 kernfs_activate(rdtgrp->kn); 3496 3497 /* 3498 * Add the rdtgrp to the list of rdtgrps the parent 3499 * ctrl_mon group has to track. 3500 */ 3501 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); 3502 3503 out_unlock: 3504 rdtgroup_kn_unlock(parent_kn); 3505 return ret; 3506 } 3507 3508 /* 3509 * These are rdtgroups created under the root directory. Can be used 3510 * to allocate and monitor resources. 
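 *
 * For example (paths illustrative), "mkdir /sys/fs/resctrl/grp1" lands
 * here: the new control group gets its own CLOSID, default allocations
 * written to its schemata, and, when monitoring is supported, an RMID
 * plus its own mon_data and mon_groups directories.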
3511 */ 3512 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, 3513 const char *name, umode_t mode) 3514 { 3515 struct rdtgroup *rdtgrp; 3516 struct kernfs_node *kn; 3517 u32 closid; 3518 int ret; 3519 3520 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); 3521 if (ret) 3522 return ret; 3523 3524 kn = rdtgrp->kn; 3525 ret = closid_alloc(); 3526 if (ret < 0) { 3527 rdt_last_cmd_puts("Out of CLOSIDs\n"); 3528 goto out_common_fail; 3529 } 3530 closid = ret; 3531 ret = 0; 3532 3533 rdtgrp->closid = closid; 3534 3535 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); 3536 if (ret) 3537 goto out_closid_free; 3538 3539 kernfs_activate(rdtgrp->kn); 3540 3541 ret = rdtgroup_init_alloc(rdtgrp); 3542 if (ret < 0) 3543 goto out_rmid_free; 3544 3545 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 3546 3547 if (resctrl_arch_mon_capable()) { 3548 /* 3549 * Create an empty mon_groups directory to hold the subset 3550 * of tasks and cpus to monitor. 3551 */ 3552 ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); 3553 if (ret) { 3554 rdt_last_cmd_puts("kernfs subdir error\n"); 3555 goto out_del_list; 3556 } 3557 } 3558 3559 goto out_unlock; 3560 3561 out_del_list: 3562 list_del(&rdtgrp->rdtgroup_list); 3563 out_rmid_free: 3564 mkdir_rdt_prepare_rmid_free(rdtgrp); 3565 out_closid_free: 3566 closid_free(closid); 3567 out_common_fail: 3568 mkdir_rdt_prepare_clean(rdtgrp); 3569 out_unlock: 3570 rdtgroup_kn_unlock(parent_kn); 3571 return ret; 3572 } 3573 3574 /* 3575 * We allow creating mon groups only with in a directory called "mon_groups" 3576 * which is present in every ctrl_mon group. Check if this is a valid 3577 * "mon_groups" directory. 3578 * 3579 * 1. The directory should be named "mon_groups". 3580 * 2. The mon group itself should "not" be named "mon_groups". 3581 * This makes sure "mon_groups" directory always has a ctrl_mon group 3582 * as parent. 3583 */ 3584 static bool is_mon_groups(struct kernfs_node *kn, const char *name) 3585 { 3586 return (!strcmp(kn->name, "mon_groups") && 3587 strcmp(name, "mon_groups")); 3588 } 3589 3590 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, 3591 umode_t mode) 3592 { 3593 /* Do not accept '\n' to avoid unparsable situation. */ 3594 if (strchr(name, '\n')) 3595 return -EINVAL; 3596 3597 /* 3598 * If the parent directory is the root directory and RDT 3599 * allocation is supported, add a control and monitoring 3600 * subdirectory 3601 */ 3602 if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) 3603 return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); 3604 3605 /* 3606 * If RDT monitoring is supported and the parent directory is a valid 3607 * "mon_groups" directory, add a monitoring subdirectory. 3608 */ 3609 if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) 3610 return rdtgroup_mkdir_mon(parent_kn, name, mode); 3611 3612 return -EPERM; 3613 } 3614 3615 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3616 { 3617 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3618 int cpu; 3619 3620 /* Give any tasks back to the parent group */ 3621 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); 3622 3623 /* Update per cpu rmid of the moved CPUs first */ 3624 for_each_cpu(cpu, &rdtgrp->cpu_mask) 3625 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; 3626 /* 3627 * Update the MSR on moved CPUs and CPUs which have moved 3628 * task running on them. 
3629 */ 3630 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3631 update_closid_rmid(tmpmask, NULL); 3632 3633 rdtgrp->flags = RDT_DELETED; 3634 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3635 3636 /* 3637 * Remove the rdtgrp from the parent ctrl_mon group's list 3638 */ 3639 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3640 list_del(&rdtgrp->mon.crdtgrp_list); 3641 3642 kernfs_remove(rdtgrp->kn); 3643 3644 return 0; 3645 } 3646 3647 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) 3648 { 3649 rdtgrp->flags = RDT_DELETED; 3650 list_del(&rdtgrp->rdtgroup_list); 3651 3652 kernfs_remove(rdtgrp->kn); 3653 return 0; 3654 } 3655 3656 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) 3657 { 3658 int cpu; 3659 3660 /* Give any tasks back to the default group */ 3661 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); 3662 3663 /* Give any CPUs back to the default group */ 3664 cpumask_or(&rdtgroup_default.cpu_mask, 3665 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); 3666 3667 /* Update per cpu closid and rmid of the moved CPUs first */ 3668 for_each_cpu(cpu, &rdtgrp->cpu_mask) { 3669 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; 3670 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; 3671 } 3672 3673 /* 3674 * Update the MSR on moved CPUs and CPUs which have moved 3675 * task running on them. 3676 */ 3677 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 3678 update_closid_rmid(tmpmask, NULL); 3679 3680 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); 3681 closid_free(rdtgrp->closid); 3682 3683 rdtgroup_ctrl_remove(rdtgrp); 3684 3685 /* 3686 * Free all the child monitor group rmids. 3687 */ 3688 free_all_child_rdtgrp(rdtgrp); 3689 3690 return 0; 3691 } 3692 3693 static int rdtgroup_rmdir(struct kernfs_node *kn) 3694 { 3695 struct kernfs_node *parent_kn = kn->parent; 3696 struct rdtgroup *rdtgrp; 3697 cpumask_var_t tmpmask; 3698 int ret = 0; 3699 3700 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) 3701 return -ENOMEM; 3702 3703 rdtgrp = rdtgroup_kn_lock_live(kn); 3704 if (!rdtgrp) { 3705 ret = -EPERM; 3706 goto out; 3707 } 3708 3709 /* 3710 * If the rdtgroup is a ctrl_mon group and parent directory 3711 * is the root directory, remove the ctrl_mon group. 3712 * 3713 * If the rdtgroup is a mon group and parent directory 3714 * is a valid "mon_groups" directory, remove the mon group. 3715 */ 3716 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && 3717 rdtgrp != &rdtgroup_default) { 3718 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || 3719 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { 3720 ret = rdtgroup_ctrl_remove(rdtgrp); 3721 } else { 3722 ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); 3723 } 3724 } else if (rdtgrp->type == RDTMON_GROUP && 3725 is_mon_groups(parent_kn, kn->name)) { 3726 ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); 3727 } else { 3728 ret = -EPERM; 3729 } 3730 3731 out: 3732 rdtgroup_kn_unlock(kn); 3733 free_cpumask_var(tmpmask); 3734 return ret; 3735 } 3736 3737 /** 3738 * mongrp_reparent() - replace parent CTRL_MON group of a MON group 3739 * @rdtgrp: the MON group whose parent should be replaced 3740 * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp 3741 * @cpus: cpumask provided by the caller for use during this call 3742 * 3743 * Replaces the parent CTRL_MON group for a MON group, resulting in all member 3744 * tasks' CLOSID immediately changing to that of the new parent group. 3745 * Monitoring data for the group is unaffected by this operation. 
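 *
 * For instance (paths illustrative), a rename such as
 * "mv /sys/fs/resctrl/g1/mon_groups/m0 /sys/fs/resctrl/g2/mon_groups/"
 * ends up here: m0 keeps its RMID and accumulated monitoring data while
 * its member tasks switch to g2's CLOSID.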
3746 */ 3747 static void mongrp_reparent(struct rdtgroup *rdtgrp, 3748 struct rdtgroup *new_prdtgrp, 3749 cpumask_var_t cpus) 3750 { 3751 struct rdtgroup *prdtgrp = rdtgrp->mon.parent; 3752 3753 WARN_ON(rdtgrp->type != RDTMON_GROUP); 3754 WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); 3755 3756 /* Nothing to do when simply renaming a MON group. */ 3757 if (prdtgrp == new_prdtgrp) 3758 return; 3759 3760 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); 3761 list_move_tail(&rdtgrp->mon.crdtgrp_list, 3762 &new_prdtgrp->mon.crdtgrp_list); 3763 3764 rdtgrp->mon.parent = new_prdtgrp; 3765 rdtgrp->closid = new_prdtgrp->closid; 3766 3767 /* Propagate updated closid to all tasks in this group. */ 3768 rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); 3769 3770 update_closid_rmid(cpus, NULL); 3771 } 3772 3773 static int rdtgroup_rename(struct kernfs_node *kn, 3774 struct kernfs_node *new_parent, const char *new_name) 3775 { 3776 struct rdtgroup *new_prdtgrp; 3777 struct rdtgroup *rdtgrp; 3778 cpumask_var_t tmpmask; 3779 int ret; 3780 3781 rdtgrp = kernfs_to_rdtgroup(kn); 3782 new_prdtgrp = kernfs_to_rdtgroup(new_parent); 3783 if (!rdtgrp || !new_prdtgrp) 3784 return -ENOENT; 3785 3786 /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ 3787 rdtgroup_kn_get(rdtgrp, kn); 3788 rdtgroup_kn_get(new_prdtgrp, new_parent); 3789 3790 mutex_lock(&rdtgroup_mutex); 3791 3792 rdt_last_cmd_clear(); 3793 3794 /* 3795 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if 3796 * either kernfs_node is a file. 3797 */ 3798 if (kernfs_type(kn) != KERNFS_DIR || 3799 kernfs_type(new_parent) != KERNFS_DIR) { 3800 rdt_last_cmd_puts("Source and destination must be directories"); 3801 ret = -EPERM; 3802 goto out; 3803 } 3804 3805 if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { 3806 ret = -ENOENT; 3807 goto out; 3808 } 3809 3810 if (rdtgrp->type != RDTMON_GROUP || !kn->parent || 3811 !is_mon_groups(kn->parent, kn->name)) { 3812 rdt_last_cmd_puts("Source must be a MON group\n"); 3813 ret = -EPERM; 3814 goto out; 3815 } 3816 3817 if (!is_mon_groups(new_parent, new_name)) { 3818 rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); 3819 ret = -EPERM; 3820 goto out; 3821 } 3822 3823 /* 3824 * If the MON group is monitoring CPUs, the CPUs must be assigned to the 3825 * current parent CTRL_MON group and therefore cannot be assigned to 3826 * the new parent, making the move illegal. 3827 */ 3828 if (!cpumask_empty(&rdtgrp->cpu_mask) && 3829 rdtgrp->mon.parent != new_prdtgrp) { 3830 rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); 3831 ret = -EPERM; 3832 goto out; 3833 } 3834 3835 /* 3836 * Allocate the cpumask for use in mongrp_reparent() to avoid the 3837 * possibility of failing to allocate it after kernfs_rename() has 3838 * succeeded. 3839 */ 3840 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { 3841 ret = -ENOMEM; 3842 goto out; 3843 } 3844 3845 /* 3846 * Perform all input validation and allocations needed to ensure 3847 * mongrp_reparent() will succeed before calling kernfs_rename(), 3848 * otherwise it would be necessary to revert this call if 3849 * mongrp_reparent() failed. 
3850 */ 3851 ret = kernfs_rename(kn, new_parent, new_name); 3852 if (!ret) 3853 mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); 3854 3855 free_cpumask_var(tmpmask); 3856 3857 out: 3858 mutex_unlock(&rdtgroup_mutex); 3859 rdtgroup_kn_put(rdtgrp, kn); 3860 rdtgroup_kn_put(new_prdtgrp, new_parent); 3861 return ret; 3862 } 3863 3864 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) 3865 { 3866 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) 3867 seq_puts(seq, ",cdp"); 3868 3869 if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) 3870 seq_puts(seq, ",cdpl2"); 3871 3872 if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) 3873 seq_puts(seq, ",mba_MBps"); 3874 3875 if (resctrl_debug) 3876 seq_puts(seq, ",debug"); 3877 3878 return 0; 3879 } 3880 3881 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { 3882 .mkdir = rdtgroup_mkdir, 3883 .rmdir = rdtgroup_rmdir, 3884 .rename = rdtgroup_rename, 3885 .show_options = rdtgroup_show_options, 3886 }; 3887 3888 static int rdtgroup_setup_root(struct rdt_fs_context *ctx) 3889 { 3890 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, 3891 KERNFS_ROOT_CREATE_DEACTIVATED | 3892 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, 3893 &rdtgroup_default); 3894 if (IS_ERR(rdt_root)) 3895 return PTR_ERR(rdt_root); 3896 3897 ctx->kfc.root = rdt_root; 3898 rdtgroup_default.kn = kernfs_root_to_node(rdt_root); 3899 3900 return 0; 3901 } 3902 3903 static void rdtgroup_destroy_root(void) 3904 { 3905 kernfs_destroy_root(rdt_root); 3906 rdtgroup_default.kn = NULL; 3907 } 3908 3909 static void __init rdtgroup_setup_default(void) 3910 { 3911 mutex_lock(&rdtgroup_mutex); 3912 3913 rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; 3914 rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; 3915 rdtgroup_default.type = RDTCTRL_GROUP; 3916 INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); 3917 3918 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); 3919 3920 mutex_unlock(&rdtgroup_mutex); 3921 } 3922 3923 static void domain_destroy_mon_state(struct rdt_domain *d) 3924 { 3925 bitmap_free(d->rmid_busy_llc); 3926 kfree(d->mbm_total); 3927 kfree(d->mbm_local); 3928 } 3929 3930 void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) 3931 { 3932 mutex_lock(&rdtgroup_mutex); 3933 3934 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) 3935 mba_sc_domain_destroy(r, d); 3936 3937 if (!r->mon_capable) 3938 goto out_unlock; 3939 3940 /* 3941 * If resctrl is mounted, remove all the 3942 * per domain monitor data directories. 3943 */ 3944 if (resctrl_mounted && resctrl_arch_mon_capable()) 3945 rmdir_mondata_subdir_allrdtgrp(r, d->id); 3946 3947 if (is_mbm_enabled()) 3948 cancel_delayed_work(&d->mbm_over); 3949 if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { 3950 /* 3951 * When a package is going down, forcefully 3952 * decrement rmid->ebusy. There is no way to know 3953 * that the L3 was flushed and hence may lead to 3954 * incorrect counts in rare scenarios, but leaving 3955 * the RMID as busy creates RMID leaks if the 3956 * package never comes back. 
3957 */ 3958 __check_limbo(d, true); 3959 cancel_delayed_work(&d->cqm_limbo); 3960 } 3961 3962 domain_destroy_mon_state(d); 3963 3964 out_unlock: 3965 mutex_unlock(&rdtgroup_mutex); 3966 } 3967 3968 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) 3969 { 3970 u32 idx_limit = resctrl_arch_system_num_rmid_idx(); 3971 size_t tsize; 3972 3973 if (is_llc_occupancy_enabled()) { 3974 d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); 3975 if (!d->rmid_busy_llc) 3976 return -ENOMEM; 3977 } 3978 if (is_mbm_total_enabled()) { 3979 tsize = sizeof(*d->mbm_total); 3980 d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); 3981 if (!d->mbm_total) { 3982 bitmap_free(d->rmid_busy_llc); 3983 return -ENOMEM; 3984 } 3985 } 3986 if (is_mbm_local_enabled()) { 3987 tsize = sizeof(*d->mbm_local); 3988 d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); 3989 if (!d->mbm_local) { 3990 bitmap_free(d->rmid_busy_llc); 3991 kfree(d->mbm_total); 3992 return -ENOMEM; 3993 } 3994 } 3995 3996 return 0; 3997 } 3998 3999 int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) 4000 { 4001 int err = 0; 4002 4003 mutex_lock(&rdtgroup_mutex); 4004 4005 if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { 4006 /* RDT_RESOURCE_MBA is never mon_capable */ 4007 err = mba_sc_domain_allocate(r, d); 4008 goto out_unlock; 4009 } 4010 4011 if (!r->mon_capable) 4012 goto out_unlock; 4013 4014 err = domain_setup_mon_state(r, d); 4015 if (err) 4016 goto out_unlock; 4017 4018 if (is_mbm_enabled()) { 4019 INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); 4020 mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, 4021 RESCTRL_PICK_ANY_CPU); 4022 } 4023 4024 if (is_llc_occupancy_enabled()) 4025 INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); 4026 4027 /* 4028 * If the filesystem is not mounted then only the default resource group 4029 * exists. Creation of its directories is deferred until mount time 4030 * by rdt_get_tree() calling mkdir_mondata_all(). 4031 * If resctrl is mounted, add per domain monitor data directories. 4032 */ 4033 if (resctrl_mounted && resctrl_arch_mon_capable()) 4034 mkdir_mondata_subdir_allrdtgrp(r, d); 4035 4036 out_unlock: 4037 mutex_unlock(&rdtgroup_mutex); 4038 4039 return err; 4040 } 4041 4042 void resctrl_online_cpu(unsigned int cpu) 4043 { 4044 mutex_lock(&rdtgroup_mutex); 4045 /* The CPU is set in default rdtgroup after online. 
*/ 4046 cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); 4047 mutex_unlock(&rdtgroup_mutex); 4048 } 4049 4050 static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) 4051 { 4052 struct rdtgroup *cr; 4053 4054 list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { 4055 if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) 4056 break; 4057 } 4058 } 4059 4060 void resctrl_offline_cpu(unsigned int cpu) 4061 { 4062 struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 4063 struct rdtgroup *rdtgrp; 4064 struct rdt_domain *d; 4065 4066 mutex_lock(&rdtgroup_mutex); 4067 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { 4068 if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { 4069 clear_childcpus(rdtgrp, cpu); 4070 break; 4071 } 4072 } 4073 4074 if (!l3->mon_capable) 4075 goto out_unlock; 4076 4077 d = get_domain_from_cpu(cpu, l3); 4078 if (d) { 4079 if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { 4080 cancel_delayed_work(&d->mbm_over); 4081 mbm_setup_overflow_handler(d, 0, cpu); 4082 } 4083 if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && 4084 has_busy_rmid(d)) { 4085 cancel_delayed_work(&d->cqm_limbo); 4086 cqm_setup_limbo_handler(d, 0, cpu); 4087 } 4088 } 4089 4090 out_unlock: 4091 mutex_unlock(&rdtgroup_mutex); 4092 } 4093 4094 /* 4095 * rdtgroup_init - rdtgroup initialization 4096 * 4097 * Setup resctrl file system including set up root, create mount point, 4098 * register rdtgroup filesystem, and initialize files under root directory. 4099 * 4100 * Return: 0 on success or -errno 4101 */ 4102 int __init rdtgroup_init(void) 4103 { 4104 int ret = 0; 4105 4106 seq_buf_init(&last_cmd_status, last_cmd_status_buf, 4107 sizeof(last_cmd_status_buf)); 4108 4109 rdtgroup_setup_default(); 4110 4111 ret = sysfs_create_mount_point(fs_kobj, "resctrl"); 4112 if (ret) 4113 return ret; 4114 4115 ret = register_filesystem(&rdt_fs_type); 4116 if (ret) 4117 goto cleanup_mountpoint; 4118 4119 /* 4120 * Adding the resctrl debugfs directory here may not be ideal since 4121 * it would let the resctrl debugfs directory appear on the debugfs 4122 * filesystem before the resctrl filesystem is mounted. 4123 * It may also be ok since that would enable debugging of RDT before 4124 * resctrl is mounted. 4125 * The reason why the debugfs directory is created here and not in 4126 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and 4127 * during the debugfs directory creation also &sb->s_type->i_mutex_key 4128 * (the lockdep class of inode->i_rwsem). Other filesystem 4129 * interactions (eg. SyS_getdents) have the lock ordering: 4130 * &sb->s_type->i_mutex_key --> &mm->mmap_lock 4131 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex 4132 * is taken, thus creating dependency: 4133 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause 4134 * issues considering the other two lock dependencies. 4135 * By creating the debugfs directory here we avoid a dependency 4136 * that may cause deadlock (even though file operations cannot 4137 * occur until the filesystem is mounted, but I do not know how to 4138 * tell lockdep that). 4139 */ 4140 debugfs_resctrl = debugfs_create_dir("resctrl", NULL); 4141 4142 return 0; 4143 4144 cleanup_mountpoint: 4145 sysfs_remove_mount_point(fs_kobj, "resctrl"); 4146 4147 return ret; 4148 } 4149 4150 void __exit rdtgroup_exit(void) 4151 { 4152 debugfs_remove_recursive(debugfs_resctrl); 4153 unregister_filesystem(&rdt_fs_type); 4154 sysfs_remove_mount_point(fs_kobj, "resctrl"); 4155 } 4156
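/*
 * Illustrative end-to-end use of the interface implemented above. Paths,
 * option combinations and schemata values are examples only; the resctrl
 * filesystem documentation is the authoritative reference.
 *
 *   mount -t resctrl resctrl [-o cdp,cdpl2,mba_MBps,debug] /sys/fs/resctrl
 *   mkdir /sys/fs/resctrl/grp1                   # new CTRL_MON group
 *   echo "L3:0=ff" > /sys/fs/resctrl/grp1/schemata
 *   echo $$ > /sys/fs/resctrl/grp1/tasks
 *   mkdir /sys/fs/resctrl/grp1/mon_groups/m0     # new MON group
 */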