1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (C) 2025 Arm Ltd. 3 4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ 5 6 #include <linux/arm_mpam.h> 7 #include <linux/cacheinfo.h> 8 #include <linux/cpu.h> 9 #include <linux/cpumask.h> 10 #include <linux/errno.h> 11 #include <linux/limits.h> 12 #include <linux/list.h> 13 #include <linux/math.h> 14 #include <linux/printk.h> 15 #include <linux/rculist.h> 16 #include <linux/resctrl.h> 17 #include <linux/slab.h> 18 #include <linux/types.h> 19 #include <linux/wait.h> 20 21 #include <asm/mpam.h> 22 23 #include "mpam_internal.h" 24 25 DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); 26 27 /* 28 * The classes we've picked to map to resctrl resources, wrapped 29 * in with their resctrl structure. 30 * Class pointer may be NULL. 31 */ 32 static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; 33 34 #define for_each_mpam_resctrl_control(res, rid) \ 35 for (rid = 0, res = &mpam_resctrl_controls[rid]; \ 36 rid < RDT_NUM_RESOURCES; \ 37 rid++, res = &mpam_resctrl_controls[rid]) 38 39 /* 40 * The classes we've picked to map to resctrl events. 41 * Resctrl believes all the worlds a Xeon, and these are all on the L3. This 42 * array lets us find the actual class backing the event counters. e.g. 43 * the only memory bandwidth counters may be on the memory controller, but to 44 * make use of them, we pretend they are on L3. Restrict the events considered 45 * to those supported by MPAM. 46 * Class pointer may be NULL. 47 */ 48 #define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID 49 static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1]; 50 51 #define for_each_mpam_resctrl_mon(mon, eventid) \ 52 for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \ 53 eventid <= MPAM_MAX_EVENT; \ 54 eventid++, mon = &mpam_resctrl_counters[eventid]) 55 56 /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ 57 static DEFINE_MUTEX(domain_list_lock); 58 59 /* 60 * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1. 61 * This applies globally to all traffic the CPU generates. 62 */ 63 static bool cdp_enabled; 64 65 /* 66 * We use cacheinfo to discover the size of the caches and their id. cacheinfo 67 * populates this from a device_initcall(). mpam_resctrl_setup() must wait. 68 */ 69 static bool cacheinfo_ready; 70 static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); 71 72 /* 73 * If resctrl_init() succeeded, resctrl_exit() can be used to remove support 74 * for the filesystem in the event of an error. 75 */ 76 static bool resctrl_enabled; 77 78 bool resctrl_arch_alloc_capable(void) 79 { 80 struct mpam_resctrl_res *res; 81 enum resctrl_res_level rid; 82 83 for_each_mpam_resctrl_control(res, rid) { 84 if (res->resctrl_res.alloc_capable) 85 return true; 86 } 87 88 return false; 89 } 90 91 bool resctrl_arch_mon_capable(void) 92 { 93 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 94 struct rdt_resource *l3 = &res->resctrl_res; 95 96 /* All monitors are presented as being on the L3 cache */ 97 return l3->mon_capable; 98 } 99 100 bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) 101 { 102 return false; 103 } 104 105 void resctrl_arch_mon_event_config_read(void *info) 106 { 107 } 108 109 void resctrl_arch_mon_event_config_write(void *info) 110 { 111 } 112 113 void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) 114 { 115 } 116 117 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 118 u32 closid, u32 rmid, enum resctrl_event_id eventid) 119 { 120 } 121 122 void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 123 u32 closid, u32 rmid, int cntr_id, 124 enum resctrl_event_id eventid) 125 { 126 } 127 128 void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 129 enum resctrl_event_id evtid, u32 rmid, u32 closid, 130 u32 cntr_id, bool assign) 131 { 132 } 133 134 int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 135 u32 unused, u32 rmid, int cntr_id, 136 enum resctrl_event_id eventid, u64 *val) 137 { 138 return -EOPNOTSUPP; 139 } 140 141 bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) 142 { 143 return false; 144 } 145 146 int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) 147 { 148 return -EINVAL; 149 } 150 151 int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) 152 { 153 return -EOPNOTSUPP; 154 } 155 156 bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) 157 { 158 return false; 159 } 160 161 void resctrl_arch_pre_mount(void) 162 { 163 } 164 165 bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) 166 { 167 return mpam_resctrl_controls[rid].cdp_enabled; 168 } 169 170 /** 171 * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks. 172 * 173 * At boot, all existing tasks use partid zero for D and I. 174 * To enable/disable CDP emulation, all these tasks need relabelling. 175 */ 176 static void resctrl_reset_task_closids(void) 177 { 178 struct task_struct *p, *t; 179 180 read_lock(&tasklist_lock); 181 for_each_process_thread(p, t) { 182 resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID, 183 RESCTRL_RESERVED_RMID); 184 } 185 read_unlock(&tasklist_lock); 186 } 187 188 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) 189 { 190 u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; 191 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 192 struct rdt_resource *l3 = &res->resctrl_res; 193 int cpu; 194 195 if (!IS_ENABLED(CONFIG_EXPERT) && enable) { 196 /* 197 * If the resctrl fs is mounted more than once, sequentially, 198 * then CDP can lead to the use of out of range PARTIDs. 199 */ 200 pr_warn("CDP not supported\n"); 201 return -EOPNOTSUPP; 202 } 203 204 if (enable) 205 pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n"); 206 207 /* 208 * resctrl_arch_set_cdp_enabled() is only called with enable set to 209 * false on error and unmount. 210 */ 211 cdp_enabled = enable; 212 mpam_resctrl_controls[rid].cdp_enabled = enable; 213 214 if (enable) 215 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2; 216 else 217 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); 218 219 /* The mbw_max feature can't hide cdp as it's a per-partid maximum. */ 220 if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled) 221 mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false; 222 223 if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled && 224 mpam_resctrl_controls[RDT_RESOURCE_MBA].class) 225 mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true; 226 227 if (enable) { 228 if (mpam_partid_max < 1) 229 return -EINVAL; 230 231 partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA); 232 partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE); 233 } 234 235 mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0); 236 WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current)); 237 238 resctrl_reset_task_closids(); 239 240 for_each_possible_cpu(cpu) 241 mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0); 242 on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1); 243 244 return 0; 245 } 246 247 static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid) 248 { 249 return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid); 250 } 251 252 /* 253 * MSC may raise an error interrupt if it sees an out or range partid/pmg, 254 * and go on to truncate the value. Regardless of what the hardware supports, 255 * only the system wide safe value is safe to use. 256 */ 257 u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) 258 { 259 return mpam_partid_max + 1; 260 } 261 262 u32 resctrl_arch_system_num_rmid_idx(void) 263 { 264 return (mpam_pmg_max + 1) * (mpam_partid_max + 1); 265 } 266 267 u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) 268 { 269 return closid * (mpam_pmg_max + 1) + rmid; 270 } 271 272 void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) 273 { 274 *closid = idx / (mpam_pmg_max + 1); 275 *rmid = idx % (mpam_pmg_max + 1); 276 } 277 278 void resctrl_arch_sched_in(struct task_struct *tsk) 279 { 280 lockdep_assert_preemption_disabled(); 281 282 mpam_thread_switch(tsk); 283 } 284 285 void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid) 286 { 287 WARN_ON_ONCE(closid > U16_MAX); 288 WARN_ON_ONCE(rmid > U8_MAX); 289 290 if (!cdp_enabled) { 291 mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid); 292 } else { 293 /* 294 * When CDP is enabled, resctrl halves the closid range and we 295 * use odd/even partid for one closid. 296 */ 297 u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); 298 u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); 299 300 mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid); 301 } 302 } 303 304 void resctrl_arch_sync_cpu_closid_rmid(void *info) 305 { 306 struct resctrl_cpu_defaults *r = info; 307 308 lockdep_assert_preemption_disabled(); 309 310 if (r) { 311 resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(), 312 r->closid, r->rmid); 313 } 314 315 resctrl_arch_sched_in(current); 316 } 317 318 void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) 319 { 320 WARN_ON_ONCE(closid > U16_MAX); 321 WARN_ON_ONCE(rmid > U8_MAX); 322 323 if (!cdp_enabled) { 324 mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid); 325 } else { 326 u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); 327 u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); 328 329 mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid); 330 } 331 } 332 333 bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) 334 { 335 u64 regval = mpam_get_regval(tsk); 336 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); 337 338 if (cdp_enabled) 339 tsk_closid >>= 1; 340 341 return tsk_closid == closid; 342 } 343 344 /* The task's pmg is not unique, the partid must be considered too */ 345 bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) 346 { 347 u64 regval = mpam_get_regval(tsk); 348 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); 349 u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval); 350 351 if (cdp_enabled) 352 tsk_closid >>= 1; 353 354 return (tsk_closid == closid) && (tsk_rmid == rmid); 355 } 356 357 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) 358 { 359 if (l >= RDT_NUM_RESOURCES) 360 return NULL; 361 362 return &mpam_resctrl_controls[l].resctrl_res; 363 } 364 365 static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid) 366 { 367 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; 368 369 if (!mpam_is_enabled()) 370 return -EINVAL; 371 372 if (!mon->class) 373 return -EINVAL; 374 375 switch (evtid) { 376 case QOS_L3_OCCUP_EVENT_ID: 377 /* With CDP, one monitor gets used for both code/data reads */ 378 return mpam_alloc_csu_mon(mon->class); 379 case QOS_L3_MBM_LOCAL_EVENT_ID: 380 case QOS_L3_MBM_TOTAL_EVENT_ID: 381 return USE_PRE_ALLOCATED; 382 default: 383 return -EOPNOTSUPP; 384 } 385 } 386 387 void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, 388 enum resctrl_event_id evtid) 389 { 390 DEFINE_WAIT(wait); 391 int *ret; 392 393 ret = kmalloc_obj(*ret); 394 if (!ret) 395 return ERR_PTR(-ENOMEM); 396 397 do { 398 prepare_to_wait(&resctrl_mon_ctx_waiters, &wait, 399 TASK_INTERRUPTIBLE); 400 *ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid); 401 if (*ret == -ENOSPC) 402 schedule(); 403 } while (*ret == -ENOSPC && !signal_pending(current)); 404 finish_wait(&resctrl_mon_ctx_waiters, &wait); 405 406 return ret; 407 } 408 409 static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid, 410 u32 mon_idx) 411 { 412 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; 413 414 if (!mpam_is_enabled()) 415 return; 416 417 if (!mon->class) 418 return; 419 420 if (evtid == QOS_L3_OCCUP_EVENT_ID) 421 mpam_free_csu_mon(mon->class, mon_idx); 422 423 wake_up(&resctrl_mon_ctx_waiters); 424 } 425 426 void resctrl_arch_mon_ctx_free(struct rdt_resource *r, 427 enum resctrl_event_id evtid, void *arch_mon_ctx) 428 { 429 u32 mon_idx = *(u32 *)arch_mon_ctx; 430 431 kfree(arch_mon_ctx); 432 433 resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); 434 } 435 436 static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, 437 enum mpam_device_features mon_type, 438 int mon_idx, 439 enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) 440 { 441 struct mon_cfg cfg; 442 443 if (!mpam_is_enabled()) 444 return -EINVAL; 445 446 /* Shift closid to account for CDP */ 447 closid = resctrl_get_config_index(closid, cdp_type); 448 449 if (irqs_disabled()) { 450 /* Check if we can access this domain without an IPI */ 451 return -EIO; 452 } 453 454 cfg = (struct mon_cfg) { 455 .mon = mon_idx, 456 .match_pmg = true, 457 .partid = closid, 458 .pmg = rmid, 459 }; 460 461 return mpam_msmon_read(mon_comp, &cfg, mon_type, val); 462 } 463 464 static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, 465 enum mpam_device_features mon_type, 466 int mon_idx, u32 closid, u32 rmid, u64 *val) 467 { 468 if (cdp_enabled) { 469 u64 code_val = 0, data_val = 0; 470 int err; 471 472 err = __read_mon(mon, mon_comp, mon_type, mon_idx, 473 CDP_CODE, closid, rmid, &code_val); 474 if (err) 475 return err; 476 477 err = __read_mon(mon, mon_comp, mon_type, mon_idx, 478 CDP_DATA, closid, rmid, &data_val); 479 if (err) 480 return err; 481 482 *val += code_val + data_val; 483 return 0; 484 } 485 486 return __read_mon(mon, mon_comp, mon_type, mon_idx, 487 CDP_NONE, closid, rmid, val); 488 } 489 490 /* MBWU when not in ABMC mode (not supported), and CSU counters. */ 491 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, 492 u32 closid, u32 rmid, enum resctrl_event_id eventid, 493 void *arch_priv, u64 *val, void *arch_mon_ctx) 494 { 495 struct mpam_resctrl_dom *l3_dom; 496 struct mpam_component *mon_comp; 497 u32 mon_idx = *(u32 *)arch_mon_ctx; 498 enum mpam_device_features mon_type; 499 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; 500 501 resctrl_arch_rmid_read_context_check(); 502 503 if (!mpam_is_enabled()) 504 return -EINVAL; 505 506 if (eventid >= QOS_NUM_EVENTS || !mon->class) 507 return -EINVAL; 508 509 l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr); 510 mon_comp = l3_dom->mon_comp[eventid]; 511 512 if (eventid != QOS_L3_OCCUP_EVENT_ID) 513 return -EINVAL; 514 515 mon_type = mpam_feat_msmon_csu; 516 517 return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx, 518 closid, rmid, val); 519 } 520 521 /* 522 * The rmid realloc threshold should be for the smallest cache exposed to 523 * resctrl. 524 */ 525 static int update_rmid_limits(struct mpam_class *class) 526 { 527 u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); 528 struct mpam_props *cprops = &class->props; 529 struct cacheinfo *ci; 530 531 lockdep_assert_cpus_held(); 532 533 if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) 534 return 0; 535 536 /* 537 * Assume cache levels are the same size for all CPUs... 538 * The check just requires any online CPU and it can't go offline as we 539 * hold the cpu lock. 540 */ 541 ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level); 542 if (!ci || ci->size == 0) { 543 pr_debug("Could not read cache size for class %u\n", 544 class->level); 545 return -EINVAL; 546 } 547 548 if (!resctrl_rmid_realloc_limit || 549 ci->size < resctrl_rmid_realloc_limit) { 550 resctrl_rmid_realloc_limit = ci->size; 551 resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg; 552 } 553 554 return 0; 555 } 556 557 static bool cache_has_usable_cpor(struct mpam_class *class) 558 { 559 struct mpam_props *cprops = &class->props; 560 561 if (!mpam_has_feature(mpam_feat_cpor_part, cprops)) 562 return false; 563 564 /* resctrl uses u32 for all bitmap configurations */ 565 return class->props.cpbm_wd <= 32; 566 } 567 568 static bool mba_class_use_mbw_max(struct mpam_props *cprops) 569 { 570 return (mpam_has_feature(mpam_feat_mbw_max, cprops) && 571 cprops->bwa_wd); 572 } 573 574 static bool class_has_usable_mba(struct mpam_props *cprops) 575 { 576 return mba_class_use_mbw_max(cprops); 577 } 578 579 static bool cache_has_usable_csu(struct mpam_class *class) 580 { 581 struct mpam_props *cprops; 582 583 if (!class) 584 return false; 585 586 cprops = &class->props; 587 588 if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) 589 return false; 590 591 /* 592 * CSU counters settle on the value, so we can get away with 593 * having only one. 594 */ 595 if (!cprops->num_csu_mon) 596 return false; 597 598 return true; 599 } 600 601 /* 602 * Calculate the worst-case percentage change from each implemented step 603 * in the control. 604 */ 605 static u32 get_mba_granularity(struct mpam_props *cprops) 606 { 607 if (!mba_class_use_mbw_max(cprops)) 608 return 0; 609 610 /* 611 * bwa_wd is the number of bits implemented in the 0.xxx 612 * fixed point fraction. 1 bit is 50%, 2 is 25% etc. 613 */ 614 return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd); 615 } 616 617 /* 618 * Each fixed-point hardware value architecturally represents a range 619 * of values: the full range 0% - 100% is split contiguously into 620 * (1 << cprops->bwa_wd) equal bands. 621 * 622 * Although the bwa_bwd fields have 6 bits the maximum valid value is 16 623 * as it reports the width of fields that are at most 16 bits. When 624 * fewer than 16 bits are valid the least significant bits are 625 * ignored. The implied binary point is kept between bits 15 and 16 and 626 * so the valid bits are leftmost. 627 * 628 * See ARM IHI0099B.a "MPAM system component specification", Section 9.3, 629 * "The fixed-point fractional format" for more information. 630 * 631 * Find the nearest percentage value to the upper bound of the selected band: 632 */ 633 static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) 634 { 635 u32 val = mbw_max; 636 637 val >>= 16 - cprops->bwa_wd; 638 val += 1; 639 val *= MAX_MBA_BW; 640 val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd); 641 642 return val; 643 } 644 645 /* 646 * Find the band whose upper bound is closest to the specified percentage. 647 * 648 * A round-to-nearest policy is followed here as a balanced compromise 649 * between unexpected under-commit of the resource (where the total of 650 * a set of resource allocations after conversion is less than the 651 * expected total, due to rounding of the individual converted 652 * percentages) and over-commit (where the total of the converted 653 * allocations is greater than expected). 654 */ 655 static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) 656 { 657 u32 val = pc; 658 659 val <<= cprops->bwa_wd; 660 val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); 661 val = max(val, 1) - 1; 662 val <<= 16 - cprops->bwa_wd; 663 664 return val; 665 } 666 667 static u32 get_mba_min(struct mpam_props *cprops) 668 { 669 if (!mba_class_use_mbw_max(cprops)) { 670 WARN_ON_ONCE(1); 671 return 0; 672 } 673 674 return mbw_max_to_percent(0, cprops); 675 } 676 677 /* Find the L3 cache that has affinity with this CPU */ 678 static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask) 679 { 680 u32 cache_id = get_cpu_cacheinfo_id(cpu, 3); 681 682 lockdep_assert_cpus_held(); 683 684 return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask); 685 } 686 687 /* 688 * topology_matches_l3() - Is the provided class the same shape as L3 689 * @victim: The class we'd like to pretend is L3. 690 * 691 * resctrl expects all the world's a Xeon, and all counters are on the 692 * L3. We allow some mapping counters on other classes. This requires 693 * that the CPU->domain mapping is the same kind of shape. 694 * 695 * Using cacheinfo directly would make this work even if resctrl can't 696 * use the L3 - but cacheinfo can't tell us anything about offline CPUs. 697 * Using the L3 resctrl domain list also depends on CPUs being online. 698 * Using the mpam_class we picked for L3 so we can use its domain list 699 * assumes that there are MPAM controls on the L3. 700 * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id() 701 * helper which can tell us about offline CPUs ... but getting the cache_id 702 * to start with relies on at least one CPU per L3 cache being online at 703 * boot. 704 * 705 * Walk the victim component list and compare the affinity mask with the 706 * corresponding L3. The topology matches if each victim:component's affinity 707 * mask is the same as the CPU's corresponding L3's. These lists/masks are 708 * computed from firmware tables so don't change at runtime. 709 */ 710 static bool topology_matches_l3(struct mpam_class *victim) 711 { 712 int cpu, err; 713 struct mpam_component *victim_iter; 714 715 lockdep_assert_cpus_held(); 716 717 cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; 718 if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) 719 return false; 720 721 guard(srcu)(&mpam_srcu); 722 list_for_each_entry_srcu(victim_iter, &victim->components, class_list, 723 srcu_read_lock_held(&mpam_srcu)) { 724 if (cpumask_empty(&victim_iter->affinity)) { 725 pr_debug("class %u has CPU-less component %u - can't match L3!\n", 726 victim->level, victim_iter->comp_id); 727 return false; 728 } 729 730 cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask); 731 if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) 732 return false; 733 734 cpumask_clear(tmp_cpumask); 735 err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); 736 if (err) { 737 pr_debug("Failed to find L3's equivalent component to class %u component %u\n", 738 victim->level, victim_iter->comp_id); 739 return false; 740 } 741 742 /* Any differing bits in the affinity mask? */ 743 if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) { 744 pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n" 745 "L3:%*pbl != victim:%*pbl\n", 746 victim->level, victim_iter->comp_id, 747 cpumask_pr_args(tmp_cpumask), 748 cpumask_pr_args(&victim_iter->affinity)); 749 750 return false; 751 } 752 } 753 754 return true; 755 } 756 757 /* 758 * Test if the traffic for a class matches that at egress from the L3. For 759 * MSC at memory controllers this is only possible if there is a single L3 760 * as otherwise the counters at the memory can include bandwidth from the 761 * non-local L3. 762 */ 763 static bool traffic_matches_l3(struct mpam_class *class) 764 { 765 int err, cpu; 766 767 lockdep_assert_cpus_held(); 768 769 if (class->type == MPAM_CLASS_CACHE && class->level == 3) 770 return true; 771 772 if (class->type == MPAM_CLASS_CACHE && class->level != 3) { 773 pr_debug("class %u is a different cache from L3\n", class->level); 774 return false; 775 } 776 777 if (class->type != MPAM_CLASS_MEMORY) { 778 pr_debug("class %u is neither of type cache or memory\n", class->level); 779 return false; 780 } 781 782 cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; 783 if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) { 784 pr_debug("cpumask allocation failed\n"); 785 return false; 786 } 787 788 cpu = cpumask_any_and(&class->affinity, cpu_online_mask); 789 err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); 790 if (err) { 791 pr_debug("Failed to find L3 downstream to cpu %d\n", cpu); 792 return false; 793 } 794 795 if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) { 796 pr_debug("There is more than one L3\n"); 797 return false; 798 } 799 800 /* Be strict; the traffic might stop in the intermediate cache. */ 801 if (get_cpu_cacheinfo_id(cpu, 4) != -1) { 802 pr_debug("L3 isn't the last level of cache\n"); 803 return false; 804 } 805 806 if (num_possible_nodes() > 1) { 807 pr_debug("There is more than one numa node\n"); 808 return false; 809 } 810 811 #ifdef CONFIG_HMEM_REPORTING 812 if (node_devices[cpu_to_node(cpu)]->cache_dev) { 813 pr_debug("There is a memory side cache\n"); 814 return false; 815 } 816 #endif 817 818 return true; 819 } 820 821 /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ 822 static void mpam_resctrl_pick_caches(void) 823 { 824 struct mpam_class *class; 825 struct mpam_resctrl_res *res; 826 827 lockdep_assert_cpus_held(); 828 829 guard(srcu)(&mpam_srcu); 830 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 831 srcu_read_lock_held(&mpam_srcu)) { 832 if (class->type != MPAM_CLASS_CACHE) { 833 pr_debug("class %u is not a cache\n", class->level); 834 continue; 835 } 836 837 if (class->level != 2 && class->level != 3) { 838 pr_debug("class %u is not L2 or L3\n", class->level); 839 continue; 840 } 841 842 if (!cache_has_usable_cpor(class)) { 843 pr_debug("class %u cache misses CPOR\n", class->level); 844 continue; 845 } 846 847 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 848 pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level, 849 cpumask_pr_args(&class->affinity), 850 cpumask_pr_args(cpu_possible_mask)); 851 continue; 852 } 853 854 if (class->level == 2) 855 res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; 856 else 857 res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 858 res->class = class; 859 } 860 } 861 862 static void mpam_resctrl_pick_mba(void) 863 { 864 struct mpam_class *class, *candidate_class = NULL; 865 struct mpam_resctrl_res *res; 866 867 lockdep_assert_cpus_held(); 868 869 guard(srcu)(&mpam_srcu); 870 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 871 srcu_read_lock_held(&mpam_srcu)) { 872 struct mpam_props *cprops = &class->props; 873 874 if (class->level != 3 && class->type == MPAM_CLASS_CACHE) { 875 pr_debug("class %u is a cache but not the L3\n", class->level); 876 continue; 877 } 878 879 if (!class_has_usable_mba(cprops)) { 880 pr_debug("class %u has no bandwidth control\n", 881 class->level); 882 continue; 883 } 884 885 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 886 pr_debug("class %u has missing CPUs\n", class->level); 887 continue; 888 } 889 890 if (!topology_matches_l3(class)) { 891 pr_debug("class %u topology doesn't match L3\n", 892 class->level); 893 continue; 894 } 895 896 if (!traffic_matches_l3(class)) { 897 pr_debug("class %u traffic doesn't match L3 egress\n", 898 class->level); 899 continue; 900 } 901 902 /* 903 * Pick a resource to be MBA that as close as possible to 904 * the L3. mbm_total counts the bandwidth leaving the L3 905 * cache and MBA should correspond as closely as possible 906 * for proper operation of mba_sc. 907 */ 908 if (!candidate_class || class->level < candidate_class->level) 909 candidate_class = class; 910 } 911 912 if (candidate_class) { 913 pr_debug("selected class %u to back MBA\n", 914 candidate_class->level); 915 res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; 916 res->class = candidate_class; 917 } 918 } 919 920 static void counter_update_class(enum resctrl_event_id evt_id, 921 struct mpam_class *class) 922 { 923 struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class; 924 925 if (existing_class) { 926 if (class->level == 3) { 927 pr_debug("Existing class is L3 - L3 wins\n"); 928 return; 929 } 930 931 if (existing_class->level < class->level) { 932 pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n", 933 existing_class->level, class->level); 934 return; 935 } 936 } 937 938 mpam_resctrl_counters[evt_id].class = class; 939 } 940 941 static void mpam_resctrl_pick_counters(void) 942 { 943 struct mpam_class *class; 944 945 lockdep_assert_cpus_held(); 946 947 guard(srcu)(&mpam_srcu); 948 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 949 srcu_read_lock_held(&mpam_srcu)) { 950 /* The name of the resource is L3... */ 951 if (class->type == MPAM_CLASS_CACHE && class->level != 3) { 952 pr_debug("class %u is a cache but not the L3", class->level); 953 continue; 954 } 955 956 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 957 pr_debug("class %u does not cover all CPUs", 958 class->level); 959 continue; 960 } 961 962 if (cache_has_usable_csu(class)) { 963 pr_debug("class %u has usable CSU", 964 class->level); 965 966 /* CSU counters only make sense on a cache. */ 967 switch (class->type) { 968 case MPAM_CLASS_CACHE: 969 if (update_rmid_limits(class)) 970 break; 971 972 counter_update_class(QOS_L3_OCCUP_EVENT_ID, class); 973 break; 974 default: 975 break; 976 } 977 } 978 } 979 } 980 981 static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) 982 { 983 struct mpam_class *class = res->class; 984 struct mpam_props *cprops = &class->props; 985 struct rdt_resource *r = &res->resctrl_res; 986 987 switch (r->rid) { 988 case RDT_RESOURCE_L2: 989 case RDT_RESOURCE_L3: 990 r->schema_fmt = RESCTRL_SCHEMA_BITMAP; 991 r->cache.arch_has_sparse_bitmasks = true; 992 993 r->cache.cbm_len = class->props.cpbm_wd; 994 /* mpam_devices will reject empty bitmaps */ 995 r->cache.min_cbm_bits = 1; 996 997 if (r->rid == RDT_RESOURCE_L2) { 998 r->name = "L2"; 999 r->ctrl_scope = RESCTRL_L2_CACHE; 1000 r->cdp_capable = true; 1001 } else { 1002 r->name = "L3"; 1003 r->ctrl_scope = RESCTRL_L3_CACHE; 1004 r->cdp_capable = true; 1005 } 1006 1007 /* 1008 * Which bits are shared with other ...things... Unknown 1009 * devices use partid-0 which uses all the bitmap fields. Until 1010 * we have configured the SMMU and GIC not to do this 'all the 1011 * bits' is the correct answer here. 1012 */ 1013 r->cache.shareable_bits = resctrl_get_default_ctrl(r); 1014 r->alloc_capable = true; 1015 break; 1016 case RDT_RESOURCE_MBA: 1017 r->schema_fmt = RESCTRL_SCHEMA_RANGE; 1018 r->ctrl_scope = RESCTRL_L3_CACHE; 1019 1020 r->membw.delay_linear = true; 1021 r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; 1022 r->membw.min_bw = get_mba_min(cprops); 1023 r->membw.max_bw = MAX_MBA_BW; 1024 r->membw.bw_gran = get_mba_granularity(cprops); 1025 1026 r->name = "MB"; 1027 r->alloc_capable = true; 1028 break; 1029 default: 1030 return -EINVAL; 1031 } 1032 1033 return 0; 1034 } 1035 1036 static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) 1037 { 1038 struct mpam_class *class = comp->class; 1039 1040 if (class->type == MPAM_CLASS_CACHE) 1041 return comp->comp_id; 1042 1043 if (topology_matches_l3(class)) { 1044 /* Use the corresponding L3 component ID as the domain ID */ 1045 int id = get_cpu_cacheinfo_id(cpu, 3); 1046 1047 /* Implies topology_matches_l3() made a mistake */ 1048 if (WARN_ON_ONCE(id == -1)) 1049 return comp->comp_id; 1050 1051 return id; 1052 } 1053 1054 /* Otherwise, expose the ID used by the firmware table code. */ 1055 return comp->comp_id; 1056 } 1057 1058 static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, 1059 enum resctrl_event_id type) 1060 { 1061 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 1062 struct rdt_resource *l3 = &res->resctrl_res; 1063 1064 lockdep_assert_cpus_held(); 1065 1066 /* 1067 * There also needs to be an L3 cache present. 1068 * The check just requires any online CPU and it can't go offline as we 1069 * hold the cpu lock. 1070 */ 1071 if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1) 1072 return 0; 1073 1074 /* 1075 * If there are no MPAM resources on L3, force it into existence. 1076 * topology_matches_l3() already ensures this looks like the L3. 1077 * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init(). 1078 */ 1079 if (!res->class) { 1080 pr_warn_once("Faking L3 MSC to enable counters.\n"); 1081 res->class = mpam_resctrl_counters[type].class; 1082 } 1083 1084 /* 1085 * Called multiple times!, once per event type that has a 1086 * monitoring class. 1087 * Setting name is necessary on monitor only platforms. 1088 */ 1089 l3->name = "L3"; 1090 l3->mon_scope = RESCTRL_L3_CACHE; 1091 1092 /* 1093 * num-rmid is the upper bound for the number of monitoring groups that 1094 * can exist simultaneously, including the default monitoring group for 1095 * each control group. Hence, advertise the whole rmid_idx space even 1096 * though each control group has its own pmg/rmid space. Unfortunately, 1097 * this does mean userspace needs to know the architecture to correctly 1098 * interpret this value. 1099 */ 1100 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); 1101 1102 if (resctrl_enable_mon_event(type, false, 0, NULL)) 1103 l3->mon_capable = true; 1104 1105 return 0; 1106 } 1107 1108 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, 1109 u32 closid, enum resctrl_conf_type type) 1110 { 1111 u32 partid; 1112 struct mpam_config *cfg; 1113 struct mpam_props *cprops; 1114 struct mpam_resctrl_res *res; 1115 struct mpam_resctrl_dom *dom; 1116 enum mpam_device_features configured_by; 1117 1118 lockdep_assert_cpus_held(); 1119 1120 if (!mpam_is_enabled()) 1121 return resctrl_get_default_ctrl(r); 1122 1123 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1124 dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); 1125 cprops = &res->class->props; 1126 1127 /* 1128 * When CDP is enabled, but the resource doesn't support it, 1129 * the control is cloned across both partids. 1130 * Pick one at random to read: 1131 */ 1132 if (mpam_resctrl_hide_cdp(r->rid)) 1133 type = CDP_DATA; 1134 1135 partid = resctrl_get_config_index(closid, type); 1136 cfg = &dom->ctrl_comp->cfg[partid]; 1137 1138 switch (r->rid) { 1139 case RDT_RESOURCE_L2: 1140 case RDT_RESOURCE_L3: 1141 configured_by = mpam_feat_cpor_part; 1142 break; 1143 case RDT_RESOURCE_MBA: 1144 if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { 1145 configured_by = mpam_feat_mbw_max; 1146 break; 1147 } 1148 fallthrough; 1149 default: 1150 return resctrl_get_default_ctrl(r); 1151 } 1152 1153 if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || 1154 !mpam_has_feature(configured_by, cfg)) 1155 return resctrl_get_default_ctrl(r); 1156 1157 switch (configured_by) { 1158 case mpam_feat_cpor_part: 1159 return cfg->cpbm; 1160 case mpam_feat_mbw_max: 1161 return mbw_max_to_percent(cfg->mbw_max, cprops); 1162 default: 1163 return resctrl_get_default_ctrl(r); 1164 } 1165 } 1166 1167 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, 1168 u32 closid, enum resctrl_conf_type t, u32 cfg_val) 1169 { 1170 int err; 1171 u32 partid; 1172 struct mpam_config cfg; 1173 struct mpam_props *cprops; 1174 struct mpam_resctrl_res *res; 1175 struct mpam_resctrl_dom *dom; 1176 1177 lockdep_assert_cpus_held(); 1178 lockdep_assert_irqs_enabled(); 1179 1180 if (!mpam_is_enabled()) 1181 return -EINVAL; 1182 1183 /* 1184 * No need to check the CPU as mpam_apply_config() doesn't care, and 1185 * resctrl_arch_update_domains() relies on this. 1186 */ 1187 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1188 dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); 1189 cprops = &res->class->props; 1190 1191 if (mpam_resctrl_hide_cdp(r->rid)) 1192 t = CDP_DATA; 1193 1194 partid = resctrl_get_config_index(closid, t); 1195 if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) { 1196 pr_debug("Not alloc capable or computed PARTID out of range\n"); 1197 return -EINVAL; 1198 } 1199 1200 /* 1201 * Copy the current config to avoid clearing other resources when the 1202 * same component is exposed multiple times through resctrl. 1203 */ 1204 cfg = dom->ctrl_comp->cfg[partid]; 1205 1206 switch (r->rid) { 1207 case RDT_RESOURCE_L2: 1208 case RDT_RESOURCE_L3: 1209 cfg.cpbm = cfg_val; 1210 mpam_set_feature(mpam_feat_cpor_part, &cfg); 1211 break; 1212 case RDT_RESOURCE_MBA: 1213 if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { 1214 cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); 1215 mpam_set_feature(mpam_feat_mbw_max, &cfg); 1216 break; 1217 } 1218 fallthrough; 1219 default: 1220 return -EINVAL; 1221 } 1222 1223 /* 1224 * When CDP is enabled, but the resource doesn't support it, we need to 1225 * apply the same configuration to the other partid. 1226 */ 1227 if (mpam_resctrl_hide_cdp(r->rid)) { 1228 partid = resctrl_get_config_index(closid, CDP_CODE); 1229 err = mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1230 if (err) 1231 return err; 1232 1233 partid = resctrl_get_config_index(closid, CDP_DATA); 1234 return mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1235 } 1236 1237 return mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1238 } 1239 1240 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) 1241 { 1242 int err; 1243 struct rdt_ctrl_domain *d; 1244 1245 lockdep_assert_cpus_held(); 1246 lockdep_assert_irqs_enabled(); 1247 1248 if (!mpam_is_enabled()) 1249 return -EINVAL; 1250 1251 list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) { 1252 for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) { 1253 struct resctrl_staged_config *cfg = &d->staged_config[t]; 1254 1255 if (!cfg->have_new_ctrl) 1256 continue; 1257 1258 err = resctrl_arch_update_one(r, d, closid, t, 1259 cfg->new_ctrl); 1260 if (err) 1261 return err; 1262 } 1263 } 1264 1265 return 0; 1266 } 1267 1268 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) 1269 { 1270 struct mpam_resctrl_res *res; 1271 1272 lockdep_assert_cpus_held(); 1273 1274 if (!mpam_is_enabled()) 1275 return; 1276 1277 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1278 mpam_reset_class_locked(res->class); 1279 } 1280 1281 static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp, 1282 enum resctrl_res_level rid, 1283 struct rdt_domain_hdr *hdr) 1284 { 1285 lockdep_assert_cpus_held(); 1286 1287 INIT_LIST_HEAD(&hdr->list); 1288 hdr->id = mpam_resctrl_pick_domain_id(cpu, comp); 1289 hdr->rid = rid; 1290 cpumask_set_cpu(cpu, &hdr->cpu_mask); 1291 } 1292 1293 static void mpam_resctrl_online_domain_hdr(unsigned int cpu, 1294 struct rdt_domain_hdr *hdr) 1295 { 1296 lockdep_assert_cpus_held(); 1297 1298 cpumask_set_cpu(cpu, &hdr->cpu_mask); 1299 } 1300 1301 /** 1302 * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU. 1303 * @cpu: The CPU to remove from the domain. 1304 * @hdr: The domain's header. 1305 * 1306 * Removes @cpu from the header mask. If this was the last CPU in the domain, 1307 * the domain header is removed from its parent list and true is returned, 1308 * indicating the parent structure can be freed. 1309 * If there are other CPUs in the domain, returns false. 1310 */ 1311 static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu, 1312 struct rdt_domain_hdr *hdr) 1313 { 1314 lockdep_assert_held(&domain_list_lock); 1315 1316 cpumask_clear_cpu(cpu, &hdr->cpu_mask); 1317 if (cpumask_empty(&hdr->cpu_mask)) { 1318 list_del_rcu(&hdr->list); 1319 synchronize_rcu(); 1320 return true; 1321 } 1322 1323 return false; 1324 } 1325 1326 static void mpam_resctrl_domain_insert(struct list_head *list, 1327 struct rdt_domain_hdr *new) 1328 { 1329 struct rdt_domain_hdr *err; 1330 struct list_head *pos = NULL; 1331 1332 lockdep_assert_held(&domain_list_lock); 1333 1334 err = resctrl_find_domain(list, new->id, &pos); 1335 if (WARN_ON_ONCE(err)) 1336 return; 1337 1338 list_add_tail_rcu(&new->list, pos); 1339 } 1340 1341 static struct mpam_component *find_component(struct mpam_class *class, int cpu) 1342 { 1343 struct mpam_component *comp; 1344 1345 guard(srcu)(&mpam_srcu); 1346 list_for_each_entry_srcu(comp, &class->components, class_list, 1347 srcu_read_lock_held(&mpam_srcu)) { 1348 if (cpumask_test_cpu(cpu, &comp->affinity)) 1349 return comp; 1350 } 1351 1352 return NULL; 1353 } 1354 1355 static struct mpam_resctrl_dom * 1356 mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) 1357 { 1358 int err; 1359 struct mpam_resctrl_dom *dom; 1360 struct rdt_l3_mon_domain *mon_d; 1361 struct rdt_ctrl_domain *ctrl_d; 1362 struct mpam_class *class = res->class; 1363 struct mpam_component *comp_iter, *ctrl_comp; 1364 struct rdt_resource *r = &res->resctrl_res; 1365 1366 lockdep_assert_held(&domain_list_lock); 1367 1368 ctrl_comp = NULL; 1369 guard(srcu)(&mpam_srcu); 1370 list_for_each_entry_srcu(comp_iter, &class->components, class_list, 1371 srcu_read_lock_held(&mpam_srcu)) { 1372 if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { 1373 ctrl_comp = comp_iter; 1374 break; 1375 } 1376 } 1377 1378 /* class has no component for this CPU */ 1379 if (WARN_ON_ONCE(!ctrl_comp)) 1380 return ERR_PTR(-EINVAL); 1381 1382 dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); 1383 if (!dom) 1384 return ERR_PTR(-ENOMEM); 1385 1386 if (r->alloc_capable) { 1387 dom->ctrl_comp = ctrl_comp; 1388 1389 ctrl_d = &dom->resctrl_ctrl_dom; 1390 mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr); 1391 ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; 1392 err = resctrl_online_ctrl_domain(r, ctrl_d); 1393 if (err) 1394 goto free_domain; 1395 1396 mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); 1397 } else { 1398 pr_debug("Skipped control domain online - no controls\n"); 1399 } 1400 1401 if (r->mon_capable) { 1402 struct mpam_component *any_mon_comp; 1403 struct mpam_resctrl_mon *mon; 1404 enum resctrl_event_id eventid; 1405 1406 /* 1407 * Even if the monitor domain is backed by a different 1408 * component, the L3 component IDs need to be used... only 1409 * there may be no ctrl_comp for the L3. 1410 * Search each event's class list for a component with 1411 * overlapping CPUs and set up the dom->mon_comp array. 1412 */ 1413 1414 for_each_mpam_resctrl_mon(mon, eventid) { 1415 struct mpam_component *mon_comp; 1416 1417 if (!mon->class) 1418 continue; // dummy resource 1419 1420 mon_comp = find_component(mon->class, cpu); 1421 dom->mon_comp[eventid] = mon_comp; 1422 if (mon_comp) 1423 any_mon_comp = mon_comp; 1424 } 1425 if (!any_mon_comp) { 1426 WARN_ON_ONCE(0); 1427 err = -EFAULT; 1428 goto offline_ctrl_domain; 1429 } 1430 1431 mon_d = &dom->resctrl_mon_dom; 1432 mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr); 1433 mon_d->hdr.type = RESCTRL_MON_DOMAIN; 1434 err = resctrl_online_mon_domain(r, &mon_d->hdr); 1435 if (err) 1436 goto offline_ctrl_domain; 1437 1438 mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr); 1439 } else { 1440 pr_debug("Skipped monitor domain online - no monitors\n"); 1441 } 1442 1443 return dom; 1444 1445 offline_ctrl_domain: 1446 if (r->alloc_capable) { 1447 mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); 1448 resctrl_offline_ctrl_domain(r, ctrl_d); 1449 } 1450 free_domain: 1451 kfree(dom); 1452 dom = ERR_PTR(err); 1453 1454 return dom; 1455 } 1456 1457 /* 1458 * We know all the monitors are associated with the L3, even if there are no 1459 * controls and therefore no control component. Find the cache-id for the CPU 1460 * and use that to search for existing resctrl domains. 1461 * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id 1462 * for anything that is not a cache. 1463 */ 1464 static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) 1465 { 1466 int cache_id; 1467 struct mpam_resctrl_dom *dom; 1468 struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 1469 1470 lockdep_assert_cpus_held(); 1471 1472 if (!l3->class) 1473 return NULL; 1474 cache_id = get_cpu_cacheinfo_id(cpu, 3); 1475 if (cache_id < 0) 1476 return NULL; 1477 1478 list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { 1479 if (dom->resctrl_mon_dom.hdr.id == cache_id) 1480 return dom; 1481 } 1482 1483 return NULL; 1484 } 1485 1486 static struct mpam_resctrl_dom * 1487 mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) 1488 { 1489 struct mpam_resctrl_dom *dom; 1490 struct rdt_resource *r = &res->resctrl_res; 1491 1492 lockdep_assert_cpus_held(); 1493 1494 list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) { 1495 if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) 1496 return dom; 1497 } 1498 1499 if (r->rid != RDT_RESOURCE_L3) 1500 return NULL; 1501 1502 /* Search the mon domain list too - needed on monitor only platforms. */ 1503 return mpam_resctrl_get_mon_domain_from_cpu(cpu); 1504 } 1505 1506 int mpam_resctrl_online_cpu(unsigned int cpu) 1507 { 1508 struct mpam_resctrl_res *res; 1509 enum resctrl_res_level rid; 1510 1511 guard(mutex)(&domain_list_lock); 1512 for_each_mpam_resctrl_control(res, rid) { 1513 struct mpam_resctrl_dom *dom; 1514 struct rdt_resource *r = &res->resctrl_res; 1515 1516 if (!res->class) 1517 continue; // dummy_resource; 1518 1519 dom = mpam_resctrl_get_domain_from_cpu(cpu, res); 1520 if (!dom) { 1521 dom = mpam_resctrl_alloc_domain(cpu, res); 1522 if (IS_ERR(dom)) 1523 return PTR_ERR(dom); 1524 } else { 1525 if (r->alloc_capable) { 1526 struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom; 1527 1528 mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); 1529 } 1530 if (r->mon_capable) { 1531 struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom; 1532 1533 mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); 1534 } 1535 } 1536 } 1537 1538 resctrl_online_cpu(cpu); 1539 1540 return 0; 1541 } 1542 1543 void mpam_resctrl_offline_cpu(unsigned int cpu) 1544 { 1545 struct mpam_resctrl_res *res; 1546 enum resctrl_res_level rid; 1547 1548 resctrl_offline_cpu(cpu); 1549 1550 guard(mutex)(&domain_list_lock); 1551 for_each_mpam_resctrl_control(res, rid) { 1552 struct mpam_resctrl_dom *dom; 1553 struct rdt_l3_mon_domain *mon_d; 1554 struct rdt_ctrl_domain *ctrl_d; 1555 bool ctrl_dom_empty, mon_dom_empty; 1556 struct rdt_resource *r = &res->resctrl_res; 1557 1558 if (!res->class) 1559 continue; // dummy resource 1560 1561 dom = mpam_resctrl_get_domain_from_cpu(cpu, res); 1562 if (WARN_ON_ONCE(!dom)) 1563 continue; 1564 1565 if (r->alloc_capable) { 1566 ctrl_d = &dom->resctrl_ctrl_dom; 1567 ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); 1568 if (ctrl_dom_empty) 1569 resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); 1570 } else { 1571 ctrl_dom_empty = true; 1572 } 1573 1574 if (r->mon_capable) { 1575 mon_d = &dom->resctrl_mon_dom; 1576 mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); 1577 if (mon_dom_empty) 1578 resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); 1579 } else { 1580 mon_dom_empty = true; 1581 } 1582 1583 if (ctrl_dom_empty && mon_dom_empty) 1584 kfree(dom); 1585 } 1586 } 1587 1588 int mpam_resctrl_setup(void) 1589 { 1590 int err = 0; 1591 struct mpam_resctrl_res *res; 1592 enum resctrl_res_level rid; 1593 struct mpam_resctrl_mon *mon; 1594 enum resctrl_event_id eventid; 1595 1596 wait_event(wait_cacheinfo_ready, cacheinfo_ready); 1597 1598 cpus_read_lock(); 1599 for_each_mpam_resctrl_control(res, rid) { 1600 INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); 1601 INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains); 1602 res->resctrl_res.rid = rid; 1603 } 1604 1605 /* Find some classes to use for controls */ 1606 mpam_resctrl_pick_caches(); 1607 mpam_resctrl_pick_mba(); 1608 1609 /* Initialise the resctrl structures from the classes */ 1610 for_each_mpam_resctrl_control(res, rid) { 1611 if (!res->class) 1612 continue; // dummy resource 1613 1614 err = mpam_resctrl_control_init(res); 1615 if (err) { 1616 pr_debug("Failed to initialise rid %u\n", rid); 1617 goto internal_error; 1618 } 1619 } 1620 1621 /* Find some classes to use for monitors */ 1622 mpam_resctrl_pick_counters(); 1623 1624 for_each_mpam_resctrl_mon(mon, eventid) { 1625 if (!mon->class) 1626 continue; // dummy resource 1627 1628 err = mpam_resctrl_monitor_init(mon, eventid); 1629 if (err) { 1630 pr_debug("Failed to initialise event %u\n", eventid); 1631 goto internal_error; 1632 } 1633 } 1634 1635 cpus_read_unlock(); 1636 1637 if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) { 1638 pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n", 1639 resctrl_arch_alloc_capable(), resctrl_arch_mon_capable()); 1640 return -EOPNOTSUPP; 1641 } 1642 1643 err = resctrl_init(); 1644 if (err) 1645 return err; 1646 1647 WRITE_ONCE(resctrl_enabled, true); 1648 1649 return 0; 1650 1651 internal_error: 1652 cpus_read_unlock(); 1653 pr_debug("Internal error %d - resctrl not supported\n", err); 1654 return err; 1655 } 1656 1657 void mpam_resctrl_exit(void) 1658 { 1659 if (!READ_ONCE(resctrl_enabled)) 1660 return; 1661 1662 WRITE_ONCE(resctrl_enabled, false); 1663 resctrl_exit(); 1664 } 1665 1666 /* 1667 * The driver is detaching an MSC from this class, if resctrl was using it, 1668 * pull on resctrl_exit(). 1669 */ 1670 void mpam_resctrl_teardown_class(struct mpam_class *class) 1671 { 1672 struct mpam_resctrl_res *res; 1673 enum resctrl_res_level rid; 1674 struct mpam_resctrl_mon *mon; 1675 enum resctrl_event_id eventid; 1676 1677 might_sleep(); 1678 1679 for_each_mpam_resctrl_control(res, rid) { 1680 if (res->class == class) { 1681 res->class = NULL; 1682 break; 1683 } 1684 } 1685 for_each_mpam_resctrl_mon(mon, eventid) { 1686 if (mon->class == class) { 1687 mon->class = NULL; 1688 break; 1689 } 1690 } 1691 } 1692 1693 static int __init __cacheinfo_ready(void) 1694 { 1695 cacheinfo_ready = true; 1696 wake_up(&wait_cacheinfo_ready); 1697 1698 return 0; 1699 } 1700 device_initcall_sync(__cacheinfo_ready); 1701 1702 #ifdef CONFIG_MPAM_KUNIT_TEST 1703 #include "test_mpam_resctrl.c" 1704 #endif 1705