1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (C) 2025 Arm Ltd. 3 4 #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ 5 6 #include <linux/arm_mpam.h> 7 #include <linux/cacheinfo.h> 8 #include <linux/cpu.h> 9 #include <linux/cpumask.h> 10 #include <linux/errno.h> 11 #include <linux/limits.h> 12 #include <linux/list.h> 13 #include <linux/math.h> 14 #include <linux/printk.h> 15 #include <linux/rculist.h> 16 #include <linux/resctrl.h> 17 #include <linux/slab.h> 18 #include <linux/types.h> 19 #include <linux/wait.h> 20 21 #include <asm/mpam.h> 22 23 #include "mpam_internal.h" 24 25 static DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); 26 27 /* 28 * The classes we've picked to map to resctrl resources, wrapped 29 * in with their resctrl structure. 30 * Class pointer may be NULL. 31 */ 32 static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; 33 34 #define for_each_mpam_resctrl_control(res, rid) \ 35 for (rid = 0, res = &mpam_resctrl_controls[rid]; \ 36 rid < RDT_NUM_RESOURCES; \ 37 rid++, res = &mpam_resctrl_controls[rid]) 38 39 /* 40 * The classes we've picked to map to resctrl events. 41 * Resctrl believes all the worlds a Xeon, and these are all on the L3. This 42 * array lets us find the actual class backing the event counters. e.g. 43 * the only memory bandwidth counters may be on the memory controller, but to 44 * make use of them, we pretend they are on L3. Restrict the events considered 45 * to those supported by MPAM. 46 * Class pointer may be NULL. 47 */ 48 #define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID 49 static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1]; 50 51 #define for_each_mpam_resctrl_mon(mon, eventid) \ 52 for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \ 53 eventid <= MPAM_MAX_EVENT; \ 54 eventid++, mon = &mpam_resctrl_counters[eventid]) 55 56 /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ 57 static DEFINE_MUTEX(domain_list_lock); 58 59 /* 60 * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1. 61 * This applies globally to all traffic the CPU generates. 62 */ 63 static bool cdp_enabled; 64 65 /* 66 * We use cacheinfo to discover the size of the caches and their id. cacheinfo 67 * populates this from a device_initcall(). mpam_resctrl_setup() must wait. 68 */ 69 static bool cacheinfo_ready; 70 static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); 71 72 /* 73 * If resctrl_init() succeeded, resctrl_exit() can be used to remove support 74 * for the filesystem in the event of an error. 75 */ 76 static bool resctrl_enabled; 77 78 bool resctrl_arch_alloc_capable(void) 79 { 80 struct mpam_resctrl_res *res; 81 enum resctrl_res_level rid; 82 83 for_each_mpam_resctrl_control(res, rid) { 84 if (res->resctrl_res.alloc_capable) 85 return true; 86 } 87 88 return false; 89 } 90 91 bool resctrl_arch_mon_capable(void) 92 { 93 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 94 struct rdt_resource *l3 = &res->resctrl_res; 95 96 /* All monitors are presented as being on the L3 cache */ 97 return l3->mon_capable; 98 } 99 100 bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) 101 { 102 return false; 103 } 104 105 void resctrl_arch_mon_event_config_read(void *info) 106 { 107 } 108 109 void resctrl_arch_mon_event_config_write(void *info) 110 { 111 } 112 113 void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) 114 { 115 } 116 117 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 118 u32 closid, u32 rmid, enum resctrl_event_id eventid) 119 { 120 } 121 122 void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 123 u32 closid, u32 rmid, int cntr_id, 124 enum resctrl_event_id eventid) 125 { 126 } 127 128 void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 129 enum resctrl_event_id evtid, u32 rmid, u32 closid, 130 u32 cntr_id, bool assign) 131 { 132 } 133 134 int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, 135 u32 unused, u32 rmid, int cntr_id, 136 enum resctrl_event_id eventid, u64 *val) 137 { 138 return -EOPNOTSUPP; 139 } 140 141 bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) 142 { 143 return false; 144 } 145 146 int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) 147 { 148 return -EINVAL; 149 } 150 151 int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) 152 { 153 return -EOPNOTSUPP; 154 } 155 156 bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) 157 { 158 return false; 159 } 160 161 void resctrl_arch_pre_mount(void) 162 { 163 } 164 165 bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) 166 { 167 return mpam_resctrl_controls[rid].cdp_enabled; 168 } 169 170 /** 171 * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks. 172 * 173 * At boot, all existing tasks use partid zero for D and I. 174 * To enable/disable CDP emulation, all these tasks need relabelling. 175 */ 176 static void resctrl_reset_task_closids(void) 177 { 178 struct task_struct *p, *t; 179 180 read_lock(&tasklist_lock); 181 for_each_process_thread(p, t) { 182 resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID, 183 RESCTRL_RESERVED_RMID); 184 } 185 read_unlock(&tasklist_lock); 186 } 187 188 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) 189 { 190 u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; 191 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 192 struct rdt_resource *l3 = &res->resctrl_res; 193 int cpu; 194 195 if (!IS_ENABLED(CONFIG_EXPERT) && enable) { 196 /* 197 * If the resctrl fs is mounted more than once, sequentially, 198 * then CDP can lead to the use of out of range PARTIDs. 199 */ 200 pr_warn("CDP not supported\n"); 201 return -EOPNOTSUPP; 202 } 203 204 if (enable) 205 pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n"); 206 207 /* 208 * resctrl_arch_set_cdp_enabled() is only called with enable set to 209 * false on error and unmount. 210 */ 211 cdp_enabled = enable; 212 mpam_resctrl_controls[rid].cdp_enabled = enable; 213 214 if (enable) 215 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2; 216 else 217 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); 218 219 /* The mbw_max feature can't hide cdp as it's a per-partid maximum. */ 220 if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled) 221 mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false; 222 223 /* 224 * If resctrl has attempted to enable CDP on MBA, re-enable MBA as two 225 * configurations will be provided so there is no aliasing problem. 226 */ 227 if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled && 228 mpam_resctrl_controls[RDT_RESOURCE_MBA].class) 229 mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true; 230 231 /* On unmount when CDP is disabled, re-enable MBA */ 232 if (!cdp_enabled && mpam_resctrl_controls[RDT_RESOURCE_MBA].class) 233 mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true; 234 235 if (enable) { 236 if (mpam_partid_max < 1) 237 return -EINVAL; 238 239 partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA); 240 partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE); 241 } 242 243 mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0); 244 WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current)); 245 246 resctrl_reset_task_closids(); 247 248 for_each_possible_cpu(cpu) 249 mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0); 250 on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1); 251 252 return 0; 253 } 254 255 static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid) 256 { 257 return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid); 258 } 259 260 /* 261 * MSC may raise an error interrupt if it sees an out or range partid/pmg, 262 * and go on to truncate the value. Regardless of what the hardware supports, 263 * only the system wide safe value is safe to use. 264 */ 265 u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) 266 { 267 return mpam_partid_max + 1; 268 } 269 270 u32 resctrl_arch_system_num_rmid_idx(void) 271 { 272 return (mpam_pmg_max + 1) * (mpam_partid_max + 1); 273 } 274 275 u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) 276 { 277 return closid * (mpam_pmg_max + 1) + rmid; 278 } 279 280 void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) 281 { 282 *closid = idx / (mpam_pmg_max + 1); 283 *rmid = idx % (mpam_pmg_max + 1); 284 } 285 286 void resctrl_arch_sched_in(struct task_struct *tsk) 287 { 288 lockdep_assert_preemption_disabled(); 289 290 mpam_thread_switch(tsk); 291 } 292 293 void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid) 294 { 295 WARN_ON_ONCE(closid > U16_MAX); 296 WARN_ON_ONCE(rmid > U8_MAX); 297 298 if (!cdp_enabled) { 299 mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid); 300 } else { 301 /* 302 * When CDP is enabled, resctrl halves the closid range and we 303 * use odd/even partid for one closid. 304 */ 305 u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); 306 u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); 307 308 mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid); 309 } 310 } 311 312 void resctrl_arch_sync_cpu_closid_rmid(void *info) 313 { 314 struct resctrl_cpu_defaults *r = info; 315 316 lockdep_assert_preemption_disabled(); 317 318 if (r) { 319 resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(), 320 r->closid, r->rmid); 321 } 322 323 resctrl_arch_sched_in(current); 324 } 325 326 void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) 327 { 328 WARN_ON_ONCE(closid > U16_MAX); 329 WARN_ON_ONCE(rmid > U8_MAX); 330 331 if (!cdp_enabled) { 332 mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid); 333 } else { 334 u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); 335 u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); 336 337 mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid); 338 } 339 } 340 341 bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) 342 { 343 u64 regval = mpam_get_regval(tsk); 344 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); 345 346 if (cdp_enabled) 347 tsk_closid >>= 1; 348 349 return tsk_closid == closid; 350 } 351 352 /* The task's pmg is not unique, the partid must be considered too */ 353 bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) 354 { 355 u64 regval = mpam_get_regval(tsk); 356 u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); 357 u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval); 358 359 if (cdp_enabled) 360 tsk_closid >>= 1; 361 362 return (tsk_closid == closid) && (tsk_rmid == rmid); 363 } 364 365 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) 366 { 367 if (l >= RDT_NUM_RESOURCES) 368 return NULL; 369 370 return &mpam_resctrl_controls[l].resctrl_res; 371 } 372 373 static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid) 374 { 375 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; 376 377 if (!mpam_is_enabled()) 378 return -EINVAL; 379 380 if (!mon->class) 381 return -EINVAL; 382 383 switch (evtid) { 384 case QOS_L3_OCCUP_EVENT_ID: 385 /* With CDP, one monitor gets used for both code/data reads */ 386 return mpam_alloc_csu_mon(mon->class); 387 case QOS_L3_MBM_LOCAL_EVENT_ID: 388 case QOS_L3_MBM_TOTAL_EVENT_ID: 389 return USE_PRE_ALLOCATED; 390 default: 391 return -EOPNOTSUPP; 392 } 393 } 394 395 void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, 396 enum resctrl_event_id evtid) 397 { 398 DEFINE_WAIT(wait); 399 int *ret; 400 401 ret = kmalloc_obj(*ret); 402 if (!ret) 403 return ERR_PTR(-ENOMEM); 404 405 do { 406 prepare_to_wait(&resctrl_mon_ctx_waiters, &wait, 407 TASK_INTERRUPTIBLE); 408 *ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid); 409 if (*ret == -ENOSPC) 410 schedule(); 411 } while (*ret == -ENOSPC && !signal_pending(current)); 412 finish_wait(&resctrl_mon_ctx_waiters, &wait); 413 414 return ret; 415 } 416 417 static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid, 418 u32 mon_idx) 419 { 420 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; 421 422 if (!mpam_is_enabled()) 423 return; 424 425 if (!mon->class) 426 return; 427 428 if (evtid == QOS_L3_OCCUP_EVENT_ID) 429 mpam_free_csu_mon(mon->class, mon_idx); 430 431 wake_up(&resctrl_mon_ctx_waiters); 432 } 433 434 void resctrl_arch_mon_ctx_free(struct rdt_resource *r, 435 enum resctrl_event_id evtid, void *arch_mon_ctx) 436 { 437 u32 mon_idx = *(u32 *)arch_mon_ctx; 438 439 kfree(arch_mon_ctx); 440 441 resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); 442 } 443 444 static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, 445 enum mpam_device_features mon_type, 446 int mon_idx, 447 enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) 448 { 449 struct mon_cfg cfg; 450 451 if (!mpam_is_enabled()) 452 return -EINVAL; 453 454 /* Shift closid to account for CDP */ 455 closid = resctrl_get_config_index(closid, cdp_type); 456 457 if (irqs_disabled()) { 458 /* Check if we can access this domain without an IPI */ 459 return -EIO; 460 } 461 462 cfg = (struct mon_cfg) { 463 .mon = mon_idx, 464 .match_pmg = true, 465 .partid = closid, 466 .pmg = rmid, 467 }; 468 469 return mpam_msmon_read(mon_comp, &cfg, mon_type, val); 470 } 471 472 static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, 473 enum mpam_device_features mon_type, 474 int mon_idx, u32 closid, u32 rmid, u64 *val) 475 { 476 if (cdp_enabled) { 477 u64 code_val = 0, data_val = 0; 478 int err; 479 480 err = __read_mon(mon, mon_comp, mon_type, mon_idx, 481 CDP_CODE, closid, rmid, &code_val); 482 if (err) 483 return err; 484 485 err = __read_mon(mon, mon_comp, mon_type, mon_idx, 486 CDP_DATA, closid, rmid, &data_val); 487 if (err) 488 return err; 489 490 *val += code_val + data_val; 491 return 0; 492 } 493 494 return __read_mon(mon, mon_comp, mon_type, mon_idx, 495 CDP_NONE, closid, rmid, val); 496 } 497 498 /* MBWU when not in ABMC mode (not supported), and CSU counters. */ 499 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, 500 u32 closid, u32 rmid, enum resctrl_event_id eventid, 501 void *arch_priv, u64 *val, void *arch_mon_ctx) 502 { 503 struct mpam_resctrl_dom *l3_dom; 504 struct mpam_component *mon_comp; 505 u32 mon_idx = *(u32 *)arch_mon_ctx; 506 enum mpam_device_features mon_type; 507 struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; 508 509 resctrl_arch_rmid_read_context_check(); 510 511 if (!mpam_is_enabled()) 512 return -EINVAL; 513 514 if (eventid >= QOS_NUM_EVENTS || !mon->class) 515 return -EINVAL; 516 517 l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr); 518 mon_comp = l3_dom->mon_comp[eventid]; 519 520 if (eventid != QOS_L3_OCCUP_EVENT_ID) 521 return -EINVAL; 522 523 mon_type = mpam_feat_msmon_csu; 524 525 return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx, 526 closid, rmid, val); 527 } 528 529 /* 530 * The rmid realloc threshold should be for the smallest cache exposed to 531 * resctrl. 532 */ 533 static int update_rmid_limits(struct mpam_class *class) 534 { 535 u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); 536 struct mpam_props *cprops = &class->props; 537 struct cacheinfo *ci; 538 539 lockdep_assert_cpus_held(); 540 541 if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) 542 return 0; 543 544 /* 545 * Assume cache levels are the same size for all CPUs... 546 * The check just requires any online CPU and it can't go offline as we 547 * hold the cpu lock. 548 */ 549 ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level); 550 if (!ci || ci->size == 0) { 551 pr_debug("Could not read cache size for class %u\n", 552 class->level); 553 return -EINVAL; 554 } 555 556 if (!resctrl_rmid_realloc_limit || 557 ci->size < resctrl_rmid_realloc_limit) { 558 resctrl_rmid_realloc_limit = ci->size; 559 resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg; 560 } 561 562 return 0; 563 } 564 565 static bool cache_has_usable_cpor(struct mpam_class *class) 566 { 567 struct mpam_props *cprops = &class->props; 568 569 if (!mpam_has_feature(mpam_feat_cpor_part, cprops)) 570 return false; 571 572 /* resctrl uses u32 for all bitmap configurations */ 573 return class->props.cpbm_wd <= 32; 574 } 575 576 static bool mba_class_use_mbw_max(struct mpam_props *cprops) 577 { 578 return (mpam_has_feature(mpam_feat_mbw_max, cprops) && 579 cprops->bwa_wd); 580 } 581 582 static bool class_has_usable_mba(struct mpam_props *cprops) 583 { 584 return mba_class_use_mbw_max(cprops); 585 } 586 587 static bool cache_has_usable_csu(struct mpam_class *class) 588 { 589 struct mpam_props *cprops; 590 591 if (!class) 592 return false; 593 594 cprops = &class->props; 595 596 if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) 597 return false; 598 599 /* 600 * CSU counters settle on the value, so we can get away with 601 * having only one. 602 */ 603 if (!cprops->num_csu_mon) 604 return false; 605 606 return true; 607 } 608 609 /* 610 * Calculate the worst-case percentage change from each implemented step 611 * in the control. 612 */ 613 static u32 get_mba_granularity(struct mpam_props *cprops) 614 { 615 if (!mba_class_use_mbw_max(cprops)) 616 return 0; 617 618 /* 619 * bwa_wd is the number of bits implemented in the 0.xxx 620 * fixed point fraction. 1 bit is 50%, 2 is 25% etc. 621 */ 622 return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd); 623 } 624 625 /* 626 * Each fixed-point hardware value architecturally represents a range 627 * of values: the full range 0% - 100% is split contiguously into 628 * (1 << cprops->bwa_wd) equal bands. 629 * 630 * Although the bwa_bwd fields have 6 bits the maximum valid value is 16 631 * as it reports the width of fields that are at most 16 bits. When 632 * fewer than 16 bits are valid the least significant bits are 633 * ignored. The implied binary point is kept between bits 15 and 16 and 634 * so the valid bits are leftmost. 635 * 636 * See ARM IHI0099B.a "MPAM system component specification", Section 9.3, 637 * "The fixed-point fractional format" for more information. 638 * 639 * Find the nearest percentage value to the upper bound of the selected band: 640 */ 641 static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) 642 { 643 u32 val = mbw_max; 644 645 val >>= 16 - cprops->bwa_wd; 646 val += 1; 647 val *= MAX_MBA_BW; 648 val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd); 649 650 return val; 651 } 652 653 /* 654 * Find the band whose upper bound is closest to the specified percentage. 655 * 656 * A round-to-nearest policy is followed here as a balanced compromise 657 * between unexpected under-commit of the resource (where the total of 658 * a set of resource allocations after conversion is less than the 659 * expected total, due to rounding of the individual converted 660 * percentages) and over-commit (where the total of the converted 661 * allocations is greater than expected). 662 */ 663 static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) 664 { 665 u32 val = pc; 666 667 val <<= cprops->bwa_wd; 668 val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); 669 val = max(val, 1) - 1; 670 val <<= 16 - cprops->bwa_wd; 671 672 return val; 673 } 674 675 static u32 get_mba_min(struct mpam_props *cprops) 676 { 677 if (!mba_class_use_mbw_max(cprops)) { 678 WARN_ON_ONCE(1); 679 return 0; 680 } 681 682 return mbw_max_to_percent(0, cprops); 683 } 684 685 /* Find the L3 cache that has affinity with this CPU */ 686 static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask) 687 { 688 u32 cache_id = get_cpu_cacheinfo_id(cpu, 3); 689 690 lockdep_assert_cpus_held(); 691 692 return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask); 693 } 694 695 /* 696 * topology_matches_l3() - Is the provided class the same shape as L3 697 * @victim: The class we'd like to pretend is L3. 698 * 699 * resctrl expects all the world's a Xeon, and all counters are on the 700 * L3. We allow some mapping counters on other classes. This requires 701 * that the CPU->domain mapping is the same kind of shape. 702 * 703 * Using cacheinfo directly would make this work even if resctrl can't 704 * use the L3 - but cacheinfo can't tell us anything about offline CPUs. 705 * Using the L3 resctrl domain list also depends on CPUs being online. 706 * Using the mpam_class we picked for L3 so we can use its domain list 707 * assumes that there are MPAM controls on the L3. 708 * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id() 709 * helper which can tell us about offline CPUs ... but getting the cache_id 710 * to start with relies on at least one CPU per L3 cache being online at 711 * boot. 712 * 713 * Walk the victim component list and compare the affinity mask with the 714 * corresponding L3. The topology matches if each victim:component's affinity 715 * mask is the same as the CPU's corresponding L3's. These lists/masks are 716 * computed from firmware tables so don't change at runtime. 717 */ 718 static bool topology_matches_l3(struct mpam_class *victim) 719 { 720 int cpu, err; 721 struct mpam_component *victim_iter; 722 723 lockdep_assert_cpus_held(); 724 725 cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; 726 if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) 727 return false; 728 729 guard(srcu)(&mpam_srcu); 730 list_for_each_entry_srcu(victim_iter, &victim->components, class_list, 731 srcu_read_lock_held(&mpam_srcu)) { 732 if (cpumask_empty(&victim_iter->affinity)) { 733 pr_debug("class %u has CPU-less component %u - can't match L3!\n", 734 victim->level, victim_iter->comp_id); 735 return false; 736 } 737 738 cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask); 739 if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) 740 return false; 741 742 cpumask_clear(tmp_cpumask); 743 err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); 744 if (err) { 745 pr_debug("Failed to find L3's equivalent component to class %u component %u\n", 746 victim->level, victim_iter->comp_id); 747 return false; 748 } 749 750 /* Any differing bits in the affinity mask? */ 751 if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) { 752 pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n" 753 "L3:%*pbl != victim:%*pbl\n", 754 victim->level, victim_iter->comp_id, 755 cpumask_pr_args(tmp_cpumask), 756 cpumask_pr_args(&victim_iter->affinity)); 757 758 return false; 759 } 760 } 761 762 return true; 763 } 764 765 /* 766 * Test if the traffic for a class matches that at egress from the L3. For 767 * MSC at memory controllers this is only possible if there is a single L3 768 * as otherwise the counters at the memory can include bandwidth from the 769 * non-local L3. 770 */ 771 static bool traffic_matches_l3(struct mpam_class *class) 772 { 773 int err, cpu; 774 775 lockdep_assert_cpus_held(); 776 777 if (class->type == MPAM_CLASS_CACHE && class->level == 3) 778 return true; 779 780 if (class->type == MPAM_CLASS_CACHE && class->level != 3) { 781 pr_debug("class %u is a different cache from L3\n", class->level); 782 return false; 783 } 784 785 if (class->type != MPAM_CLASS_MEMORY) { 786 pr_debug("class %u is neither of type cache or memory\n", class->level); 787 return false; 788 } 789 790 cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; 791 if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) { 792 pr_debug("cpumask allocation failed\n"); 793 return false; 794 } 795 796 cpu = cpumask_any_and(&class->affinity, cpu_online_mask); 797 err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); 798 if (err) { 799 pr_debug("Failed to find L3 downstream to cpu %d\n", cpu); 800 return false; 801 } 802 803 if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) { 804 pr_debug("There is more than one L3\n"); 805 return false; 806 } 807 808 /* Be strict; the traffic might stop in the intermediate cache. */ 809 if (get_cpu_cacheinfo_id(cpu, 4) != -1) { 810 pr_debug("L3 isn't the last level of cache\n"); 811 return false; 812 } 813 814 if (num_possible_nodes() > 1) { 815 pr_debug("There is more than one numa node\n"); 816 return false; 817 } 818 819 #ifdef CONFIG_HMEM_REPORTING 820 if (node_devices[cpu_to_node(cpu)]->cache_dev) { 821 pr_debug("There is a memory side cache\n"); 822 return false; 823 } 824 #endif 825 826 return true; 827 } 828 829 /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ 830 static void mpam_resctrl_pick_caches(void) 831 { 832 struct mpam_class *class; 833 struct mpam_resctrl_res *res; 834 835 lockdep_assert_cpus_held(); 836 837 guard(srcu)(&mpam_srcu); 838 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 839 srcu_read_lock_held(&mpam_srcu)) { 840 if (class->type != MPAM_CLASS_CACHE) { 841 pr_debug("class %u is not a cache\n", class->level); 842 continue; 843 } 844 845 if (class->level != 2 && class->level != 3) { 846 pr_debug("class %u is not L2 or L3\n", class->level); 847 continue; 848 } 849 850 if (!cache_has_usable_cpor(class)) { 851 pr_debug("class %u cache misses CPOR\n", class->level); 852 continue; 853 } 854 855 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 856 pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level, 857 cpumask_pr_args(&class->affinity), 858 cpumask_pr_args(cpu_possible_mask)); 859 continue; 860 } 861 862 if (class->level == 2) 863 res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; 864 else 865 res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 866 res->class = class; 867 } 868 } 869 870 static void mpam_resctrl_pick_mba(void) 871 { 872 struct mpam_class *class, *candidate_class = NULL; 873 struct mpam_resctrl_res *res; 874 875 lockdep_assert_cpus_held(); 876 877 guard(srcu)(&mpam_srcu); 878 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 879 srcu_read_lock_held(&mpam_srcu)) { 880 struct mpam_props *cprops = &class->props; 881 882 if (class->level != 3 && class->type == MPAM_CLASS_CACHE) { 883 pr_debug("class %u is a cache but not the L3\n", class->level); 884 continue; 885 } 886 887 if (!class_has_usable_mba(cprops)) { 888 pr_debug("class %u has no bandwidth control\n", 889 class->level); 890 continue; 891 } 892 893 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 894 pr_debug("class %u has missing CPUs\n", class->level); 895 continue; 896 } 897 898 if (!topology_matches_l3(class)) { 899 pr_debug("class %u topology doesn't match L3\n", 900 class->level); 901 continue; 902 } 903 904 if (!traffic_matches_l3(class)) { 905 pr_debug("class %u traffic doesn't match L3 egress\n", 906 class->level); 907 continue; 908 } 909 910 /* 911 * Pick a resource to be MBA that as close as possible to 912 * the L3. mbm_total counts the bandwidth leaving the L3 913 * cache and MBA should correspond as closely as possible 914 * for proper operation of mba_sc. 915 */ 916 if (!candidate_class || class->level < candidate_class->level) 917 candidate_class = class; 918 } 919 920 if (candidate_class) { 921 pr_debug("selected class %u to back MBA\n", 922 candidate_class->level); 923 res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; 924 res->class = candidate_class; 925 } 926 } 927 928 static void counter_update_class(enum resctrl_event_id evt_id, 929 struct mpam_class *class) 930 { 931 struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class; 932 933 if (existing_class) { 934 if (class->level == 3) { 935 pr_debug("Existing class is L3 - L3 wins\n"); 936 return; 937 } 938 939 if (existing_class->level < class->level) { 940 pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n", 941 existing_class->level, class->level); 942 return; 943 } 944 } 945 946 mpam_resctrl_counters[evt_id].class = class; 947 } 948 949 static void mpam_resctrl_pick_counters(void) 950 { 951 struct mpam_class *class; 952 953 lockdep_assert_cpus_held(); 954 955 guard(srcu)(&mpam_srcu); 956 list_for_each_entry_srcu(class, &mpam_classes, classes_list, 957 srcu_read_lock_held(&mpam_srcu)) { 958 /* The name of the resource is L3... */ 959 if (class->type == MPAM_CLASS_CACHE && class->level != 3) { 960 pr_debug("class %u is a cache but not the L3", class->level); 961 continue; 962 } 963 964 if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { 965 pr_debug("class %u does not cover all CPUs", 966 class->level); 967 continue; 968 } 969 970 if (cache_has_usable_csu(class)) { 971 pr_debug("class %u has usable CSU", 972 class->level); 973 974 /* CSU counters only make sense on a cache. */ 975 switch (class->type) { 976 case MPAM_CLASS_CACHE: 977 if (update_rmid_limits(class)) 978 break; 979 980 counter_update_class(QOS_L3_OCCUP_EVENT_ID, class); 981 break; 982 default: 983 break; 984 } 985 } 986 } 987 } 988 989 static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) 990 { 991 struct mpam_class *class = res->class; 992 struct mpam_props *cprops = &class->props; 993 struct rdt_resource *r = &res->resctrl_res; 994 995 switch (r->rid) { 996 case RDT_RESOURCE_L2: 997 case RDT_RESOURCE_L3: 998 r->schema_fmt = RESCTRL_SCHEMA_BITMAP; 999 r->cache.arch_has_sparse_bitmasks = true; 1000 1001 r->cache.cbm_len = class->props.cpbm_wd; 1002 /* mpam_devices will reject empty bitmaps */ 1003 r->cache.min_cbm_bits = 1; 1004 1005 if (r->rid == RDT_RESOURCE_L2) { 1006 r->name = "L2"; 1007 r->ctrl_scope = RESCTRL_L2_CACHE; 1008 r->cdp_capable = true; 1009 } else { 1010 r->name = "L3"; 1011 r->ctrl_scope = RESCTRL_L3_CACHE; 1012 r->cdp_capable = true; 1013 } 1014 1015 /* 1016 * Which bits are shared with other ...things... Unknown 1017 * devices use partid-0 which uses all the bitmap fields. Until 1018 * we have configured the SMMU and GIC not to do this 'all the 1019 * bits' is the correct answer here. 1020 */ 1021 r->cache.shareable_bits = resctrl_get_default_ctrl(r); 1022 r->alloc_capable = true; 1023 break; 1024 case RDT_RESOURCE_MBA: 1025 r->schema_fmt = RESCTRL_SCHEMA_RANGE; 1026 r->ctrl_scope = RESCTRL_L3_CACHE; 1027 1028 r->membw.delay_linear = true; 1029 r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; 1030 r->membw.min_bw = get_mba_min(cprops); 1031 r->membw.max_bw = MAX_MBA_BW; 1032 r->membw.bw_gran = get_mba_granularity(cprops); 1033 1034 r->name = "MB"; 1035 r->alloc_capable = true; 1036 break; 1037 default: 1038 return -EINVAL; 1039 } 1040 1041 return 0; 1042 } 1043 1044 static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) 1045 { 1046 struct mpam_class *class = comp->class; 1047 1048 if (class->type == MPAM_CLASS_CACHE) 1049 return comp->comp_id; 1050 1051 if (topology_matches_l3(class)) { 1052 /* Use the corresponding L3 component ID as the domain ID */ 1053 int id = get_cpu_cacheinfo_id(cpu, 3); 1054 1055 /* Implies topology_matches_l3() made a mistake */ 1056 if (WARN_ON_ONCE(id == -1)) 1057 return comp->comp_id; 1058 1059 return id; 1060 } 1061 1062 /* Otherwise, expose the ID used by the firmware table code. */ 1063 return comp->comp_id; 1064 } 1065 1066 static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, 1067 enum resctrl_event_id type) 1068 { 1069 struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 1070 struct rdt_resource *l3 = &res->resctrl_res; 1071 1072 lockdep_assert_cpus_held(); 1073 1074 /* 1075 * There also needs to be an L3 cache present. 1076 * The check just requires any online CPU and it can't go offline as we 1077 * hold the cpu lock. 1078 */ 1079 if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1) 1080 return 0; 1081 1082 /* 1083 * If there are no MPAM resources on L3, force it into existence. 1084 * topology_matches_l3() already ensures this looks like the L3. 1085 * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init(). 1086 */ 1087 if (!res->class) { 1088 pr_warn_once("Faking L3 MSC to enable counters.\n"); 1089 res->class = mpam_resctrl_counters[type].class; 1090 } 1091 1092 /* 1093 * Called multiple times!, once per event type that has a 1094 * monitoring class. 1095 * Setting name is necessary on monitor only platforms. 1096 */ 1097 l3->name = "L3"; 1098 l3->mon_scope = RESCTRL_L3_CACHE; 1099 1100 /* 1101 * num-rmid is the upper bound for the number of monitoring groups that 1102 * can exist simultaneously, including the default monitoring group for 1103 * each control group. Hence, advertise the whole rmid_idx space even 1104 * though each control group has its own pmg/rmid space. Unfortunately, 1105 * this does mean userspace needs to know the architecture to correctly 1106 * interpret this value. 1107 */ 1108 l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); 1109 1110 if (resctrl_enable_mon_event(type, false, 0, NULL)) 1111 l3->mon_capable = true; 1112 1113 return 0; 1114 } 1115 1116 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, 1117 u32 closid, enum resctrl_conf_type type) 1118 { 1119 u32 partid; 1120 struct mpam_config *cfg; 1121 struct mpam_props *cprops; 1122 struct mpam_resctrl_res *res; 1123 struct mpam_resctrl_dom *dom; 1124 enum mpam_device_features configured_by; 1125 1126 lockdep_assert_cpus_held(); 1127 1128 if (!mpam_is_enabled()) 1129 return resctrl_get_default_ctrl(r); 1130 1131 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1132 dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); 1133 cprops = &res->class->props; 1134 1135 /* 1136 * When CDP is enabled, but the resource doesn't support it, 1137 * the control is cloned across both partids. 1138 * Pick one at random to read: 1139 */ 1140 if (mpam_resctrl_hide_cdp(r->rid)) 1141 type = CDP_DATA; 1142 1143 partid = resctrl_get_config_index(closid, type); 1144 cfg = &dom->ctrl_comp->cfg[partid]; 1145 1146 switch (r->rid) { 1147 case RDT_RESOURCE_L2: 1148 case RDT_RESOURCE_L3: 1149 configured_by = mpam_feat_cpor_part; 1150 break; 1151 case RDT_RESOURCE_MBA: 1152 if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { 1153 configured_by = mpam_feat_mbw_max; 1154 break; 1155 } 1156 fallthrough; 1157 default: 1158 return resctrl_get_default_ctrl(r); 1159 } 1160 1161 if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || 1162 !mpam_has_feature(configured_by, cfg)) 1163 return resctrl_get_default_ctrl(r); 1164 1165 switch (configured_by) { 1166 case mpam_feat_cpor_part: 1167 return cfg->cpbm; 1168 case mpam_feat_mbw_max: 1169 return mbw_max_to_percent(cfg->mbw_max, cprops); 1170 default: 1171 return resctrl_get_default_ctrl(r); 1172 } 1173 } 1174 1175 int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, 1176 u32 closid, enum resctrl_conf_type t, u32 cfg_val) 1177 { 1178 int err; 1179 u32 partid; 1180 struct mpam_config cfg; 1181 struct mpam_props *cprops; 1182 struct mpam_resctrl_res *res; 1183 struct mpam_resctrl_dom *dom; 1184 1185 lockdep_assert_cpus_held(); 1186 lockdep_assert_irqs_enabled(); 1187 1188 if (!mpam_is_enabled()) 1189 return -EINVAL; 1190 1191 /* 1192 * No need to check the CPU as mpam_apply_config() doesn't care, and 1193 * resctrl_arch_update_domains() relies on this. 1194 */ 1195 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1196 dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); 1197 cprops = &res->class->props; 1198 1199 if (mpam_resctrl_hide_cdp(r->rid)) 1200 t = CDP_DATA; 1201 1202 partid = resctrl_get_config_index(closid, t); 1203 if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) { 1204 pr_debug("Not alloc capable or computed PARTID out of range\n"); 1205 return -EINVAL; 1206 } 1207 1208 /* 1209 * Copy the current config to avoid clearing other resources when the 1210 * same component is exposed multiple times through resctrl. 1211 */ 1212 cfg = dom->ctrl_comp->cfg[partid]; 1213 1214 switch (r->rid) { 1215 case RDT_RESOURCE_L2: 1216 case RDT_RESOURCE_L3: 1217 cfg.cpbm = cfg_val; 1218 mpam_set_feature(mpam_feat_cpor_part, &cfg); 1219 break; 1220 case RDT_RESOURCE_MBA: 1221 if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { 1222 cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); 1223 mpam_set_feature(mpam_feat_mbw_max, &cfg); 1224 break; 1225 } 1226 fallthrough; 1227 default: 1228 return -EINVAL; 1229 } 1230 1231 /* 1232 * When CDP is enabled, but the resource doesn't support it, we need to 1233 * apply the same configuration to the other partid. 1234 */ 1235 if (mpam_resctrl_hide_cdp(r->rid)) { 1236 partid = resctrl_get_config_index(closid, CDP_CODE); 1237 err = mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1238 if (err) 1239 return err; 1240 1241 partid = resctrl_get_config_index(closid, CDP_DATA); 1242 return mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1243 } 1244 1245 return mpam_apply_config(dom->ctrl_comp, partid, &cfg); 1246 } 1247 1248 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) 1249 { 1250 int err; 1251 struct rdt_ctrl_domain *d; 1252 1253 lockdep_assert_cpus_held(); 1254 lockdep_assert_irqs_enabled(); 1255 1256 if (!mpam_is_enabled()) 1257 return -EINVAL; 1258 1259 list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) { 1260 for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) { 1261 struct resctrl_staged_config *cfg = &d->staged_config[t]; 1262 1263 if (!cfg->have_new_ctrl) 1264 continue; 1265 1266 err = resctrl_arch_update_one(r, d, closid, t, 1267 cfg->new_ctrl); 1268 if (err) 1269 return err; 1270 } 1271 } 1272 1273 return 0; 1274 } 1275 1276 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) 1277 { 1278 struct mpam_resctrl_res *res; 1279 1280 lockdep_assert_cpus_held(); 1281 1282 if (!mpam_is_enabled()) 1283 return; 1284 1285 res = container_of(r, struct mpam_resctrl_res, resctrl_res); 1286 mpam_reset_class_locked(res->class); 1287 } 1288 1289 static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp, 1290 enum resctrl_res_level rid, 1291 struct rdt_domain_hdr *hdr) 1292 { 1293 lockdep_assert_cpus_held(); 1294 1295 INIT_LIST_HEAD(&hdr->list); 1296 hdr->id = mpam_resctrl_pick_domain_id(cpu, comp); 1297 hdr->rid = rid; 1298 cpumask_set_cpu(cpu, &hdr->cpu_mask); 1299 } 1300 1301 static void mpam_resctrl_online_domain_hdr(unsigned int cpu, 1302 struct rdt_domain_hdr *hdr) 1303 { 1304 lockdep_assert_cpus_held(); 1305 1306 cpumask_set_cpu(cpu, &hdr->cpu_mask); 1307 } 1308 1309 /** 1310 * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU. 1311 * @cpu: The CPU to remove from the domain. 1312 * @hdr: The domain's header. 1313 * 1314 * Removes @cpu from the header mask. If this was the last CPU in the domain, 1315 * the domain header is removed from its parent list and true is returned, 1316 * indicating the parent structure can be freed. 1317 * If there are other CPUs in the domain, returns false. 1318 */ 1319 static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu, 1320 struct rdt_domain_hdr *hdr) 1321 { 1322 lockdep_assert_held(&domain_list_lock); 1323 1324 cpumask_clear_cpu(cpu, &hdr->cpu_mask); 1325 if (cpumask_empty(&hdr->cpu_mask)) { 1326 list_del_rcu(&hdr->list); 1327 synchronize_rcu(); 1328 return true; 1329 } 1330 1331 return false; 1332 } 1333 1334 static void mpam_resctrl_domain_insert(struct list_head *list, 1335 struct rdt_domain_hdr *new) 1336 { 1337 struct rdt_domain_hdr *err; 1338 struct list_head *pos = NULL; 1339 1340 lockdep_assert_held(&domain_list_lock); 1341 1342 err = resctrl_find_domain(list, new->id, &pos); 1343 if (WARN_ON_ONCE(err)) 1344 return; 1345 1346 list_add_tail_rcu(&new->list, pos); 1347 } 1348 1349 static struct mpam_component *find_component(struct mpam_class *class, int cpu) 1350 { 1351 struct mpam_component *comp; 1352 1353 guard(srcu)(&mpam_srcu); 1354 list_for_each_entry_srcu(comp, &class->components, class_list, 1355 srcu_read_lock_held(&mpam_srcu)) { 1356 if (cpumask_test_cpu(cpu, &comp->affinity)) 1357 return comp; 1358 } 1359 1360 return NULL; 1361 } 1362 1363 static struct mpam_resctrl_dom * 1364 mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) 1365 { 1366 int err; 1367 struct mpam_resctrl_dom *dom; 1368 struct rdt_l3_mon_domain *mon_d; 1369 struct rdt_ctrl_domain *ctrl_d; 1370 struct mpam_class *class = res->class; 1371 struct mpam_component *comp_iter, *ctrl_comp; 1372 struct rdt_resource *r = &res->resctrl_res; 1373 1374 lockdep_assert_held(&domain_list_lock); 1375 1376 ctrl_comp = NULL; 1377 guard(srcu)(&mpam_srcu); 1378 list_for_each_entry_srcu(comp_iter, &class->components, class_list, 1379 srcu_read_lock_held(&mpam_srcu)) { 1380 if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { 1381 ctrl_comp = comp_iter; 1382 break; 1383 } 1384 } 1385 1386 /* class has no component for this CPU */ 1387 if (WARN_ON_ONCE(!ctrl_comp)) 1388 return ERR_PTR(-EINVAL); 1389 1390 dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); 1391 if (!dom) 1392 return ERR_PTR(-ENOMEM); 1393 1394 if (r->alloc_capable) { 1395 dom->ctrl_comp = ctrl_comp; 1396 1397 ctrl_d = &dom->resctrl_ctrl_dom; 1398 mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr); 1399 ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; 1400 err = resctrl_online_ctrl_domain(r, ctrl_d); 1401 if (err) 1402 goto free_domain; 1403 1404 mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); 1405 } else { 1406 pr_debug("Skipped control domain online - no controls\n"); 1407 } 1408 1409 if (r->mon_capable) { 1410 struct mpam_component *any_mon_comp = NULL; 1411 struct mpam_resctrl_mon *mon; 1412 enum resctrl_event_id eventid; 1413 1414 /* 1415 * Even if the monitor domain is backed by a different 1416 * component, the L3 component IDs need to be used... only 1417 * there may be no ctrl_comp for the L3. 1418 * Search each event's class list for a component with 1419 * overlapping CPUs and set up the dom->mon_comp array. 1420 */ 1421 1422 for_each_mpam_resctrl_mon(mon, eventid) { 1423 struct mpam_component *mon_comp; 1424 1425 if (!mon->class) 1426 continue; // dummy resource 1427 1428 mon_comp = find_component(mon->class, cpu); 1429 dom->mon_comp[eventid] = mon_comp; 1430 if (mon_comp) 1431 any_mon_comp = mon_comp; 1432 } 1433 if (!any_mon_comp) { 1434 WARN_ON_ONCE(0); 1435 err = -EFAULT; 1436 goto offline_ctrl_domain; 1437 } 1438 1439 mon_d = &dom->resctrl_mon_dom; 1440 mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr); 1441 mon_d->hdr.type = RESCTRL_MON_DOMAIN; 1442 err = resctrl_online_mon_domain(r, &mon_d->hdr); 1443 if (err) 1444 goto offline_ctrl_domain; 1445 1446 mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr); 1447 } else { 1448 pr_debug("Skipped monitor domain online - no monitors\n"); 1449 } 1450 1451 return dom; 1452 1453 offline_ctrl_domain: 1454 if (r->alloc_capable) { 1455 mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); 1456 resctrl_offline_ctrl_domain(r, ctrl_d); 1457 } 1458 free_domain: 1459 kfree(dom); 1460 dom = ERR_PTR(err); 1461 1462 return dom; 1463 } 1464 1465 /* 1466 * We know all the monitors are associated with the L3, even if there are no 1467 * controls and therefore no control component. Find the cache-id for the CPU 1468 * and use that to search for existing resctrl domains. 1469 * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id 1470 * for anything that is not a cache. 1471 */ 1472 static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) 1473 { 1474 int cache_id; 1475 struct mpam_resctrl_dom *dom; 1476 struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3]; 1477 1478 lockdep_assert_cpus_held(); 1479 1480 if (!l3->class) 1481 return NULL; 1482 cache_id = get_cpu_cacheinfo_id(cpu, 3); 1483 if (cache_id < 0) 1484 return NULL; 1485 1486 list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { 1487 if (dom->resctrl_mon_dom.hdr.id == cache_id) 1488 return dom; 1489 } 1490 1491 return NULL; 1492 } 1493 1494 static struct mpam_resctrl_dom * 1495 mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) 1496 { 1497 struct mpam_resctrl_dom *dom; 1498 struct rdt_resource *r = &res->resctrl_res; 1499 1500 lockdep_assert_cpus_held(); 1501 1502 list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) { 1503 if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) 1504 return dom; 1505 } 1506 1507 if (r->rid != RDT_RESOURCE_L3) 1508 return NULL; 1509 1510 /* Search the mon domain list too - needed on monitor only platforms. */ 1511 return mpam_resctrl_get_mon_domain_from_cpu(cpu); 1512 } 1513 1514 int mpam_resctrl_online_cpu(unsigned int cpu) 1515 { 1516 struct mpam_resctrl_res *res; 1517 enum resctrl_res_level rid; 1518 1519 guard(mutex)(&domain_list_lock); 1520 for_each_mpam_resctrl_control(res, rid) { 1521 struct mpam_resctrl_dom *dom; 1522 struct rdt_resource *r = &res->resctrl_res; 1523 1524 if (!res->class) 1525 continue; // dummy_resource; 1526 1527 dom = mpam_resctrl_get_domain_from_cpu(cpu, res); 1528 if (!dom) { 1529 dom = mpam_resctrl_alloc_domain(cpu, res); 1530 if (IS_ERR(dom)) 1531 return PTR_ERR(dom); 1532 } else { 1533 if (r->alloc_capable) { 1534 struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom; 1535 1536 mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); 1537 } 1538 if (r->mon_capable) { 1539 struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom; 1540 1541 mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); 1542 } 1543 } 1544 } 1545 1546 resctrl_online_cpu(cpu); 1547 1548 return 0; 1549 } 1550 1551 void mpam_resctrl_offline_cpu(unsigned int cpu) 1552 { 1553 struct mpam_resctrl_res *res; 1554 enum resctrl_res_level rid; 1555 1556 resctrl_offline_cpu(cpu); 1557 1558 guard(mutex)(&domain_list_lock); 1559 for_each_mpam_resctrl_control(res, rid) { 1560 struct mpam_resctrl_dom *dom; 1561 struct rdt_l3_mon_domain *mon_d; 1562 struct rdt_ctrl_domain *ctrl_d; 1563 bool ctrl_dom_empty, mon_dom_empty; 1564 struct rdt_resource *r = &res->resctrl_res; 1565 1566 if (!res->class) 1567 continue; // dummy resource 1568 1569 dom = mpam_resctrl_get_domain_from_cpu(cpu, res); 1570 if (WARN_ON_ONCE(!dom)) 1571 continue; 1572 1573 if (r->alloc_capable) { 1574 ctrl_d = &dom->resctrl_ctrl_dom; 1575 ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); 1576 if (ctrl_dom_empty) 1577 resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); 1578 } else { 1579 ctrl_dom_empty = true; 1580 } 1581 1582 if (r->mon_capable) { 1583 mon_d = &dom->resctrl_mon_dom; 1584 mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); 1585 if (mon_dom_empty) 1586 resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); 1587 } else { 1588 mon_dom_empty = true; 1589 } 1590 1591 if (ctrl_dom_empty && mon_dom_empty) 1592 kfree(dom); 1593 } 1594 } 1595 1596 int mpam_resctrl_setup(void) 1597 { 1598 int err = 0; 1599 struct mpam_resctrl_res *res; 1600 enum resctrl_res_level rid; 1601 struct mpam_resctrl_mon *mon; 1602 enum resctrl_event_id eventid; 1603 1604 wait_event(wait_cacheinfo_ready, cacheinfo_ready); 1605 1606 cpus_read_lock(); 1607 for_each_mpam_resctrl_control(res, rid) { 1608 INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); 1609 INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains); 1610 res->resctrl_res.rid = rid; 1611 } 1612 1613 /* Find some classes to use for controls */ 1614 mpam_resctrl_pick_caches(); 1615 mpam_resctrl_pick_mba(); 1616 1617 /* Initialise the resctrl structures from the classes */ 1618 for_each_mpam_resctrl_control(res, rid) { 1619 if (!res->class) 1620 continue; // dummy resource 1621 1622 err = mpam_resctrl_control_init(res); 1623 if (err) { 1624 pr_debug("Failed to initialise rid %u\n", rid); 1625 goto internal_error; 1626 } 1627 } 1628 1629 /* Find some classes to use for monitors */ 1630 mpam_resctrl_pick_counters(); 1631 1632 for_each_mpam_resctrl_mon(mon, eventid) { 1633 if (!mon->class) 1634 continue; // dummy resource 1635 1636 err = mpam_resctrl_monitor_init(mon, eventid); 1637 if (err) { 1638 pr_debug("Failed to initialise event %u\n", eventid); 1639 goto internal_error; 1640 } 1641 } 1642 1643 cpus_read_unlock(); 1644 1645 if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) { 1646 pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n", 1647 resctrl_arch_alloc_capable(), resctrl_arch_mon_capable()); 1648 return -EOPNOTSUPP; 1649 } 1650 1651 err = resctrl_init(); 1652 if (err) 1653 return err; 1654 1655 WRITE_ONCE(resctrl_enabled, true); 1656 1657 return 0; 1658 1659 internal_error: 1660 cpus_read_unlock(); 1661 pr_debug("Internal error %d - resctrl not supported\n", err); 1662 return err; 1663 } 1664 1665 void mpam_resctrl_exit(void) 1666 { 1667 if (!READ_ONCE(resctrl_enabled)) 1668 return; 1669 1670 WRITE_ONCE(resctrl_enabled, false); 1671 resctrl_exit(); 1672 } 1673 1674 /* 1675 * The driver is detaching an MSC from this class, if resctrl was using it, 1676 * pull on resctrl_exit(). 1677 */ 1678 void mpam_resctrl_teardown_class(struct mpam_class *class) 1679 { 1680 struct mpam_resctrl_res *res; 1681 enum resctrl_res_level rid; 1682 struct mpam_resctrl_mon *mon; 1683 enum resctrl_event_id eventid; 1684 1685 might_sleep(); 1686 1687 for_each_mpam_resctrl_control(res, rid) { 1688 if (res->class == class) { 1689 res->class = NULL; 1690 break; 1691 } 1692 } 1693 for_each_mpam_resctrl_mon(mon, eventid) { 1694 if (mon->class == class) { 1695 mon->class = NULL; 1696 break; 1697 } 1698 } 1699 } 1700 1701 static int __init __cacheinfo_ready(void) 1702 { 1703 cacheinfo_ready = true; 1704 wake_up(&wait_cacheinfo_ready); 1705 1706 return 0; 1707 } 1708 device_initcall_sync(__cacheinfo_ready); 1709 1710 #ifdef CONFIG_MPAM_KUNIT_TEST 1711 #include "test_mpam_resctrl.c" 1712 #endif 1713