// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology(RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

/* Prefix prepended to every pr_*() message emitted from this file. */
#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/cpuid/api.h>
#include <asm/msr.h>
#include <asm/resctrl.h>
#include "internal.h"

/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

/* Forward declarations of the MSR update callbacks defined later in this file. */
static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

/*
 * Static initialisers for the control and monitor domain list heads that are
 * embedded in rdt_resources_all[id].
 */
#define ctrl_domain_init(id)	LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id)	LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

/*
 * Table of all RDT resources, indexed by resource id.
 *
 * Only the L3 and L2 entries set msr_base/msr_update here (both use
 * cat_wrmsr). The MB and SMBA entries leave them unset in this table; they
 * are assigned per vendor in rdt_init_res_defs_intel() and
 * rdt_init_res_defs_amd(). PERF_PKG only has a monitor scope and a monitor
 * domain list.
 */
struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.name			= "L3",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.mon_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L3),
			.mon_domains		= mon_domain_init(RDT_RESOURCE_L3),
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L3_CBM_BASE,
		.msr_update		= cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.name			= "L2",
			.ctrl_scope		= RESCTRL_L2_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L2),
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L2_CBM_BASE,
		.msr_update		= cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.name			= "MB",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_MBA),
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.name			= "SMBA",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_SMBA),
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_PERF_PKG] =
	{
		.r_resctrl = {
			.name			= "PERF_PKG",
			.mon_scope		= RESCTRL_PACKAGE,
			.mon_domains		= mon_domain_init(RDT_RESOURCE_PERF_PKG),
		},
	},
};

/**
 * resctrl_arch_system_num_rmid_idx - Compute number of supported RMIDs
 *				      (minimum across all mon_capable resource)
 *
 * Return: Number of supported RMIDs at time of call. Note that mount time
 * enumeration of resources may reduce the number.
120 */ 121 u32 resctrl_arch_system_num_rmid_idx(void) 122 { 123 u32 num_rmids = U32_MAX; 124 struct rdt_resource *r; 125 126 for_each_mon_capable_rdt_resource(r) 127 num_rmids = min(num_rmids, r->mon.num_rmid); 128 129 /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */ 130 return num_rmids == U32_MAX ? 0 : num_rmids; 131 } 132 133 struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) 134 { 135 if (l >= RDT_NUM_RESOURCES) 136 return NULL; 137 138 return &rdt_resources_all[l].r_resctrl; 139 } 140 141 /* 142 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs 143 * as they do not have CPUID enumeration support for Cache allocation. 144 * The check for Vendor/Family/Model is not enough to guarantee that 145 * the MSRs won't #GP fault because only the following SKUs support 146 * CAT: 147 * Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz 148 * Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz 149 * Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz 150 * Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz 151 * Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz 152 * Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz 153 * 154 * Probe by trying to write the first of the L3 cache mask registers 155 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length 156 * is always 20 on hsw server parts. The minimum cache bitmask length 157 * allowed for HSW server is always 2 bits. Hardcode all of them. 
158 */ 159 static inline void cache_alloc_hsw_probe(void) 160 { 161 struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3]; 162 struct rdt_resource *r = &hw_res->r_resctrl; 163 u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0; 164 165 if (wrmsrq_safe(MSR_IA32_L3_CBM_BASE, max_cbm)) 166 return; 167 168 rdmsrq(MSR_IA32_L3_CBM_BASE, l3_cbm_0); 169 170 /* If all the bits were set in MSR, return success */ 171 if (l3_cbm_0 != max_cbm) 172 return; 173 174 hw_res->num_closid = 4; 175 r->cache.cbm_len = 20; 176 r->cache.shareable_bits = 0xc0000; 177 r->cache.min_cbm_bits = 2; 178 r->cache.arch_has_sparse_bitmasks = false; 179 r->alloc_capable = true; 180 181 rdt_alloc_capable = true; 182 } 183 184 /* 185 * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values 186 * exposed to user interface and the h/w understandable delay values. 187 * 188 * The non-linear delay values have the granularity of power of two 189 * and also the h/w does not guarantee a curve for configured delay 190 * values vs. actual b/w enforced. 191 * Hence we need a mapping that is pre calibrated so the user can 192 * express the memory b/w as a percentage value. 193 */ 194 static inline bool rdt_get_mb_table(struct rdt_resource *r) 195 { 196 /* 197 * There are no Intel SKUs as of now to support non-linear delay. 
198 */ 199 pr_info("MBA b/w map not implemented for cpu:%d, model:%d", 200 boot_cpu_data.x86, boot_cpu_data.x86_model); 201 202 return false; 203 } 204 205 static __init bool __get_mem_config_intel(struct rdt_resource *r) 206 { 207 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 208 union cpuid_0x10_3_eax eax; 209 union cpuid_0x10_x_edx edx; 210 u32 ebx, ecx, max_delay; 211 212 cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); 213 hw_res->num_closid = edx.split.cos_max + 1; 214 max_delay = eax.split.max_delay + 1; 215 r->membw.max_bw = MAX_MBA_BW; 216 r->membw.arch_needs_linear = true; 217 if (ecx & MBA_IS_LINEAR) { 218 r->membw.delay_linear = true; 219 r->membw.min_bw = MAX_MBA_BW - max_delay; 220 r->membw.bw_gran = MAX_MBA_BW - max_delay; 221 } else { 222 if (!rdt_get_mb_table(r)) 223 return false; 224 r->membw.arch_needs_linear = false; 225 } 226 227 if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) 228 r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; 229 else 230 r->membw.throttle_mode = THREAD_THROTTLE_MAX; 231 232 r->alloc_capable = true; 233 234 return true; 235 } 236 237 static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) 238 { 239 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 240 u32 eax, ebx, ecx, edx, subleaf; 241 242 /* 243 * Query CPUID_Fn80000020_EDX_x01 for MBA and 244 * CPUID_Fn80000020_EDX_x02 for SMBA 245 */ 246 subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1; 247 248 cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); 249 hw_res->num_closid = edx + 1; 250 r->membw.max_bw = 1 << eax; 251 252 /* AMD does not use delay */ 253 r->membw.delay_linear = false; 254 r->membw.arch_needs_linear = false; 255 256 /* 257 * AMD does not use memory delay throttle model to control 258 * the allocation like Intel does. 
259 */ 260 r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; 261 r->membw.min_bw = 0; 262 r->membw.bw_gran = 1; 263 264 r->alloc_capable = true; 265 266 return true; 267 } 268 269 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) 270 { 271 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 272 union cpuid_0x10_1_eax eax; 273 union cpuid_0x10_x_ecx ecx; 274 union cpuid_0x10_x_edx edx; 275 u32 ebx, default_ctrl; 276 277 cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); 278 hw_res->num_closid = edx.split.cos_max + 1; 279 r->cache.cbm_len = eax.split.cbm_len + 1; 280 default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; 281 r->cache.shareable_bits = ebx & default_ctrl; 282 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 283 r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; 284 r->alloc_capable = true; 285 } 286 287 static void rdt_get_cdp_config(int level) 288 { 289 /* 290 * By default, CDP is disabled. CDP can be enabled by mount parameter 291 * "cdp" during resctrl file system mount time. 292 */ 293 rdt_resources_all[level].cdp_enabled = false; 294 rdt_resources_all[level].r_resctrl.cdp_capable = true; 295 } 296 297 static void rdt_set_io_alloc_capable(struct rdt_resource *r) 298 { 299 r->cache.io_alloc_capable = true; 300 } 301 302 static void rdt_get_cdp_l3_config(void) 303 { 304 rdt_get_cdp_config(RDT_RESOURCE_L3); 305 } 306 307 static void rdt_get_cdp_l2_config(void) 308 { 309 rdt_get_cdp_config(RDT_RESOURCE_L2); 310 } 311 312 static void mba_wrmsr_amd(struct msr_param *m) 313 { 314 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 315 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 316 unsigned int i; 317 318 for (i = m->low; i < m->high; i++) 319 wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]); 320 } 321 322 /* 323 * Map the memory b/w percentage value to delay values 324 * that can be written to QOS_MSRs. 
325 * There are currently no SKUs which support non linear delay values. 326 */ 327 static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) 328 { 329 if (r->membw.delay_linear) 330 return MAX_MBA_BW - bw; 331 332 pr_warn_once("Non Linear delay-bw map not supported but queried\n"); 333 return MAX_MBA_BW; 334 } 335 336 static void mba_wrmsr_intel(struct msr_param *m) 337 { 338 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 339 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 340 unsigned int i; 341 342 /* Write the delay values for mba. */ 343 for (i = m->low; i < m->high; i++) 344 wrmsrq(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res)); 345 } 346 347 static void cat_wrmsr(struct msr_param *m) 348 { 349 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 350 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 351 unsigned int i; 352 353 for (i = m->low; i < m->high; i++) 354 wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]); 355 } 356 357 u32 resctrl_arch_get_num_closid(struct rdt_resource *r) 358 { 359 return resctrl_to_arch_res(r)->num_closid; 360 } 361 362 void rdt_ctrl_update(void *arg) 363 { 364 struct rdt_hw_resource *hw_res; 365 struct msr_param *m = arg; 366 367 hw_res = resctrl_to_arch_res(m->res); 368 hw_res->msr_update(m); 369 } 370 371 static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) 372 { 373 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 374 int i; 375 376 /* 377 * Initialize the Control MSRs to having no control. 
378 * For Cache Allocation: Set all bits in cbm 379 * For Memory Allocation: Set b/w requested to 100% 380 */ 381 for (i = 0; i < hw_res->num_closid; i++, dc++) 382 *dc = resctrl_get_default_ctrl(r); 383 } 384 385 static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) 386 { 387 kfree(hw_dom->ctrl_val); 388 kfree(hw_dom); 389 } 390 391 static void l3_mon_domain_free(struct rdt_hw_l3_mon_domain *hw_dom) 392 { 393 int idx; 394 395 for_each_mbm_idx(idx) 396 kfree(hw_dom->arch_mbm_states[idx]); 397 kfree(hw_dom); 398 } 399 400 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d) 401 { 402 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); 403 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 404 struct msr_param m; 405 u32 *dc; 406 407 dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), 408 GFP_KERNEL); 409 if (!dc) 410 return -ENOMEM; 411 412 hw_dom->ctrl_val = dc; 413 setup_default_ctrlval(r, dc); 414 415 m.res = r; 416 m.dom = d; 417 m.low = 0; 418 m.high = hw_res->num_closid; 419 hw_res->msr_update(&m); 420 return 0; 421 } 422 423 /** 424 * l3_mon_domain_mbm_alloc() - Allocate arch private storage for the MBM counters 425 * @num_rmid: The size of the MBM counter array 426 * @hw_dom: The domain that owns the allocated arrays 427 * 428 * Return: 0 for success, or -ENOMEM. 
 */
static int l3_mon_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_l3_mon_domain *hw_dom)
{
	size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]);
	enum resctrl_event_id eventid;
	int idx;

	/* Only events that are enabled get a state array. */
	for_each_mbm_event_id(eventid) {
		if (!resctrl_is_mon_event_enabled(eventid))
			continue;
		idx = MBM_STATE_IDX(eventid);
		hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_states[idx])
			goto cleanup;
	}

	return 0;
cleanup:
	/* Free every slot (kfree() of an unset slot is harmless) and clear it. */
	for_each_mbm_idx(idx) {
		kfree(hw_dom->arch_mbm_states[idx]);
		hw_dom->arch_mbm_states[idx] = NULL;
	}

	return -ENOMEM;
}

/*
 * Translate @cpu and a resctrl @scope into a domain id.
 *
 * Return: whatever the scope-specific helper returns, or -EINVAL for a
 * scope that is not handled here. Callers treat a negative value as
 * "no id found".
 */
static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
	switch (scope) {
	case RESCTRL_L2_CACHE:
	case RESCTRL_L3_CACHE:
		return get_cpu_cacheinfo_id(cpu, scope);
	case RESCTRL_L3_NODE:
		return cpu_to_node(cpu);
	case RESCTRL_PACKAGE:
		return topology_physical_package_id(cpu);
	default:
		break;
	}

	return -EINVAL;
}

/*
 * Add @cpu to the control domain of @r selected by the resource's
 * ctrl_scope.
 *
 * If a domain with the matching id already exists the CPU is just added to
 * its cpu_mask. Otherwise a new domain is allocated, initialised with
 * default control values, linked into r->ctrl_domains and passed to
 * resctrl_online_ctrl_domain(). Every failure is handled by returning
 * without adding the CPU; nothing is reported to the caller.
 *
 * Must be called with domain_list_lock held.
 */
static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct list_head *add_pos = NULL;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
	if (hdr) {
		/* Existing domain: validate the header before using container_of(). */
		if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid))
			return;
		d = container_of(hdr, struct rdt_ctrl_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	/* First CPU of this domain: build a new one. */
	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_CTRL_DOMAIN;
	d->hdr.rid = r->rid;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (domain_setup_ctrlval(r, d)) {
		ctrl_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_ctrl_domain(r, d);
	if (err) {
		/* Unlink, then wait for RCU readers before freeing the domain. */
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		ctrl_domain_free(hw_dom);
	}
}

/*
 * Allocate and bring online a new L3 monitor domain with id @id for @cpu,
 * inserting it into the monitor domain list at @add_pos. Every failure is
 * handled by freeing the new domain and returning; nothing is reported to
 * the caller.
 */
static void l3_mon_domain_setup(int cpu, int id, struct rdt_resource *r, struct list_head *add_pos)
{
	struct rdt_hw_l3_mon_domain *hw_dom;
	struct rdt_l3_mon_domain *d;
	struct cacheinfo *ci;
	int err;

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->hdr.rid = RDT_RESOURCE_L3;
	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!ci) {
		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
		l3_mon_domain_free(hw_dom);
		return;
	}
	d->ci_id = ci->id;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	arch_mon_domain_online(r, d);

	if (l3_mon_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) {
		l3_mon_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, &d->hdr);
	if (err) {
		/* Unlink, then wait for RCU readers before freeing the domain. */
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		l3_mon_domain_free(hw_dom);
	}
}

/*
 * Add @cpu to the monitor domain of @r selected by the resource's
 * mon_scope, creating the domain through the resource-specific setup
 * function when none with the matching id exists yet.
 *
 * Must be called with domain_list_lock held.
 */
static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct list_head *add_pos = NULL;
	struct rdt_domain_hdr *hdr;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
	if (hdr)
		cpumask_set_cpu(cpu, &hdr->cpu_mask);

	switch (r->rid) {
	case RDT_RESOURCE_L3:
		/* Update the mbm_assign_mode state for the CPU if supported */
		if (r->mon.mbm_cntr_assignable)
			resctrl_arch_mbm_cntr_assign_set_one(r);
		if (!hdr)
			l3_mon_domain_setup(cpu, id, r, add_pos);
		break;
	case RDT_RESOURCE_PERF_PKG:
		if (!hdr)
			intel_aet_mon_domain_setup(cpu, id, r, add_pos);
		break;
	default:
		pr_warn_once("Unknown resource rid=%d\n", r->rid);
		break;
	}
}

/* Add @cpu to the control and/or monitor domains that @r is capable of. */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_add_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_add_cpu_mon(cpu, r);
}

/*
 * Remove @cpu from its control domain of @r. When the domain's cpu_mask
 * becomes empty the domain is taken offline, unlinked from the list and
 * freed.
 *
 * Must be called with domain_list_lock held.
 */
static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	/* Other CPUs still use the domain: nothing more to do. */
	cpumask_clear_cpu(cpu, &hdr->cpu_mask);
	if (!cpumask_empty(&hdr->cpu_mask))
		return;

	if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid))
		return;

	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
	hw_dom = resctrl_to_arch_ctrl_dom(d);

	resctrl_offline_ctrl_domain(r, d);
	list_del_rcu(&hdr->list);
	synchronize_rcu();

	/*
	 * rdt_ctrl_domain "d" is going to be freed below, so clear
	 * its pointer from pseudo_lock_region struct.
	 */
	if (d->plr)
		d->plr->d = NULL;
	ctrl_domain_free(hw_dom);
}

/*
 * Remove @cpu from its monitor domain of @r. When the domain's cpu_mask
 * becomes empty the domain is taken offline, unlinked from the list and
 * freed in the way that matches the resource id.
 *
 * Must be called with domain_list_lock held.
 */
static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct rdt_domain_hdr *hdr;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	/* Other CPUs still use the domain: nothing more to do. */
	cpumask_clear_cpu(cpu, &hdr->cpu_mask);
	if (!cpumask_empty(&hdr->cpu_mask))
		return;

	switch (r->rid) {
	case RDT_RESOURCE_L3: {
		struct rdt_hw_l3_mon_domain *hw_dom;
		struct rdt_l3_mon_domain *d;

		if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
			return;

		d = container_of(hdr, struct rdt_l3_mon_domain, hdr);
		hw_dom = resctrl_to_arch_mon_dom(d);
		resctrl_offline_mon_domain(r, hdr);
		list_del_rcu(&hdr->list);
		synchronize_rcu();
		l3_mon_domain_free(hw_dom);
		break;
	}
	case RDT_RESOURCE_PERF_PKG: {
		struct rdt_perf_pkg_mon_domain *pkgd;

		if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_PERF_PKG))
			return;

		pkgd = container_of(hdr, struct rdt_perf_pkg_mon_domain, hdr);
		resctrl_offline_mon_domain(r, hdr);
		list_del_rcu(&hdr->list);
		synchronize_rcu();
		kfree(pkgd);
		break;
	}
	default:
		pr_warn_once("Unknown resource rid=%d\n", r->rid);
		break;
	}
}

/* Remove @cpu from the control and/or monitor domains that @r is capable of. */
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_remove_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_remove_cpu_mon(cpu, r);
}

/*
 * Reset the executing CPU's cached pqr_state to the reserved CLOSID and
 * RMID and write the same values to MSR_IA32_PQR_ASSOC.
 *
 * @cpu is not used: the state is looked up with this_cpu_ptr().
 */
static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = RESCTRL_RESERVED_CLOSID;
	state->default_rmid = RESCTRL_RESERVED_RMID;
	state->cur_closid = RESCTRL_RESERVED_CLOSID;
	state->cur_rmid = RESCTRL_RESERVED_RMID;
	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
	      RESCTRL_RESERVED_CLOSID);
}

/*
 * CPU hotplug "online" callback registered in resctrl_arch_late_init():
 * add @cpu to the domains of every capable resource under
 * domain_list_lock, reset its CLOSID/RMID state and call
 * resctrl_online_cpu().
 *
 * Return: always 0.
 */
static int resctrl_arch_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);
	resctrl_online_cpu(cpu);

	return 0;
}

/*
 * CPU hotplug "offline" callback registered in resctrl_arch_late_init():
 * call resctrl_offline_cpu() first, then remove @cpu from the domains of
 * every capable resource under domain_list_lock and reset its CLOSID/RMID
 * state.
 *
 * Return: always 0.
 */
static int resctrl_arch_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	resctrl_offline_cpu(cpu);

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);

	return 0;
}

/*
 * Enable monitoring on the PERF_PKG resource and build its monitor domains
 * for all online CPUs. Does nothing when intel_aet_get_events() returns
 * false.
 */
void resctrl_arch_pre_mount(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
	int cpu;

	if (!intel_aet_get_events())
		return;

	/*
	 * Late discovery of telemetry events means the domains for the
	 * resource were not built. Do that now.
	 */
	cpus_read_lock();
	mutex_lock(&domain_list_lock);
	r->mon_capable = true;
	rdt_mon_capable = true;
	for_each_online_cpu(cpu)
		domain_add_cpu_mon(cpu, r);
	mutex_unlock(&domain_list_lock);
	cpus_read_unlock();
}

/* Indexes into rdt_options[], one per feature that "rdt=" can force on or off. */
enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
	RDT_FLAG_ABMC,
	RDT_FLAG_SDCIAE,
};

/* Designated initialiser for one rdt_options[] entry: option name and CPU feature flag. */
#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

/*
 * One "rdt=" option: @name is the token matched on the command line, @flag
 * the CPU feature it controls, @force_off/@force_on record what was
 * requested.
 */
struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[]  __ro_after_init = {
	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
	RDT_OPT(RDT_FLAG_ABMC,	    "abmc",	X86_FEATURE_ABMC),
	RDT_OPT(RDT_FLAG_SDCIAE,    "sdciae",	X86_FEATURE_SDCIAE),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)

/*
 * Parse a comma separated list of option names in @str. A leading '!' on a
 * token requests force_off, otherwise force_on. Tokens accepted by
 * intel_handle_aet_option() are not looked up in rdt_options[]; tokens that
 * match nothing are silently ignored.
 *
 * @str is modified in place by strsep().
 *
 * Return: always 1.
 */
static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		if (intel_handle_aet_option(force_off, tok))
			continue;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);

/*
 * Report whether CPU feature @flag should be used.
 *
 * Return: false when boot_cpu_has(@flag) is false, regardless of any
 * option. Otherwise true, unless the matching rdt_options[] entry has
 * force_off set without force_on (force_on is checked last and wins).
 */
bool rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

/*
 * Report whether monitoring event @evt is configurable: requires
 * X86_FEATURE_BMEC plus the feature flag of the event itself, and only the
 * two L3 MBM events qualify.
 */
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	if (!rdt_cpu_has(X86_FEATURE_BMEC))
		return false;

	switch (evt) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
	default:
		return false;
	}
}

/*
 * Enumerate the MBA resource using the vendor specific helper.
 *
 * Return: the helper's result, or false when X86_FEATURE_MBA is not usable
 * or the vendor is not Intel, AMD or Hygon.
 */
static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
		 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

/*
 * Enumerate the SMBA resource.
 *
 * Return: the AMD helper's result, or false when X86_FEATURE_SMBA is not
 * usable or the vendor is not AMD.
 */
static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

/*
 * Enumerate all allocation (control) resources: L3 and L2 cache allocation
 * with their optional CDP / io_alloc capabilities, MBA and SMBA.
 *
 * Return: true when at least one allocation resource was found, or when
 * rdt_alloc_capable was already set before the call.
 */
static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		if (rdt_cpu_has(X86_FEATURE_SDCIAE))
			rdt_set_io_alloc_capable(r);
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are same format at 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

/*
 * Enable the L3 monitoring events the CPU supports and configure L3
 * monitoring.
 *
 * Return: false when none of the monitoring features is usable; otherwise
 * true exactly when rdt_get_l3_mon_config() returns 0.
 */
static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
	bool ret = false;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0, NULL);
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0, NULL);
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0, NULL);
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_ABMC))
		ret = true;

	if (!ret)
		return false;

	return !rdt_get_l3_mon_config(r);
}

/*
 * Apply Intel model specific quirks, selected by boot_cpu_data.x86_vfm:
 * probe for CAT on Haswell server unless "l3cat" was forced off, force
 * features off on Skylake server depending on stepping, and apply the MBM
 * quirk on Skylake and Broadwell server.
 *
 * NOTE(review): set_rdt_options() tokenises with strsep(), which writes
 * into the buffer it is given; here that buffer is a string literal -
 * confirm this is safe at the point this runs.
 */
static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

/* Apply vendor specific quirks; only Intel has any here. */
static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

/*
 * Enumerate allocation and monitoring resources and record the results in
 * rdt_alloc_capable and rdt_mon_capable.
 *
 * Return: true when either kind of resource is available.
 */
static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}

/*
 * Intel specific resource defaults: cache resources get no per-CPU
 * configuration and a minimum CBM of one bit; MBA gets its MSR base and
 * update callback.
 */
static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

/*
 * AMD specific resource defaults: cache resources allow sparse bitmasks,
 * use per-CPU configuration and have a minimum CBM of zero bits; MBA and
 * SMBA get their MSR bases and the shared AMD update callback.
 */
static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

/* Apply the vendor specific resource defaults (AMD and Hygon share one set). */
static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
		 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		rdt_init_res_defs_amd();
}

/* Hotplug state returned by cpuhp_setup_state(); used to remove it on exit. */
static enum cpuhp_state rdt_online;

/* Runs once on the BSP during boot.
 */
/*
 * Fill in the cache monitoring fields of @c (x86_cache_max_rmid,
 * x86_cache_occ_scale, x86_cache_mbm_width_offset) from CPUID leaf 0xf, or
 * set all three to -1 when neither X86_FEATURE_CQM_LLC nor
 * X86_FEATURE_ABMC is present.
 */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) {
		c->x86_cache_max_rmid  = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) ||
	    cpu_has(c, X86_FEATURE_ABMC)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid  = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		/* CPUID reported no offset: fall back to a per-vendor constant. */
		if (!c->x86_cache_mbm_width_offset) {
			switch (c->x86_vendor) {
			case X86_VENDOR_AMD:
				c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
				break;
			case X86_VENDOR_HYGON:
				c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_HYGON;
				break;
			default:
				/* Leave c->x86_cache_mbm_width_offset as 0 */
				break;
			}
		}
	}
}

/*
 * Late initcall: number the resources, apply vendor defaults and quirks,
 * enumerate the resources, register the CPU hotplug callbacks and call
 * resctrl_init().
 *
 * Return: 0 on success, -ENODEV when no resource is available, otherwise
 * the negative value returned by cpuhp_setup_state() or the non-zero value
 * returned by resctrl_init().
 */
static int __init resctrl_arch_late_init(void)
{
	struct rdt_resource *r;
	int state, ret, i;

	/* for_each_rdt_resource() requires all rid to be initialised. */
	for (i = 0; i < RDT_NUM_RESOURCES; i++)
		rdt_resources_all[i].r_resctrl.rid = i;

	/*
	 * Initialize functions(or definitions) that are different
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
				  resctrl_arch_offline_cpu);
	if (state < 0)
		return state;

	ret = resctrl_init();
	if (ret) {
		/* Undo the hotplug registration made above. */
		cpuhp_remove_state(state);
		return ret;
	}
	/* Remember the state so resctrl_arch_exit() can remove it. */
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_arch_late_init);

/*
 * Exit handler: call intel_aet_exit(), remove the CPU hotplug state
 * registered by resctrl_arch_late_init(), then call resctrl_exit().
 */
static void __exit resctrl_arch_exit(void)
{
	intel_aet_exit();

	cpuhp_remove_state(rdt_online);

	resctrl_exit();
}

__exitcall(resctrl_arch_exit);