1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Resource Director Technology(RDT) 4 * - Cache Allocation code. 5 * 6 * Copyright (C) 2016 Intel Corporation 7 * 8 * Authors: 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Tony Luck <tony.luck@intel.com> 11 * Vikas Shivappa <vikas.shivappa@intel.com> 12 * 13 * More information about RDT be found in the Intel (R) x86 Architecture 14 * Software Developer Manual June 2016, volume 3, section 17.17. 15 */ 16 17 #define pr_fmt(fmt) "resctrl: " fmt 18 19 #include <linux/cpu.h> 20 #include <linux/slab.h> 21 #include <linux/err.h> 22 #include <linux/cpuhotplug.h> 23 24 #include <asm/cpu_device_id.h> 25 #include <asm/resctrl.h> 26 #include "internal.h" 27 28 /* 29 * rdt_domain structures are kfree()d when their last CPU goes offline, 30 * and allocated when the first CPU in a new domain comes online. 31 * The rdt_resource's domain list is updated when this happens. Readers of 32 * the domain list must either take cpus_read_lock(), or rely on an RCU 33 * read-side critical section, to avoid observing concurrent modification. 34 * All writers take this mutex: 35 */ 36 static DEFINE_MUTEX(domain_list_lock); 37 38 /* 39 * The cached resctrl_pqr_state is strictly per CPU and can never be 40 * updated from a remote CPU. Functions which modify the state 41 * are called with interrupts disabled and no preemption, which 42 * is sufficient for the protection. 43 */ 44 DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); 45 46 /* 47 * Used to store the max resource name width and max resource data width 48 * to display the schemata in a tabular format 49 */ 50 int max_name_width, max_data_width; 51 52 /* 53 * Global boolean for rdt_alloc which is true if any 54 * resource allocation is enabled. 55 */ 56 bool rdt_alloc_capable; 57 58 static void mba_wrmsr_intel(struct msr_param *m); 59 static void cat_wrmsr(struct msr_param *m); 60 static void mba_wrmsr_amd(struct msr_param *m); 61 62 #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) 63 #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) 64 65 struct rdt_hw_resource rdt_resources_all[] = { 66 [RDT_RESOURCE_L3] = 67 { 68 .r_resctrl = { 69 .rid = RDT_RESOURCE_L3, 70 .name = "L3", 71 .ctrl_scope = RESCTRL_L3_CACHE, 72 .mon_scope = RESCTRL_L3_CACHE, 73 .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), 74 .mon_domains = mon_domain_init(RDT_RESOURCE_L3), 75 .parse_ctrlval = parse_cbm, 76 .format_str = "%d=%0*x", 77 .fflags = RFTYPE_RES_CACHE, 78 }, 79 .msr_base = MSR_IA32_L3_CBM_BASE, 80 .msr_update = cat_wrmsr, 81 }, 82 [RDT_RESOURCE_L2] = 83 { 84 .r_resctrl = { 85 .rid = RDT_RESOURCE_L2, 86 .name = "L2", 87 .ctrl_scope = RESCTRL_L2_CACHE, 88 .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), 89 .parse_ctrlval = parse_cbm, 90 .format_str = "%d=%0*x", 91 .fflags = RFTYPE_RES_CACHE, 92 }, 93 .msr_base = MSR_IA32_L2_CBM_BASE, 94 .msr_update = cat_wrmsr, 95 }, 96 [RDT_RESOURCE_MBA] = 97 { 98 .r_resctrl = { 99 .rid = RDT_RESOURCE_MBA, 100 .name = "MB", 101 .ctrl_scope = RESCTRL_L3_CACHE, 102 .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), 103 .parse_ctrlval = parse_bw, 104 .format_str = "%d=%*u", 105 .fflags = RFTYPE_RES_MB, 106 }, 107 }, 108 [RDT_RESOURCE_SMBA] = 109 { 110 .r_resctrl = { 111 .rid = RDT_RESOURCE_SMBA, 112 .name = "SMBA", 113 .ctrl_scope = RESCTRL_L3_CACHE, 114 .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), 115 .parse_ctrlval = parse_bw, 116 .format_str = "%d=%*u", 117 .fflags = RFTYPE_RES_MB, 118 }, 119 }, 120 }; 121 122 /* 123 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs 124 * as they do not have CPUID enumeration support for Cache allocation. 125 * The check for Vendor/Family/Model is not enough to guarantee that 126 * the MSRs won't #GP fault because only the following SKUs support 127 * CAT: 128 * Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz 129 * Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz 130 * Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz 131 * Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz 132 * Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz 133 * Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz 134 * 135 * Probe by trying to write the first of the L3 cache mask registers 136 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length 137 * is always 20 on hsw server parts. The minimum cache bitmask length 138 * allowed for HSW server is always 2 bits. Hardcode all of them. 139 */ 140 static inline void cache_alloc_hsw_probe(void) 141 { 142 struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3]; 143 struct rdt_resource *r = &hw_res->r_resctrl; 144 u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0; 145 146 if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm)) 147 return; 148 149 rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0); 150 151 /* If all the bits were set in MSR, return success */ 152 if (l3_cbm_0 != max_cbm) 153 return; 154 155 hw_res->num_closid = 4; 156 r->default_ctrl = max_cbm; 157 r->cache.cbm_len = 20; 158 r->cache.shareable_bits = 0xc0000; 159 r->cache.min_cbm_bits = 2; 160 r->cache.arch_has_sparse_bitmasks = false; 161 r->alloc_capable = true; 162 163 rdt_alloc_capable = true; 164 } 165 166 bool is_mba_sc(struct rdt_resource *r) 167 { 168 if (!r) 169 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; 170 171 /* 172 * The software controller support is only applicable to MBA resource. 173 * Make sure to check for resource type. 174 */ 175 if (r->rid != RDT_RESOURCE_MBA) 176 return false; 177 178 return r->membw.mba_sc; 179 } 180 181 /* 182 * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values 183 * exposed to user interface and the h/w understandable delay values. 184 * 185 * The non-linear delay values have the granularity of power of two 186 * and also the h/w does not guarantee a curve for configured delay 187 * values vs. actual b/w enforced. 188 * Hence we need a mapping that is pre calibrated so the user can 189 * express the memory b/w as a percentage value. 190 */ 191 static inline bool rdt_get_mb_table(struct rdt_resource *r) 192 { 193 /* 194 * There are no Intel SKUs as of now to support non-linear delay. 195 */ 196 pr_info("MBA b/w map not implemented for cpu:%d, model:%d", 197 boot_cpu_data.x86, boot_cpu_data.x86_model); 198 199 return false; 200 } 201 202 static bool __get_mem_config_intel(struct rdt_resource *r) 203 { 204 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 205 union cpuid_0x10_3_eax eax; 206 union cpuid_0x10_x_edx edx; 207 u32 ebx, ecx, max_delay; 208 209 cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); 210 hw_res->num_closid = edx.split.cos_max + 1; 211 max_delay = eax.split.max_delay + 1; 212 r->default_ctrl = MAX_MBA_BW; 213 r->membw.arch_needs_linear = true; 214 if (ecx & MBA_IS_LINEAR) { 215 r->membw.delay_linear = true; 216 r->membw.min_bw = MAX_MBA_BW - max_delay; 217 r->membw.bw_gran = MAX_MBA_BW - max_delay; 218 } else { 219 if (!rdt_get_mb_table(r)) 220 return false; 221 r->membw.arch_needs_linear = false; 222 } 223 r->data_width = 3; 224 225 if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) 226 r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; 227 else 228 r->membw.throttle_mode = THREAD_THROTTLE_MAX; 229 thread_throttle_mode_init(); 230 231 r->alloc_capable = true; 232 233 return true; 234 } 235 236 static bool __rdt_get_mem_config_amd(struct rdt_resource *r) 237 { 238 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 239 u32 eax, ebx, ecx, edx, subleaf; 240 241 /* 242 * Query CPUID_Fn80000020_EDX_x01 for MBA and 243 * CPUID_Fn80000020_EDX_x02 for SMBA 244 */ 245 subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1; 246 247 cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); 248 hw_res->num_closid = edx + 1; 249 r->default_ctrl = 1 << eax; 250 251 /* AMD does not use delay */ 252 r->membw.delay_linear = false; 253 r->membw.arch_needs_linear = false; 254 255 /* 256 * AMD does not use memory delay throttle model to control 257 * the allocation like Intel does. 258 */ 259 r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; 260 r->membw.min_bw = 0; 261 r->membw.bw_gran = 1; 262 /* Max value is 2048, Data width should be 4 in decimal */ 263 r->data_width = 4; 264 265 r->alloc_capable = true; 266 267 return true; 268 } 269 270 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) 271 { 272 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 273 union cpuid_0x10_1_eax eax; 274 union cpuid_0x10_x_ecx ecx; 275 union cpuid_0x10_x_edx edx; 276 u32 ebx; 277 278 cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); 279 hw_res->num_closid = edx.split.cos_max + 1; 280 r->cache.cbm_len = eax.split.cbm_len + 1; 281 r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; 282 r->cache.shareable_bits = ebx & r->default_ctrl; 283 r->data_width = (r->cache.cbm_len + 3) / 4; 284 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 285 r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; 286 r->alloc_capable = true; 287 } 288 289 static void rdt_get_cdp_config(int level) 290 { 291 /* 292 * By default, CDP is disabled. CDP can be enabled by mount parameter 293 * "cdp" during resctrl file system mount time. 294 */ 295 rdt_resources_all[level].cdp_enabled = false; 296 rdt_resources_all[level].r_resctrl.cdp_capable = true; 297 } 298 299 static void rdt_get_cdp_l3_config(void) 300 { 301 rdt_get_cdp_config(RDT_RESOURCE_L3); 302 } 303 304 static void rdt_get_cdp_l2_config(void) 305 { 306 rdt_get_cdp_config(RDT_RESOURCE_L2); 307 } 308 309 static void mba_wrmsr_amd(struct msr_param *m) 310 { 311 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 312 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 313 unsigned int i; 314 315 for (i = m->low; i < m->high; i++) 316 wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); 317 } 318 319 /* 320 * Map the memory b/w percentage value to delay values 321 * that can be written to QOS_MSRs. 322 * There are currently no SKUs which support non linear delay values. 323 */ 324 static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) 325 { 326 if (r->membw.delay_linear) 327 return MAX_MBA_BW - bw; 328 329 pr_warn_once("Non Linear delay-bw map not supported but queried\n"); 330 return r->default_ctrl; 331 } 332 333 static void mba_wrmsr_intel(struct msr_param *m) 334 { 335 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 336 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 337 unsigned int i; 338 339 /* Write the delay values for mba. */ 340 for (i = m->low; i < m->high; i++) 341 wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res)); 342 } 343 344 static void cat_wrmsr(struct msr_param *m) 345 { 346 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); 347 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); 348 unsigned int i; 349 350 for (i = m->low; i < m->high; i++) 351 wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); 352 } 353 354 struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) 355 { 356 struct rdt_ctrl_domain *d; 357 358 lockdep_assert_cpus_held(); 359 360 list_for_each_entry(d, &r->ctrl_domains, hdr.list) { 361 /* Find the domain that contains this CPU */ 362 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) 363 return d; 364 } 365 366 return NULL; 367 } 368 369 struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) 370 { 371 struct rdt_mon_domain *d; 372 373 lockdep_assert_cpus_held(); 374 375 list_for_each_entry(d, &r->mon_domains, hdr.list) { 376 /* Find the domain that contains this CPU */ 377 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) 378 return d; 379 } 380 381 return NULL; 382 } 383 384 u32 resctrl_arch_get_num_closid(struct rdt_resource *r) 385 { 386 return resctrl_to_arch_res(r)->num_closid; 387 } 388 389 void rdt_ctrl_update(void *arg) 390 { 391 struct rdt_hw_resource *hw_res; 392 struct msr_param *m = arg; 393 394 hw_res = resctrl_to_arch_res(m->res); 395 hw_res->msr_update(m); 396 } 397 398 /* 399 * rdt_find_domain - Search for a domain id in a resource domain list. 400 * 401 * Search the domain list to find the domain id. If the domain id is 402 * found, return the domain. NULL otherwise. If the domain id is not 403 * found (and NULL returned) then the first domain with id bigger than 404 * the input id can be returned to the caller via @pos. 405 */ 406 struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, 407 struct list_head **pos) 408 { 409 struct rdt_domain_hdr *d; 410 struct list_head *l; 411 412 list_for_each(l, h) { 413 d = list_entry(l, struct rdt_domain_hdr, list); 414 /* When id is found, return its domain. */ 415 if (id == d->id) 416 return d; 417 /* Stop searching when finding id's position in sorted list. */ 418 if (id < d->id) 419 break; 420 } 421 422 if (pos) 423 *pos = l; 424 425 return NULL; 426 } 427 428 static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) 429 { 430 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 431 int i; 432 433 /* 434 * Initialize the Control MSRs to having no control. 435 * For Cache Allocation: Set all bits in cbm 436 * For Memory Allocation: Set b/w requested to 100% 437 */ 438 for (i = 0; i < hw_res->num_closid; i++, dc++) 439 *dc = r->default_ctrl; 440 } 441 442 static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) 443 { 444 kfree(hw_dom->ctrl_val); 445 kfree(hw_dom); 446 } 447 448 static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) 449 { 450 kfree(hw_dom->arch_mbm_total); 451 kfree(hw_dom->arch_mbm_local); 452 kfree(hw_dom); 453 } 454 455 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d) 456 { 457 struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); 458 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 459 struct msr_param m; 460 u32 *dc; 461 462 dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), 463 GFP_KERNEL); 464 if (!dc) 465 return -ENOMEM; 466 467 hw_dom->ctrl_val = dc; 468 setup_default_ctrlval(r, dc); 469 470 m.res = r; 471 m.dom = d; 472 m.low = 0; 473 m.high = hw_res->num_closid; 474 hw_res->msr_update(&m); 475 return 0; 476 } 477 478 /** 479 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters 480 * @num_rmid: The size of the MBM counter array 481 * @hw_dom: The domain that owns the allocated arrays 482 */ 483 static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) 484 { 485 size_t tsize; 486 487 if (is_mbm_total_enabled()) { 488 tsize = sizeof(*hw_dom->arch_mbm_total); 489 hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); 490 if (!hw_dom->arch_mbm_total) 491 return -ENOMEM; 492 } 493 if (is_mbm_local_enabled()) { 494 tsize = sizeof(*hw_dom->arch_mbm_local); 495 hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); 496 if (!hw_dom->arch_mbm_local) { 497 kfree(hw_dom->arch_mbm_total); 498 hw_dom->arch_mbm_total = NULL; 499 return -ENOMEM; 500 } 501 } 502 503 return 0; 504 } 505 506 static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) 507 { 508 switch (scope) { 509 case RESCTRL_L2_CACHE: 510 case RESCTRL_L3_CACHE: 511 return get_cpu_cacheinfo_id(cpu, scope); 512 case RESCTRL_L3_NODE: 513 return cpu_to_node(cpu); 514 default: 515 break; 516 } 517 518 return -EINVAL; 519 } 520 521 static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) 522 { 523 int id = get_domain_id_from_scope(cpu, r->ctrl_scope); 524 struct rdt_hw_ctrl_domain *hw_dom; 525 struct list_head *add_pos = NULL; 526 struct rdt_domain_hdr *hdr; 527 struct rdt_ctrl_domain *d; 528 int err; 529 530 lockdep_assert_held(&domain_list_lock); 531 532 if (id < 0) { 533 pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", 534 cpu, r->ctrl_scope, r->name); 535 return; 536 } 537 538 hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); 539 if (hdr) { 540 if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) 541 return; 542 d = container_of(hdr, struct rdt_ctrl_domain, hdr); 543 544 cpumask_set_cpu(cpu, &d->hdr.cpu_mask); 545 if (r->cache.arch_has_per_cpu_cfg) 546 rdt_domain_reconfigure_cdp(r); 547 return; 548 } 549 550 hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); 551 if (!hw_dom) 552 return; 553 554 d = &hw_dom->d_resctrl; 555 d->hdr.id = id; 556 d->hdr.type = RESCTRL_CTRL_DOMAIN; 557 cpumask_set_cpu(cpu, &d->hdr.cpu_mask); 558 559 rdt_domain_reconfigure_cdp(r); 560 561 if (domain_setup_ctrlval(r, d)) { 562 ctrl_domain_free(hw_dom); 563 return; 564 } 565 566 list_add_tail_rcu(&d->hdr.list, add_pos); 567 568 err = resctrl_online_ctrl_domain(r, d); 569 if (err) { 570 list_del_rcu(&d->hdr.list); 571 synchronize_rcu(); 572 ctrl_domain_free(hw_dom); 573 } 574 } 575 576 static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) 577 { 578 int id = get_domain_id_from_scope(cpu, r->mon_scope); 579 struct list_head *add_pos = NULL; 580 struct rdt_hw_mon_domain *hw_dom; 581 struct rdt_domain_hdr *hdr; 582 struct rdt_mon_domain *d; 583 int err; 584 585 lockdep_assert_held(&domain_list_lock); 586 587 if (id < 0) { 588 pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", 589 cpu, r->mon_scope, r->name); 590 return; 591 } 592 593 hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); 594 if (hdr) { 595 if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) 596 return; 597 d = container_of(hdr, struct rdt_mon_domain, hdr); 598 599 cpumask_set_cpu(cpu, &d->hdr.cpu_mask); 600 return; 601 } 602 603 hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); 604 if (!hw_dom) 605 return; 606 607 d = &hw_dom->d_resctrl; 608 d->hdr.id = id; 609 d->hdr.type = RESCTRL_MON_DOMAIN; 610 d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); 611 if (!d->ci) { 612 pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); 613 mon_domain_free(hw_dom); 614 return; 615 } 616 cpumask_set_cpu(cpu, &d->hdr.cpu_mask); 617 618 arch_mon_domain_online(r, d); 619 620 if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { 621 mon_domain_free(hw_dom); 622 return; 623 } 624 625 list_add_tail_rcu(&d->hdr.list, add_pos); 626 627 err = resctrl_online_mon_domain(r, d); 628 if (err) { 629 list_del_rcu(&d->hdr.list); 630 synchronize_rcu(); 631 mon_domain_free(hw_dom); 632 } 633 } 634 635 static void domain_add_cpu(int cpu, struct rdt_resource *r) 636 { 637 if (r->alloc_capable) 638 domain_add_cpu_ctrl(cpu, r); 639 if (r->mon_capable) 640 domain_add_cpu_mon(cpu, r); 641 } 642 643 static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) 644 { 645 int id = get_domain_id_from_scope(cpu, r->ctrl_scope); 646 struct rdt_hw_ctrl_domain *hw_dom; 647 struct rdt_domain_hdr *hdr; 648 struct rdt_ctrl_domain *d; 649 650 lockdep_assert_held(&domain_list_lock); 651 652 if (id < 0) { 653 pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", 654 cpu, r->ctrl_scope, r->name); 655 return; 656 } 657 658 hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); 659 if (!hdr) { 660 pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n", 661 id, cpu, r->name); 662 return; 663 } 664 665 if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) 666 return; 667 668 d = container_of(hdr, struct rdt_ctrl_domain, hdr); 669 hw_dom = resctrl_to_arch_ctrl_dom(d); 670 671 cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); 672 if (cpumask_empty(&d->hdr.cpu_mask)) { 673 resctrl_offline_ctrl_domain(r, d); 674 list_del_rcu(&d->hdr.list); 675 synchronize_rcu(); 676 677 /* 678 * rdt_ctrl_domain "d" is going to be freed below, so clear 679 * its pointer from pseudo_lock_region struct. 680 */ 681 if (d->plr) 682 d->plr->d = NULL; 683 ctrl_domain_free(hw_dom); 684 685 return; 686 } 687 } 688 689 static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) 690 { 691 int id = get_domain_id_from_scope(cpu, r->mon_scope); 692 struct rdt_hw_mon_domain *hw_dom; 693 struct rdt_domain_hdr *hdr; 694 struct rdt_mon_domain *d; 695 696 lockdep_assert_held(&domain_list_lock); 697 698 if (id < 0) { 699 pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", 700 cpu, r->mon_scope, r->name); 701 return; 702 } 703 704 hdr = rdt_find_domain(&r->mon_domains, id, NULL); 705 if (!hdr) { 706 pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", 707 id, cpu, r->name); 708 return; 709 } 710 711 if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) 712 return; 713 714 d = container_of(hdr, struct rdt_mon_domain, hdr); 715 hw_dom = resctrl_to_arch_mon_dom(d); 716 717 cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); 718 if (cpumask_empty(&d->hdr.cpu_mask)) { 719 resctrl_offline_mon_domain(r, d); 720 list_del_rcu(&d->hdr.list); 721 synchronize_rcu(); 722 mon_domain_free(hw_dom); 723 724 return; 725 } 726 } 727 728 static void domain_remove_cpu(int cpu, struct rdt_resource *r) 729 { 730 if (r->alloc_capable) 731 domain_remove_cpu_ctrl(cpu, r); 732 if (r->mon_capable) 733 domain_remove_cpu_mon(cpu, r); 734 } 735 736 static void clear_closid_rmid(int cpu) 737 { 738 struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); 739 740 state->default_closid = RESCTRL_RESERVED_CLOSID; 741 state->default_rmid = RESCTRL_RESERVED_RMID; 742 state->cur_closid = RESCTRL_RESERVED_CLOSID; 743 state->cur_rmid = RESCTRL_RESERVED_RMID; 744 wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID, 745 RESCTRL_RESERVED_CLOSID); 746 } 747 748 static int resctrl_arch_online_cpu(unsigned int cpu) 749 { 750 struct rdt_resource *r; 751 752 mutex_lock(&domain_list_lock); 753 for_each_capable_rdt_resource(r) 754 domain_add_cpu(cpu, r); 755 mutex_unlock(&domain_list_lock); 756 757 clear_closid_rmid(cpu); 758 resctrl_online_cpu(cpu); 759 760 return 0; 761 } 762 763 static int resctrl_arch_offline_cpu(unsigned int cpu) 764 { 765 struct rdt_resource *r; 766 767 resctrl_offline_cpu(cpu); 768 769 mutex_lock(&domain_list_lock); 770 for_each_capable_rdt_resource(r) 771 domain_remove_cpu(cpu, r); 772 mutex_unlock(&domain_list_lock); 773 774 clear_closid_rmid(cpu); 775 776 return 0; 777 } 778 779 /* 780 * Choose a width for the resource name and resource data based on the 781 * resource that has widest name and cbm. 782 */ 783 static __init void rdt_init_padding(void) 784 { 785 struct rdt_resource *r; 786 787 for_each_alloc_capable_rdt_resource(r) { 788 if (r->data_width > max_data_width) 789 max_data_width = r->data_width; 790 } 791 } 792 793 enum { 794 RDT_FLAG_CMT, 795 RDT_FLAG_MBM_TOTAL, 796 RDT_FLAG_MBM_LOCAL, 797 RDT_FLAG_L3_CAT, 798 RDT_FLAG_L3_CDP, 799 RDT_FLAG_L2_CAT, 800 RDT_FLAG_L2_CDP, 801 RDT_FLAG_MBA, 802 RDT_FLAG_SMBA, 803 RDT_FLAG_BMEC, 804 }; 805 806 #define RDT_OPT(idx, n, f) \ 807 [idx] = { \ 808 .name = n, \ 809 .flag = f \ 810 } 811 812 struct rdt_options { 813 char *name; 814 int flag; 815 bool force_off, force_on; 816 }; 817 818 static struct rdt_options rdt_options[] __initdata = { 819 RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC), 820 RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL), 821 RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL), 822 RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3), 823 RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3), 824 RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), 825 RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2), 826 RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), 827 RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), 828 RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), 829 }; 830 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) 831 832 static int __init set_rdt_options(char *str) 833 { 834 struct rdt_options *o; 835 bool force_off; 836 char *tok; 837 838 if (*str == '=') 839 str++; 840 while ((tok = strsep(&str, ",")) != NULL) { 841 force_off = *tok == '!'; 842 if (force_off) 843 tok++; 844 for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { 845 if (strcmp(tok, o->name) == 0) { 846 if (force_off) 847 o->force_off = true; 848 else 849 o->force_on = true; 850 break; 851 } 852 } 853 } 854 return 1; 855 } 856 __setup("rdt", set_rdt_options); 857 858 bool __init rdt_cpu_has(int flag) 859 { 860 bool ret = boot_cpu_has(flag); 861 struct rdt_options *o; 862 863 if (!ret) 864 return ret; 865 866 for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { 867 if (flag == o->flag) { 868 if (o->force_off) 869 ret = false; 870 if (o->force_on) 871 ret = true; 872 break; 873 } 874 } 875 return ret; 876 } 877 878 static __init bool get_mem_config(void) 879 { 880 struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; 881 882 if (!rdt_cpu_has(X86_FEATURE_MBA)) 883 return false; 884 885 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 886 return __get_mem_config_intel(&hw_res->r_resctrl); 887 else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 888 return __rdt_get_mem_config_amd(&hw_res->r_resctrl); 889 890 return false; 891 } 892 893 static __init bool get_slow_mem_config(void) 894 { 895 struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA]; 896 897 if (!rdt_cpu_has(X86_FEATURE_SMBA)) 898 return false; 899 900 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 901 return __rdt_get_mem_config_amd(&hw_res->r_resctrl); 902 903 return false; 904 } 905 906 static __init bool get_rdt_alloc_resources(void) 907 { 908 struct rdt_resource *r; 909 bool ret = false; 910 911 if (rdt_alloc_capable) 912 return true; 913 914 if (!boot_cpu_has(X86_FEATURE_RDT_A)) 915 return false; 916 917 if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { 918 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 919 rdt_get_cache_alloc_cfg(1, r); 920 if (rdt_cpu_has(X86_FEATURE_CDP_L3)) 921 rdt_get_cdp_l3_config(); 922 ret = true; 923 } 924 if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { 925 /* CPUID 0x10.2 fields are same format at 0x10.1 */ 926 r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl; 927 rdt_get_cache_alloc_cfg(2, r); 928 if (rdt_cpu_has(X86_FEATURE_CDP_L2)) 929 rdt_get_cdp_l2_config(); 930 ret = true; 931 } 932 933 if (get_mem_config()) 934 ret = true; 935 936 if (get_slow_mem_config()) 937 ret = true; 938 939 return ret; 940 } 941 942 static __init bool get_rdt_mon_resources(void) 943 { 944 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 945 946 if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) 947 rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); 948 if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) 949 rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); 950 if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) 951 rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); 952 953 if (!rdt_mon_features) 954 return false; 955 956 return !rdt_get_mon_l3_config(r); 957 } 958 959 static __init void __check_quirks_intel(void) 960 { 961 switch (boot_cpu_data.x86_vfm) { 962 case INTEL_HASWELL_X: 963 if (!rdt_options[RDT_FLAG_L3_CAT].force_off) 964 cache_alloc_hsw_probe(); 965 break; 966 case INTEL_SKYLAKE_X: 967 if (boot_cpu_data.x86_stepping <= 4) 968 set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); 969 else 970 set_rdt_options("!l3cat"); 971 fallthrough; 972 case INTEL_BROADWELL_X: 973 intel_rdt_mbm_apply_quirk(); 974 break; 975 } 976 } 977 978 static __init void check_quirks(void) 979 { 980 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 981 __check_quirks_intel(); 982 } 983 984 static __init bool get_rdt_resources(void) 985 { 986 rdt_alloc_capable = get_rdt_alloc_resources(); 987 rdt_mon_capable = get_rdt_mon_resources(); 988 989 return (rdt_mon_capable || rdt_alloc_capable); 990 } 991 992 static __init void rdt_init_res_defs_intel(void) 993 { 994 struct rdt_hw_resource *hw_res; 995 struct rdt_resource *r; 996 997 for_each_rdt_resource(r) { 998 hw_res = resctrl_to_arch_res(r); 999 1000 if (r->rid == RDT_RESOURCE_L3 || 1001 r->rid == RDT_RESOURCE_L2) { 1002 r->cache.arch_has_per_cpu_cfg = false; 1003 r->cache.min_cbm_bits = 1; 1004 } else if (r->rid == RDT_RESOURCE_MBA) { 1005 hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE; 1006 hw_res->msr_update = mba_wrmsr_intel; 1007 } 1008 } 1009 } 1010 1011 static __init void rdt_init_res_defs_amd(void) 1012 { 1013 struct rdt_hw_resource *hw_res; 1014 struct rdt_resource *r; 1015 1016 for_each_rdt_resource(r) { 1017 hw_res = resctrl_to_arch_res(r); 1018 1019 if (r->rid == RDT_RESOURCE_L3 || 1020 r->rid == RDT_RESOURCE_L2) { 1021 r->cache.arch_has_sparse_bitmasks = true; 1022 r->cache.arch_has_per_cpu_cfg = true; 1023 r->cache.min_cbm_bits = 0; 1024 } else if (r->rid == RDT_RESOURCE_MBA) { 1025 hw_res->msr_base = MSR_IA32_MBA_BW_BASE; 1026 hw_res->msr_update = mba_wrmsr_amd; 1027 } else if (r->rid == RDT_RESOURCE_SMBA) { 1028 hw_res->msr_base = MSR_IA32_SMBA_BW_BASE; 1029 hw_res->msr_update = mba_wrmsr_amd; 1030 } 1031 } 1032 } 1033 1034 static __init void rdt_init_res_defs(void) 1035 { 1036 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 1037 rdt_init_res_defs_intel(); 1038 else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 1039 rdt_init_res_defs_amd(); 1040 } 1041 1042 static enum cpuhp_state rdt_online; 1043 1044 /* Runs once on the BSP during boot. */ 1045 void resctrl_cpu_detect(struct cpuinfo_x86 *c) 1046 { 1047 if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { 1048 c->x86_cache_max_rmid = -1; 1049 c->x86_cache_occ_scale = -1; 1050 c->x86_cache_mbm_width_offset = -1; 1051 return; 1052 } 1053 1054 /* will be overridden if occupancy monitoring exists */ 1055 c->x86_cache_max_rmid = cpuid_ebx(0xf); 1056 1057 if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || 1058 cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || 1059 cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { 1060 u32 eax, ebx, ecx, edx; 1061 1062 /* QoS sub-leaf, EAX=0Fh, ECX=1 */ 1063 cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); 1064 1065 c->x86_cache_max_rmid = ecx; 1066 c->x86_cache_occ_scale = ebx; 1067 c->x86_cache_mbm_width_offset = eax & 0xff; 1068 1069 if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) 1070 c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; 1071 } 1072 } 1073 1074 static int __init resctrl_late_init(void) 1075 { 1076 struct rdt_resource *r; 1077 int state, ret; 1078 1079 /* 1080 * Initialize functions(or definitions) that are different 1081 * between vendors here. 1082 */ 1083 rdt_init_res_defs(); 1084 1085 check_quirks(); 1086 1087 if (!get_rdt_resources()) 1088 return -ENODEV; 1089 1090 rdt_init_padding(); 1091 1092 state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, 1093 "x86/resctrl/cat:online:", 1094 resctrl_arch_online_cpu, 1095 resctrl_arch_offline_cpu); 1096 if (state < 0) 1097 return state; 1098 1099 ret = rdtgroup_init(); 1100 if (ret) { 1101 cpuhp_remove_state(state); 1102 return ret; 1103 } 1104 rdt_online = state; 1105 1106 for_each_alloc_capable_rdt_resource(r) 1107 pr_info("%s allocation detected\n", r->name); 1108 1109 for_each_mon_capable_rdt_resource(r) 1110 pr_info("%s monitoring detected\n", r->name); 1111 1112 return 0; 1113 } 1114 1115 late_initcall(resctrl_late_init); 1116 1117 static void __exit resctrl_exit(void) 1118 { 1119 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; 1120 1121 cpuhp_remove_state(rdt_online); 1122 1123 rdtgroup_exit(); 1124 1125 if (r->mon_capable) 1126 rdt_put_mon_l3_config(); 1127 } 1128 1129 __exitcall(resctrl_exit); 1130