// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/resctrl.h>
#include "internal.h"

/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format.
 */
int max_name_width, max_data_width;

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)

struct rdt_hw_resource rdt_resources_all[] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L3,
			.name		= "L3",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_L3),
			.parse_ctrlval	= parse_cbm,
			.format_str	= "%d=%0*x",
			.fflags		= RFTYPE_RES_CACHE,
		},
		.msr_base	= MSR_IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L2,
			.name		= "L2",
			.cache_level	= 2,
			.domains	= domain_init(RDT_RESOURCE_L2),
			.parse_ctrlval	= parse_cbm,
			.format_str	= "%d=%0*x",
			.fflags		= RFTYPE_RES_CACHE,
		},
		.msr_base	= MSR_IA32_L2_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_MBA,
			.name		= "MB",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_MBA),
			.parse_ctrlval	= parse_bw,
			.format_str	= "%d=%*u",
			.fflags		= RFTYPE_RES_MB,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_SMBA,
			.name		= "SMBA",
			.cache_level	= 3,
			.domains	= domain_init(RDT_RESOURCE_SMBA),
			.parse_ctrlval	= parse_bw,
			.format_str	= "%d=%*u",
			.fflags		= RFTYPE_RES_MB,
		},
	},
};
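/*
 * The format_str above determines how each domain's control value appears
 * in the schemata file: cache resources print "<id>=<hex mask>" and
 * bandwidth resources print "<id>=<decimal value>", so a schemata line
 * might read, for example, "L3:0=fffff" or "MB:0=100" (one entry per
 * domain).
 */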
/*
 * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r = &hw_res->r_resctrl;
	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;

	if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
		return;

	rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);

	/* If all the bits were set in MSR, return success */
	if (l3_cbm_0 != max_cbm)
		return;

	hw_res->num_closid = 4;
	r->default_ctrl = max_cbm;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->cache.arch_has_sparse_bitmasks = false;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
}

bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;

	/*
	 * The software controller support is only applicable to MBA resource.
	 * Make sure to check for resource type.
	 */
	if (r->rid != RDT_RESOURCE_MBA)
		return false;

	return r->membw.mba_sc;
}
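/*
 * When the software controller is active (resctrl mounted with the
 * "mba_MBps" option), user-supplied MB schemata values are treated as MBps
 * targets: a feedback loop driven by the MBM counters adjusts the
 * percentage-based hardware controls to approximate the requested rate,
 * rather than writing the user value to the MSRs directly.
 */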
/*
 * rdt_get_mb_table() - get a mapping of bandwidth (b/w) percentage values
 * exposed to the user interface and the h/w understandable delay values.
 *
 * The non-linear delay values have the granularity of power of two
 * and also the h/w does not guarantee a curve for configured delay
 * values vs. actual b/w enforced.
 * Hence we need a mapping that is pre-calibrated so the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * There are no Intel SKUs as of now to support non-linear delay.
	 */
	pr_info("MBA b/w map not implemented for cpu:%d, model:%d\n",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

static bool __get_mem_config_intel(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}
	r->data_width = 3;

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
	thread_throttle_mode_init();

	r->alloc_capable = true;

	return true;
}

static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	u32 eax, ebx, ecx, edx, subleaf;

	/*
	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
	 * CPUID_Fn80000020_EDX_x02 for SMBA
	 */
	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->default_ctrl = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use memory delay throttle model to control
	 * the allocation like Intel does.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;
	/* Max value is 2048, Data width should be 4 in decimal */
	r->data_width = 4;

	r->alloc_capable = true;

	return true;
}

static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & r->default_ctrl;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
}
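/*
 * Worked example: if CPUID reports a cbm_len field of 19, the cache has a
 * 20-bit capacity bitmask, default_ctrl becomes 0xfffff and data_width is
 * (20 + 3) / 4 = 5 hex digits in the schemata file.
 */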
static void rdt_get_cdp_config(int level)
{
	/*
	 * By default, CDP is disabled. CDP can be enabled by mount parameter
	 * "cdp" during resctrl file system mount time.
	 */
	rdt_resources_all[level].cdp_enabled = false;
	rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L2);
}

static void mba_wrmsr_amd(struct msr_param *m)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return r->default_ctrl;
}
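/*
 * For example, with linear delay scaling and MAX_MBA_BW of 100, a requested
 * bandwidth of 70% is programmed into the throttle MSR as 100 - 70 = 30.
 */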
static void mba_wrmsr_intel(struct msr_param *m)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
	unsigned int i;

	/* Write the delay values for mba. */
	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}

static void cat_wrmsr(struct msr_param *m)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
	struct rdt_domain *d;

	lockdep_assert_cpus_held();

	list_for_each_entry(d, &r->domains, list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->cpu_mask))
			return d;
	}

	return NULL;
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
}

void rdt_ctrl_update(void *arg)
{
	struct rdt_hw_resource *hw_res;
	struct msr_param *m = arg;

	hw_res = resctrl_to_arch_res(m->res);
	hw_res->msr_update(m);
}

/*
 * rdt_find_domain - Find a domain in a resource that matches input resource id
 *
 * Search resource r's domain list to find the resource id. If the resource
 * id is found in a domain, return the domain. Otherwise return NULL and, if
 * requested by the caller via @pos, set @pos to the position in the sorted
 * list where a domain with this id would be inserted.
 * The domain list is sorted by id in ascending order.
 */
struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
				   struct list_head **pos)
{
	struct rdt_domain *d;
	struct list_head *l;

	if (id < 0)
		return ERR_PTR(-ENODEV);

	list_for_each(l, &r->domains) {
		d = list_entry(l, struct rdt_domain, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	if (pos)
		*pos = l;

	return NULL;
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	int i;

	/*
	 * Initialize the Control MSRs to having no control.
	 * For Cache Allocation: Set all bits in cbm
	 * For Memory Allocation: Set b/w requested to 100%
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = r->default_ctrl;
}

static void domain_free(struct rdt_hw_domain *hw_dom)
{
	kfree(hw_dom->arch_mbm_total);
	kfree(hw_dom->arch_mbm_local);
	kfree(hw_dom->ctrl_val);
	kfree(hw_dom);
}

static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
	struct msr_param m;
	u32 *dc;

	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
			   GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	hw_dom->ctrl_val = dc;
	setup_default_ctrlval(r, dc);

	m.res = r;
	m.dom = d;
	m.low = 0;
	m.high = hw_res->num_closid;
	hw_res->msr_update(&m);
	return 0;
}

/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
{
	size_t tsize;

	if (is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
			kfree(hw_dom->arch_mbm_total);
			hw_dom->arch_mbm_total = NULL;
			return -ENOMEM;
		}
	}

	return 0;
}
/*
 * domain_add_cpu - Add a cpu to a resource's domain list.
 *
 * If an existing domain in the resource r's domain list matches the cpu's
 * resource id, add the cpu in the domain.
 *
 * Otherwise, a new domain is allocated and inserted into the right position
 * in the domain list sorted by id in ascending order.
 *
 * The order in the domain list is visible to users when we print entries
 * in the schemata file and schemata input is validated to have the same order
 * as this list.
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct list_head *add_pos = NULL;
	struct rdt_hw_domain *hw_dom;
	struct rdt_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	d = rdt_find_domain(r, id, &add_pos);
	if (IS_ERR(d)) {
		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
		return;
	}

	if (d) {
		cpumask_set_cpu(cpu, &d->cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->id = id;
	cpumask_set_cpu(cpu, &d->cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
		domain_free(hw_dom);
		return;
	}

	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
		domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->list, add_pos);

	err = resctrl_online_domain(r, d);
	if (err) {
		list_del_rcu(&d->list);
		synchronize_rcu();
		domain_free(hw_dom);
	}
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct rdt_hw_domain *hw_dom;
	struct rdt_domain *d;

	lockdep_assert_held(&domain_list_lock);

	d = rdt_find_domain(r, id, NULL);
	if (IS_ERR_OR_NULL(d)) {
		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
		return;
	}
	hw_dom = resctrl_to_arch_dom(d);

	cpumask_clear_cpu(cpu, &d->cpu_mask);
	if (cpumask_empty(&d->cpu_mask)) {
		resctrl_offline_domain(r, d);
		list_del_rcu(&d->list);
		synchronize_rcu();

		/*
		 * rdt_domain "d" is going to be freed below, so clear
		 * its pointer from pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
		domain_free(hw_dom);

		return;
	}
}

static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = RESCTRL_RESERVED_CLOSID;
	state->default_rmid = RESCTRL_RESERVED_RMID;
	state->cur_closid = RESCTRL_RESERVED_CLOSID;
	state->cur_rmid = RESCTRL_RESERVED_RMID;
	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
	      RESCTRL_RESERVED_CLOSID);
}

static int resctrl_arch_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);
	resctrl_online_cpu(cpu);

	return 0;
}

static int resctrl_arch_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	resctrl_offline_cpu(cpu);

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);

	return 0;
}
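/*
 * resctrl_arch_online_cpu() and resctrl_arch_offline_cpu() are registered as
 * dynamic CPU hotplug callbacks from resctrl_late_init() below; the online
 * callback therefore also runs for every CPU that is already up when resctrl
 * initializes, not only for CPUs hotplugged later.
 */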
/*
 * Choose a width for the resource name and resource data based on the
 * resource that has the widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
	struct rdt_resource *r;

	for_each_alloc_capable_rdt_resource(r) {
		if (r->data_width > max_data_width)
			max_data_width = r->data_width;
	}
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[] __initdata = {
	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal",	X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal",	X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)

static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);
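/*
 * Example: booting with "rdt=!l3cat,cmt" force-disables L3 cache allocation
 * and force-enables cache occupancy monitoring, overriding the CPUID based
 * detection applied by rdt_cpu_has() below.
 */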
bool __init rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are same format as 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	if (!rdt_mon_features)
		return false;

	return !rdt_get_mon_l3_config(r);
}

static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}

static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;
/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
		c->x86_cache_max_rmid = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
	}
}

static int __init resctrl_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	/*
	 * Initialize functions (or definitions) that are different
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	rdt_init_padding();

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
				  resctrl_arch_offline_cpu);
	if (state < 0)
		return state;

	ret = rdtgroup_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_late_init);

static void __exit resctrl_exit(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	cpuhp_remove_state(rdt_online);

	rdtgroup_exit();

	if (r->mon_capable)
		rdt_put_mon_l3_config();
}

__exitcall(resctrl_exit);