// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/msr.h>
#include <asm/resctrl.h>
#include "internal.h"

/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);

/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

#define ctrl_domain_init(id)	LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id)	LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L3,
			.name		= "L3",
			.ctrl_scope	= RESCTRL_L3_CACHE,
			.mon_scope	= RESCTRL_L3_CACHE,
			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_L3),
			.mon_domains	= mon_domain_init(RDT_RESOURCE_L3),
			.schema_fmt	= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base	= MSR_IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_L2] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_L2,
			.name		= "L2",
			.ctrl_scope	= RESCTRL_L2_CACHE,
			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_L2),
			.schema_fmt	= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base	= MSR_IA32_L2_CBM_BASE,
		.msr_update	= cat_wrmsr,
	},
	[RDT_RESOURCE_MBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_MBA,
			.name		= "MB",
			.ctrl_scope	= RESCTRL_L3_CACHE,
			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_MBA),
			.schema_fmt	= RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_SMBA] =
	{
		.r_resctrl = {
			.rid		= RDT_RESOURCE_SMBA,
			.name		= "SMBA",
			.ctrl_scope	= RESCTRL_L3_CACHE,
			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_SMBA),
			.schema_fmt	= RESCTRL_SCHEMA_RANGE,
		},
	},
};

u32 resctrl_arch_system_num_rmid_idx(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	/* RMIDs are independent numbers for x86. num_rmid_idx == num_rmid */
	return r->num_rmid;
}

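/*
 * Look up the architecture's rdt_resource for resource level @l. Returns
 * NULL if @l is not a valid resource level.
 */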
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
{
	if (l >= RDT_NUM_RESOURCES)
		return NULL;

	return &rdt_resources_all[l].r_resctrl;
}

/*
 * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on HSW server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r = &hw_res->r_resctrl;
	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;

	if (wrmsrq_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
		return;

	rdmsrq(MSR_IA32_L3_CBM_BASE, l3_cbm_0);

	/* If all the bits were set in MSR, return success */
	if (l3_cbm_0 != max_cbm)
		return;

	hw_res->num_closid = 4;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->cache.arch_has_sparse_bitmasks = false;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
}

bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);

	/*
	 * The software controller support is only applicable to the MBA
	 * resource. Make sure to check the resource type.
	 */
	if (r->rid != RDT_RESOURCE_MBA)
		return false;

	return r->membw.mba_sc;
}

/*
 * rdt_get_mb_table() - get a mapping between the bandwidth (b/w) percentage
 * values exposed to the user interface and the h/w understandable delay
 * values.
 *
 * The non-linear delay values have a granularity of powers of two, and the
 * h/w does not guarantee a linear relation between the configured delay
 * values and the actual b/w enforced.
 * Hence we need a pre-calibrated mapping so the user can express memory
 * b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * There are no Intel SKUs as of now that support non-linear delay
	 * values.
	 */
	pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

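/*
 * Enumerate Intel MBA from CPUID leaf 0x10, subleaf 3: EDX reports the
 * highest supported CLOSID, EAX the maximum throttling delay and ECX
 * whether the delay scale is linear. For a linear scale both the minimum
 * bandwidth and the granularity come out as (MAX_MBA_BW - max_delay).
 */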
static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->membw.max_bw = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;

	r->alloc_capable = true;

	return true;
}

static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	u32 eax, ebx, ecx, edx, subleaf;

	/*
	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
	 * CPUID_Fn80000020_EDX_x02 for SMBA
	 */
	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->membw.max_bw = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use the memory delay throttle model that Intel
	 * uses to control the allocation.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;

	r->alloc_capable = true;

	return true;
}

static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx, default_ctrl;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & default_ctrl;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
}

static void rdt_get_cdp_config(int level)
{
	/*
	 * By default, CDP is disabled. CDP can be enabled with the "cdp"
	 * mount parameter when the resctrl file system is mounted.
	 */
	rdt_resources_all[level].cdp_enabled = false;
	rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
	rdt_get_cdp_config(RDT_RESOURCE_L2);
}

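/*
 * AMD MBA MSRs take the bandwidth limit directly, so the cached per-CLOSID
 * control values are written out unmodified (no delay conversion needed).
 */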
static void mba_wrmsr_amd(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non-linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return MAX_MBA_BW;
}

static void mba_wrmsr_intel(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	/* Write the delay values for MBA. */
	for (i = m->low; i < m->high; i++)
		wrmsrq(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}

static void cat_wrmsr(struct msr_param *m)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
}

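/*
 * Apply the control values described by @arg (a struct msr_param) on the
 * current CPU. Intended to be run on a CPU inside the target domain (e.g.
 * via an IPI) so that the vendor specific MSR writes happen locally.
 */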
void rdt_ctrl_update(void *arg)
{
	struct rdt_hw_resource *hw_res;
	struct msr_param *m = arg;

	hw_res = resctrl_to_arch_res(m->res);
	hw_res->msr_update(m);
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	int i;

	/*
	 * Initialize the Control MSRs so that they exert no control:
	 * For Cache Allocation: set all bits in the cbm
	 * For Memory Allocation: set the b/w requested to 100%
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = resctrl_get_default_ctrl(r);
}

static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
{
	kfree(hw_dom->ctrl_val);
	kfree(hw_dom);
}

static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
	kfree(hw_dom->arch_mbm_total);
	kfree(hw_dom->arch_mbm_local);
	kfree(hw_dom);
}

static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	struct msr_param m;
	u32 *dc;

	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
			   GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	hw_dom->ctrl_val = dc;
	setup_default_ctrlval(r, dc);

	m.res = r;
	m.dom = d;
	m.low = 0;
	m.high = hw_res->num_closid;
	hw_res->msr_update(&m);
	return 0;
}

/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
	size_t tsize;

	if (resctrl_arch_is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (resctrl_arch_is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
			kfree(hw_dom->arch_mbm_total);
			hw_dom->arch_mbm_total = NULL;
			return -ENOMEM;
		}
	}

	return 0;
}

static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
	switch (scope) {
	case RESCTRL_L2_CACHE:
	case RESCTRL_L3_CACHE:
		return get_cpu_cacheinfo_id(cpu, scope);
	case RESCTRL_L3_NODE:
		return cpu_to_node(cpu);
	default:
		break;
	}

	return -EINVAL;
}

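/*
 * Add @cpu to the control domain matching r->ctrl_scope, allocating the
 * domain and programming the default control values into the MSRs when
 * this is the first online CPU in that scope.
 */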
static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct list_head *add_pos = NULL;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_ctrl_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		if (r->cache.arch_has_per_cpu_cfg)
			rdt_domain_reconfigure_cdp(r);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_CTRL_DOMAIN;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	rdt_domain_reconfigure_cdp(r);

	if (domain_setup_ctrlval(r, d)) {
		ctrl_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_ctrl_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		ctrl_domain_free(hw_dom);
	}
}

static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct list_head *add_pos = NULL;
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;
	int err;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
			return;
		d = container_of(hdr, struct rdt_mon_domain, hdr);

		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
		return;
	}

	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
	if (!hw_dom)
		return;

	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!d->ci) {
		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
		mon_domain_free(hw_dom);
		return;
	}
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	arch_mon_domain_online(r, d);

	if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
		mon_domain_free(hw_dom);
		return;
	}

	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, d);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);
	}
}

static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_add_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_add_cpu_mon(cpu, r);
}

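/*
 * Remove @cpu from the control domain matching r->ctrl_scope and tear the
 * domain down once its last CPU has gone offline.
 */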
static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
	struct rdt_hw_ctrl_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_ctrl_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->ctrl_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
	hw_dom = resctrl_to_arch_ctrl_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_ctrl_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();

		/*
		 * rdt_ctrl_domain "d" is going to be freed below, so clear
		 * its pointer from the pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
		ctrl_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
	int id = get_domain_id_from_scope(cpu, r->mon_scope);
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;

	lockdep_assert_held(&domain_list_lock);

	if (id < 0) {
		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
			     cpu, r->mon_scope, r->name);
		return;
	}

	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
		return;
	}

	if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
		return;

	d = container_of(hdr, struct rdt_mon_domain, hdr);
	hw_dom = resctrl_to_arch_mon_dom(d);

	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
	if (cpumask_empty(&d->hdr.cpu_mask)) {
		resctrl_offline_mon_domain(r, d);
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		mon_domain_free(hw_dom);

		return;
	}
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	if (r->alloc_capable)
		domain_remove_cpu_ctrl(cpu, r);
	if (r->mon_capable)
		domain_remove_cpu_mon(cpu, r);
}

static void clear_closid_rmid(int cpu)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

	state->default_closid = RESCTRL_RESERVED_CLOSID;
	state->default_rmid = RESCTRL_RESERVED_RMID;
	state->cur_closid = RESCTRL_RESERVED_CLOSID;
	state->cur_rmid = RESCTRL_RESERVED_RMID;
	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
	      RESCTRL_RESERVED_CLOSID);
}

static int resctrl_arch_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);
	resctrl_online_cpu(cpu);

	return 0;
}

static int resctrl_arch_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	resctrl_offline_cpu(cpu);

	mutex_lock(&domain_list_lock);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	mutex_unlock(&domain_list_lock);

	clear_closid_rmid(cpu);

	return 0;
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
	RDT_FLAG_MBM_LOCAL,
	RDT_FLAG_L3_CAT,
	RDT_FLAG_L3_CDP,
	RDT_FLAG_L2_CAT,
	RDT_FLAG_L2_CDP,
	RDT_FLAG_MBA,
	RDT_FLAG_SMBA,
	RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
	.name = n,		\
	.flag = f		\
}

struct rdt_options {
	char	*name;
	int	flag;
	bool	force_off, force_on;
};

static struct rdt_options rdt_options[] __initdata = {
	RDT_OPT(RDT_FLAG_CMT,		"cmt",		X86_FEATURE_CQM_OCCUP_LLC),
	RDT_OPT(RDT_FLAG_MBM_TOTAL,	"mbmtotal",	X86_FEATURE_CQM_MBM_TOTAL),
	RDT_OPT(RDT_FLAG_MBM_LOCAL,	"mbmlocal",	X86_FEATURE_CQM_MBM_LOCAL),
	RDT_OPT(RDT_FLAG_L3_CAT,	"l3cat",	X86_FEATURE_CAT_L3),
	RDT_OPT(RDT_FLAG_L3_CDP,	"l3cdp",	X86_FEATURE_CDP_L3),
	RDT_OPT(RDT_FLAG_L2_CAT,	"l2cat",	X86_FEATURE_CAT_L2),
	RDT_OPT(RDT_FLAG_L2_CDP,	"l2cdp",	X86_FEATURE_CDP_L2),
	RDT_OPT(RDT_FLAG_MBA,		"mba",		X86_FEATURE_MBA),
	RDT_OPT(RDT_FLAG_SMBA,		"smba",		X86_FEATURE_SMBA),
	RDT_OPT(RDT_FLAG_BMEC,		"bmec",		X86_FEATURE_BMEC),
};

#define NUM_RDT_OPTIONS		ARRAY_SIZE(rdt_options)

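/*
 * Parse the "rdt=" kernel command line parameter: a comma separated list
 * of feature names, each optionally prefixed with '!'. A '!' entry (e.g.
 * "rdt=!l3cat") forces the feature off even if it is enumerated, while a
 * bare entry overrides a quirk that would otherwise force it off. Features
 * the CPU does not enumerate remain unavailable either way.
 */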
static int __init set_rdt_options(char *str)
{
	struct rdt_options *o;
	bool force_off;
	char *tok;

	if (*str == '=')
		str++;
	while ((tok = strsep(&str, ",")) != NULL) {
		force_off = *tok == '!';
		if (force_off)
			tok++;
		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
			if (strcmp(tok, o->name) == 0) {
				if (force_off)
					o->force_off = true;
				else
					o->force_on = true;
				break;
			}
		}
	}
	return 1;
}
__setup("rdt", set_rdt_options);

bool __init rdt_cpu_has(int flag)
{
	bool ret = boot_cpu_has(flag);
	struct rdt_options *o;

	if (!ret)
		return ret;

	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
		if (flag == o->flag) {
			if (o->force_off)
				ret = false;
			if (o->force_on)
				ret = true;
			break;
		}
	}
	return ret;
}

__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	if (!rdt_cpu_has(X86_FEATURE_BMEC))
		return false;

	switch (evt) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
	default:
		return false;
	}
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

	if (!rdt_cpu_has(X86_FEATURE_MBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		return __get_mem_config_intel(&hw_res->r_resctrl);
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_slow_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

	if (!rdt_cpu_has(X86_FEATURE_SMBA))
		return false;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

	return false;
}

static __init bool get_rdt_alloc_resources(void)
{
	struct rdt_resource *r;
	bool ret = false;

	if (rdt_alloc_capable)
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
		rdt_get_cache_alloc_cfg(1, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
			rdt_get_cdp_l3_config();
		ret = true;
	}
	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are the same format as 0x10.1 */
		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
		rdt_get_cache_alloc_cfg(2, r);
		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
			rdt_get_cdp_l2_config();
		ret = true;
	}

	if (get_mem_config())
		ret = true;

	if (get_slow_mem_config())
		ret = true;

	return ret;
}

static __init bool get_rdt_mon_resources(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

	if (!rdt_mon_features)
		return false;

	return !rdt_get_mon_l3_config(r);
}

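/*
 * Vendor quirks: Haswell servers pre-date CPUID enumeration of CAT and
 * need the write probe in cache_alloc_hsw_probe(); Skylake servers have
 * L3 CAT (and, on early steppings, the monitoring events) forced off;
 * Broadwell and Skylake servers also get the MBM counter correction from
 * intel_rdt_mbm_apply_quirk().
 */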
static __init void __check_quirks_intel(void)
{
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_HASWELL_X:
		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
			cache_alloc_hsw_probe();
		break;
	case INTEL_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
		else
			set_rdt_options("!l3cat");
		fallthrough;
	case INTEL_BROADWELL_X:
		intel_rdt_mbm_apply_quirk();
		break;
	}
}

static __init void check_quirks(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		__check_quirks_intel();
}

static __init bool get_rdt_resources(void)
{
	rdt_alloc_capable = get_rdt_alloc_resources();
	rdt_mon_capable = get_rdt_mon_resources();

	return (rdt_mon_capable || rdt_alloc_capable);
}

static __init void rdt_init_res_defs_intel(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
			hw_res->msr_update = mba_wrmsr_intel;
		}
	}
}

static __init void rdt_init_res_defs_amd(void)
{
	struct rdt_hw_resource *hw_res;
	struct rdt_resource *r;

	for_each_rdt_resource(r) {
		hw_res = resctrl_to_arch_res(r);

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		} else if (r->rid == RDT_RESOURCE_SMBA) {
			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
			hw_res->msr_update = mba_wrmsr_amd;
		}
	}
}

static __init void rdt_init_res_defs(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		rdt_init_res_defs_intel();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		rdt_init_res_defs_amd();
}

static enum cpuhp_state rdt_online;

/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
		c->x86_cache_max_rmid = -1;
		c->x86_cache_occ_scale = -1;
		c->x86_cache_mbm_width_offset = -1;
		return;
	}

	/* will be overridden if occupancy monitoring exists */
	c->x86_cache_max_rmid = cpuid_ebx(0xf);

	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
		u32 eax, ebx, ecx, edx;

		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

		c->x86_cache_max_rmid = ecx;
		c->x86_cache_occ_scale = ebx;
		c->x86_cache_mbm_width_offset = eax & 0xff;

		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
	}
}

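/*
 * Late init: apply vendor specific defaults and quirks, detect the RDT
 * resources, register the CPU hotplug callbacks and then initialise the
 * resctrl filesystem. Returns -ENODEV when no RDT feature is usable.
 */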
static int __init resctrl_arch_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	/*
	 * Initialize functions (or definitions) that are different
	 * between vendors here.
	 */
	rdt_init_res_defs();

	check_quirks();

	if (!get_rdt_resources())
		return -ENODEV;

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
				  resctrl_arch_offline_cpu);
	if (state < 0)
		return state;

	ret = resctrl_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}
	rdt_online = state;

	for_each_alloc_capable_rdt_resource(r)
		pr_info("%s allocation detected\n", r->name);

	for_each_mon_capable_rdt_resource(r)
		pr_info("%s monitoring detected\n", r->name);

	return 0;
}

late_initcall(resctrl_arch_late_init);

static void __exit resctrl_arch_exit(void)
{
	cpuhp_remove_state(rdt_online);

	resctrl_exit();
}

__exitcall(resctrl_arch_exit);