// SPDX-License-Identifier: GPL-2.0
/*
 * x86 CPU caches detection and configuration
 *
 * Previous changes
 * - Venkatesh Pallipadi:		Cache identification through CPUID(0x4)
 * - Ashok Raj <ashok.raj@intel.com>:	Work with CPU hotplug infrastructure
 * - Andi Kleen / Andreas Herrmann:	CPUID(0x4) emulation on AMD
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
	CTYPE_NULL	= 0,
	CTYPE_DATA	= 1,
	CTYPE_INST	= 2,
	CTYPE_UNIFIED	= 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type			:5;
		unsigned int level			:3;
		unsigned int is_self_initializing	:1;
		unsigned int is_fully_associative	:1;
		unsigned int reserved			:4;
		unsigned int num_threads_sharing	:12;
		unsigned int num_cores_on_die		:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size	:12;
		unsigned int physical_line_partition	:10;
		unsigned int ways_of_associativity	:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets		:32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
};

/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL]	= CACHE_TYPE_NOCACHE,
	[CTYPE_DATA]	= CACHE_TYPE_DATA,
	[CTYPE_INST]	= CACHE_TYPE_INST,
	[CTYPE_UNIFIED]	= CACHE_TYPE_UNIFIED,
};
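
/*
 * Worked example (illustrative values, not from any specific CPU): a
 * CPUID(0x4) leaf reporting ways_of_associativity = 7,
 * physical_line_partition = 0, coherency_line_size = 63 and
 * number_of_sets = 8191 describes an 8-way cache with 64-byte lines and
 * 8192 sets, i.e. (7 + 1) * (0 + 1) * (63 + 1) * (8191 + 1) = 4 MB.
 * This is the computation done by cpuid4_info_fill_done() below.
 */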

/*
 * Fallback AMD CPUID(0x4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 *
 * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
 * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
 */

#define AMD_CPUID4_FULLY_ASSOCIATIVE	0xffff
#define AMD_L2_L3_INVALID_ASSOC		0x9

union l1_cache {
	struct {
		unsigned line_size	:8;
		unsigned lines_per_tag	:8;
		unsigned assoc		:8;
		unsigned size_in_kb	:8;
	};
	unsigned int val;
};

union l2_cache {
	struct {
		unsigned line_size	:8;
		unsigned lines_per_tag	:4;
		unsigned assoc		:4;
		unsigned size_in_kb	:16;
	};
	unsigned int val;
};

union l3_cache {
	struct {
		unsigned line_size	:8;
		unsigned lines_per_tag	:4;
		unsigned assoc		:4;
		unsigned res		:2;
		unsigned size_encoded	:14;
	};
	unsigned int val;
};

/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
	[1]	= 1,
	[2]	= 2,
	[3]	= 3,
	[4]	= 4,
	[5]	= 6,
	[6]	= 8,
	[8]	= 16,
	[0xa]	= 32,
	[0xb]	= 48,
	[0xc]	= 64,
	[0xd]	= 96,
	[0xe]	= 128,
	[0xf]	= AMD_CPUID4_FULLY_ASSOCIATIVE
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };
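
/*
 * Example decode (illustrative): the 4-bit assoc field of CPUID(0x80000006)
 * ECX (L2) or EDX (L3) is translated through assocs[] above; a value of 0x6
 * means an 8-way cache, 0xf means fully associative, and 0x9
 * (AMD_L2_L3_INVALID_ASSOC) means CPUID(0x80000006) does not report valid
 * geometry, in which case legacy_amd_cpuid4() below bails out.
 */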

static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
			      union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d, *l1;
	union l2_cache l2;
	union l3_cache l3;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	l1 = &l1d;
	switch (index) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;

		assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		/* Use x86_cache_size as it might have K7 errata fixes */
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[index];
	eax->split.level = levels[index];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = topology_num_cores_per_package();

	if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
		eax->split.is_fully_associative = 1;

	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
				 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
	if (eax.split.type == CTYPE_NULL)
		return -EIO;

	id4->eax = eax;
	id4->ebx = ebx;
	id4->ecx = ecx;
	id4->size = (ecx.split.number_of_sets + 1) *
		    (ebx.split.coherency_line_size + 1) *
		    (ebx.split.physical_line_partition + 1) *
		    (ebx.split.ways_of_associativity + 1);

	return 0;
}

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
	else
		legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	u8 cpu_vendor = boot_cpu_data.x86_vendor;

	return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
		amd_fill_cpuid4_info(index, id4) :
		intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	/* Do a CPUID(op) loop to calculate num_cache_leaves */
	op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
	do {
		++i;
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

/*
 * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
 */

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	if (c->x86 < 0x17) {
		/* Pre-Zen: LLC is at the node level */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * Family 17h up to 1F models: LLC is at the core
		 * complex level. Core complex ID is ApicId[3].
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * Newer families: LLC ID is calculated from the number
		 * of threads sharing the L3 cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int index_msb = get_count_order(num_sharing_cache);

			c->topo.llc_id = c->topo.apicid >> index_msb;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	/*
	 * Hygons are similar to AMD Family 17h up to 1F models: LLC is
	 * at the core complex level. Core complex ID is ApicId[3].
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}
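
/*
 * Shift example for the LLC ID derivation above (illustrative numbers): if
 * CPUID(0x8000001d) reports 16 threads sharing the L3, get_count_order(16)
 * is 4, so llc_id = apicid >> 4 and APIC IDs 0x00-0x0f, 0x10-0x1f, ... each
 * collapse to one LLC ID per L3 complex.
 */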

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	if (boot_cpu_has(X86_FEATURE_TOPOEXT))
		ci->num_leaves = find_num_cache_leaves(c);
	else if (c->extended_cpuid_level >= 0x80000006)
		ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	ci->num_leaves = find_num_cache_leaves(c);
}

static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
				 unsigned int l2, unsigned int l1i, unsigned int l1d)
{
	/*
	 * If llc_id is still unset, then cpuid_level < 4, which implies
	 * that the only possibility left is SMT. Since CPUID(0x2) doesn't
	 * specify any shared caches and SMT shares all caches, we can
	 * unconditionally set LLC ID to the package ID so that all
	 * threads share it.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);

	if (!l2)
		cpu_detect_cache_sizes(c);
}

/*
 * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
 */
static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
{
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	const struct leaf_0x2_table *desc;
	union leaf_0x2_regs regs;
	u8 *ptr;

	if (c->cpuid_level < 2)
		return;

	cpuid_leaf_0x2(&regs);
	for_each_cpuid_0x2_desc(regs, ptr, desc) {
		switch (desc->c_type) {
		case CACHE_L1_INST: l1i += desc->c_size; break;
		case CACHE_L1_DATA: l1d += desc->c_size; break;
		case CACHE_L2: l2 += desc->c_size; break;
		case CACHE_L3: l3 += desc->c_size; break;
		}
	}

	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}

static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
	unsigned int num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	return c->topo.apicid & ~((1 << index_msb) - 1);
}

static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
	unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
	unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;

	if (c->cpuid_level < 4)
		return false;

	/*
	 * There should be at least one leaf. A non-zero value means
	 * that the number of leaves has been previously initialized.
	 */
	if (!ci->num_leaves)
		ci->num_leaves = find_num_cache_leaves(c);

	if (!ci->num_leaves)
		return false;

	for (int i = 0; i < ci->num_leaves; i++) {
		struct _cpuid4_info id4 = {};
		int ret;

		ret = intel_fill_cpuid4_info(i, &id4);
		if (ret < 0)
			continue;

		switch (id4.eax.split.level) {
		case 1:
			if (id4.eax.split.type == CTYPE_DATA)
				l1d = id4.size / 1024;
			else if (id4.eax.split.type == CTYPE_INST)
				l1i = id4.size / 1024;
			break;
		case 2:
			l2 = id4.size / 1024;
			l2_id = calc_cache_topo_id(c, &id4);
			break;
		case 3:
			l3 = id4.size / 1024;
			l3_id = calc_cache_topo_id(c, &id4);
			break;
		default:
			break;
		}
	}

	c->topo.l2c_id = l2_id;
	c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
	return true;
}
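
/*
 * Example for calc_cache_topo_id() (illustrative): if four threads share an
 * L2 (num_threads_sharing = 3 in CPUID(0x4) EAX), get_count_order(4) is 2
 * and the cache topology ID is apicid & ~0x3, so APIC IDs 4-7 all report
 * the same l2_id of 4.
 */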

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
	if (intel_cacheinfo_0x4(c))
		return;

	intel_cacheinfo_0x2(c);
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
 */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *ci;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			ci = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = id4->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			ci = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
 */
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cacheinfo *ci, *sibling_ci;
	unsigned long num_threads_sharing;
	int index_msb, i;

	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, id4))
			return;
	}

	ci = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &ci->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			/* Skip if itself or no cacheinfo */
			if (i == cpu || !sib_cpu_ci->info_list)
				continue;

			sibling_ci = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &ci->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
		}
}

static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
			 struct amd_northbridge *nb)
{
	ci->id = id4->id;
	ci->attributes = CACHE_ID;
	ci->level = id4->eax.split.level;
	ci->type = cache_type_map[id4->eax.split.type];
	ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
	ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
	ci->size = id4->size;
	ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
	ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
	ci->priv = nb;
}
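
/*
 * Sharing-range example for __cache_amd_cpumap_setup() (illustrative): with
 * TOPOEXT and an L2 shared by two SMT siblings, nshared is 2; for a CPU with
 * APIC ID 5 this gives first = 4 and last = 5, so only the two hyperthreads
 * of that core end up in the leaf's shared_cpu_map.
 */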

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

	/* There should be at least one leaf. */
	if (!ci->num_leaves)
		return -ENOENT;

	return 0;
}

/*
 * The maximum number of sharing threads comes from CPUID(0x4) EAX[25:14],
 * with ECX as the cache index. Right-shift the APIC ID by the order of that
 * number to get the cache ID for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4->id = c->topo.apicid >> index_msb;
}

int populate_cache_leaves(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci = this_cpu_ci->info_list;
	u8 cpu_vendor = boot_cpu_data.x86_vendor;
	struct amd_northbridge *nb = NULL;
	struct _cpuid4_info id4 = {};
	int idx, ret;

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = fill_cpuid4_info(idx, &id4);
		if (ret)
			return ret;

		get_cache_id(cpu, &id4);

		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
			nb = amd_init_l3_cache(idx);

		ci_info_init(ci++, &id4, nb);
		__cache_cpumap_setup(cpu, idx, &id4);
	}

	this_cpu_ci->cpu_map_populated = true;
	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts: they would
 * run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

/*
 * Cache flushing is the most time-consuming step when programming the
 * MTRRs. On many Intel CPUs without known errata, it can be skipped
 * if the CPU declares cache self-snooping support.
 */
static void maybe_flush_caches(void)
{
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}
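
/*
 * For reference, the expected call pattern around cache_disable() /
 * cache_enable() is (as done for the MTRR path in cache_cpu_init() below):
 *
 *	local_irq_save(flags);
 *	cache_disable();
 *	... reprogram MTRRs ...
 *	cache_enable();
 *	local_irq_restore(flags);
 */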

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * This is not ideal since the cache is only flushed/disabled
	 * for this CPU while the MTRRs are changed, but changing this
	 * requires more invasive changes to the way the kernel boots.
	 */
	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	maybe_flush_caches();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	maybe_flush_caches();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}
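
/*
 * Ordering overview (a rough sketch of the flow implemented above and
 * below): the BSP programs MTRR/PAT from cache_bp_init(). APs initially
 * defer (cache_aps_delayed_init defaults to true) until cache_aps_init()
 * runs cache_rendezvous_handler() on all online CPUs via stop_machine().
 * Once the delayed-init flag is cleared, a hot-added CPU instead triggers
 * the rendezvous from cache_ap_online() using
 * stop_machine_from_inactive_cpu(), since the incoming CPU is not yet
 * online.
 */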

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries
	 * from being changed, but this routine is called during CPU boot,
	 * where holding the lock would break things.
	 *
	 * This routine is called in two cases:
	 *
	 *   1. very early in software resume, when there are absolutely
	 *      no MTRR entry changes;
	 *
	 *   2. CPU hot-add time. We let mtrr_add/del_page hold the
	 *      cpuhotplug lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);