// SPDX-License-Identifier: GPL-2.0
/*
 * x86 CPU caches detection and configuration
 *
 * Previous changes
 * - Venkatesh Pallipadi: Cache identification through CPUID(0x4)
 * - Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure
 * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
        CTYPE_NULL = 0,
        CTYPE_DATA = 1,
        CTYPE_INST = 2,
        CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
        struct {
                enum _cache_type        type                    :5;
                unsigned int            level                   :3;
                unsigned int            is_self_initializing    :1;
                unsigned int            is_fully_associative    :1;
                unsigned int            reserved                :4;
                unsigned int            num_threads_sharing     :12;
                unsigned int            num_cores_on_die        :6;
        } split;
        u32 full;
};

union _cpuid4_leaf_ebx {
        struct {
                unsigned int            coherency_line_size     :12;
                unsigned int            physical_line_partition :10;
                unsigned int            ways_of_associativity   :10;
        } split;
        u32 full;
};

union _cpuid4_leaf_ecx {
        struct {
                unsigned int            number_of_sets          :32;
        } split;
        u32 full;
};

struct _cpuid4_info {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned int id;
        unsigned long size;
};

/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
static const enum cache_type cache_type_map[] = {
        [CTYPE_NULL]    = CACHE_TYPE_NOCACHE,
        [CTYPE_DATA]    = CACHE_TYPE_DATA,
        [CTYPE_INST]    = CACHE_TYPE_INST,
        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

/*
 * Fallback AMD CPUID(0x4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 *
 * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
 * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
 */

#define AMD_CPUID4_FULLY_ASSOCIATIVE    0xffff
#define AMD_L2_L3_INVALID_ASSOC         0x9

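/*
 * Register layouts of the legacy AMD cache-descriptor leaves, as consumed by
 * legacy_amd_cpuid4() below: CPUID(0x80000005) ECX/EDX describe the L1
 * data/instruction caches, CPUID(0x80000006) ECX/EDX describe the L2/L3
 * caches.
 */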
union l1_cache {
        struct {
                unsigned line_size      :8;
                unsigned lines_per_tag  :8;
                unsigned assoc          :8;
                unsigned size_in_kb     :8;
        };
        unsigned int val;
};

union l2_cache {
        struct {
                unsigned line_size      :8;
                unsigned lines_per_tag  :4;
                unsigned assoc          :4;
                unsigned size_in_kb     :16;
        };
        unsigned int val;
};

union l3_cache {
        struct {
                unsigned line_size      :8;
                unsigned lines_per_tag  :4;
                unsigned assoc          :4;
                unsigned res            :2;
                unsigned size_encoded   :14;
        };
        unsigned int val;
};

/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
        [1]     = 1,
        [2]     = 2,
        [3]     = 3,
        [4]     = 4,
        [5]     = 6,
        [6]     = 8,
        [8]     = 16,
        [0xa]   = 32,
        [0xb]   = 48,
        [0xc]   = 64,
        [0xd]   = 96,
        [0xe]   = 128,
        [0xf]   = AMD_CPUID4_FULLY_ASSOCIATIVE
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
                              union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
        unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
        union l1_cache l1i, l1d, *l1;
        union l2_cache l2;
        union l3_cache l3;

        eax->full = 0;
        ebx->full = 0;
        ecx->full = 0;

        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

        l1 = &l1d;
        switch (index) {
        case 1:
                l1 = &l1i;
                fallthrough;
        case 0:
                if (!l1->val)
                        return;

                assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
                break;
        case 2:
                if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
                        return;

                /* Use x86_cache_size as it might have K7 errata fixes */
                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
                break;
        case 3:
                if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
                        return;

                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
                        size_in_kb = size_in_kb >> 1;
                        assoc = assoc >> 1;
                }
                break;
        default:
                return;
        }

        eax->split.is_self_initializing = 1;
        eax->split.type = types[index];
        eax->split.level = levels[index];
        eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = topology_num_cores_per_package();

        if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
                eax->split.is_fully_associative = 1;

        ebx->split.coherency_line_size = line_size - 1;
        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                                    (ebx->split.ways_of_associativity + 1) - 1;
}

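/*
 * Finalize a struct _cpuid4_info from raw CPUID(0x4)-style registers. The
 * total cache size is sets * line size * physical line partitions * ways,
 * e.g. 512 sets * 64-byte lines * 1 partition * 16 ways = 512 KB.
 */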
static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
                                 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
        if (eax.split.type == CTYPE_NULL)
                return -EIO;

        id4->eax = eax;
        id4->ebx = ebx;
        id4->ecx = ecx;
        id4->size = (ecx.split.number_of_sets + 1) *
                    (ebx.split.coherency_line_size + 1) *
                    (ebx.split.physical_line_partition + 1) *
                    (ebx.split.ways_of_associativity + 1);

        return 0;
}

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        u32 ignored;

        if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
                cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
        else
                legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

        return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        u32 ignored;

        cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

        return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        u8 cpu_vendor = boot_cpu_data.x86_vendor;

        return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
                amd_fill_cpuid4_info(index, id4) :
                intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
        unsigned int eax, ebx, ecx, edx, op;
        union _cpuid4_leaf_eax cache_eax;
        int i = -1;

        /* Do a CPUID(op) loop to calculate num_cache_leaves */
        op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
        do {
                ++i;
                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
        } while (cache_eax.split.type != CTYPE_NULL);
        return i;
}

/*
 * The maximum number of threads sharing the cache comes from CPUID(0x4)
 * EAX[25:14], with ECX selecting the cache index. Right-shift the APIC ID
 * by the order of that number to get the cache ID for this cache node.
 */
static unsigned int get_cache_id(u32 apicid, const struct _cpuid4_info *id4)
{
        unsigned long num_threads_sharing;
        int index_msb;

        num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
        index_msb = get_count_order(num_threads_sharing);

        return apicid >> index_msb;
}

/*
 * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
 */

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
        if (!cpuid_amd_hygon_has_l3_cache())
                return;

        if (c->x86 < 0x17) {
                /* Pre-Zen: LLC is at the node level */
                c->topo.llc_id = die_id;
        } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
                /*
                 * Family 17h up to 1F models: LLC is at the core
                 * complex level. Core complex ID is ApicId[3].
                 */
                c->topo.llc_id = c->topo.apicid >> 3;
        } else {
                /*
                 * Newer families: LLC ID is calculated from the number
                 * of threads sharing the L3 cache.
                 */
                u32 llc_index = find_num_cache_leaves(c) - 1;
                struct _cpuid4_info id4 = {};

                if (!amd_fill_cpuid4_info(llc_index, &id4))
                        c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
        }
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
        if (!cpuid_amd_hygon_has_l3_cache())
                return;

        /*
         * Hygons are similar to AMD Family 17h up to 1F models: LLC is
         * at the core complex level. Core complex ID is ApicId[3].
         */
        c->topo.llc_id = c->topo.apicid >> 3;
}

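/*
 * With TOPOEXT, the number of cache leaves is counted by walking
 * CPUID(0x8000001d). Otherwise the legacy leaves 0-2 (L1D/L1I/L2) always
 * exist, and leaf 3 is added if CPUID(0x80000006) EDX[15:12] (L3
 * associativity) reports an L3 cache.
 */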
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

        if (boot_cpu_has(X86_FEATURE_TOPOEXT))
                ci->num_leaves = find_num_cache_leaves(c);
        else if (c->extended_cpuid_level >= 0x80000006)
                ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

        ci->num_leaves = find_num_cache_leaves(c);
}

static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
                                 unsigned int l2, unsigned int l1i, unsigned int l1d)
{
        /*
         * If llc_id is still unset, then cpuid_level < 4, which implies
         * that the only possibility left is SMT. Since CPUID(0x2) doesn't
         * specify any shared caches and SMT shares all caches, we can
         * unconditionally set LLC ID to the package ID so that all
         * threads share it.
         */
        if (c->topo.llc_id == BAD_APICID)
                c->topo.llc_id = c->topo.pkg_id;

        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);

        if (!l2)
                cpu_detect_cache_sizes(c);
}

/*
 * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
 */
static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
{
        unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
        const struct leaf_0x2_table *desc;
        union leaf_0x2_regs regs;
        u8 *ptr;

        if (c->cpuid_level < 2)
                return;

        cpuid_leaf_0x2(&regs);
        for_each_cpuid_0x2_desc(regs, ptr, desc) {
                switch (desc->c_type) {
                case CACHE_L1_INST:     l1i += desc->c_size;    break;
                case CACHE_L1_DATA:     l1d += desc->c_size;    break;
                case CACHE_L2:          l2  += desc->c_size;    break;
                case CACHE_L3:          l3  += desc->c_size;    break;
                }
        }

        intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}

static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
        unsigned int num_threads_sharing;
        int index_msb;

        num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
        index_msb = get_count_order(num_threads_sharing);
        return c->topo.apicid & ~((1 << index_msb) - 1);
}

static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
        unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
        unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;

        if (c->cpuid_level < 4)
                return false;

        /*
         * There should be at least one leaf. A non-zero value means
         * that the number of leaves has been previously initialized.
         */
        if (!ci->num_leaves)
                ci->num_leaves = find_num_cache_leaves(c);

        if (!ci->num_leaves)
                return false;

        for (int i = 0; i < ci->num_leaves; i++) {
                struct _cpuid4_info id4 = {};
                int ret;

                ret = intel_fill_cpuid4_info(i, &id4);
                if (ret < 0)
                        continue;

                switch (id4.eax.split.level) {
                case 1:
                        if (id4.eax.split.type == CTYPE_DATA)
                                l1d = id4.size / 1024;
                        else if (id4.eax.split.type == CTYPE_INST)
                                l1i = id4.size / 1024;
                        break;
                case 2:
                        l2 = id4.size / 1024;
                        l2_id = calc_cache_topo_id(c, &id4);
                        break;
                case 3:
                        l3 = id4.size / 1024;
                        l3_id = calc_cache_topo_id(c, &id4);
                        break;
                default:
                        break;
                }
        }

        c->topo.l2c_id = l2_id;
        c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
        intel_cacheinfo_done(c, l3, l2, l1i, l1d);
        return true;
}

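/*
 * Intel cacheinfo entry point: prefer the CPUID(0x4) path, which also
 * provides the cache sharing topology, and fall back to the legacy
 * CPUID(0x2) descriptors on older CPUs.
 */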
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
        /* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
        if (intel_cacheinfo_0x4(c))
                return;

        intel_cacheinfo_0x2(c);
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
 */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
                                    const struct _cpuid4_info *id4)
{
        struct cpu_cacheinfo *this_cpu_ci;
        struct cacheinfo *ci;
        int i, sibling;

        /*
         * For L3, always use the pre-calculated cpu_llc_shared_mask
         * to derive shared_cpu_map.
         */
        if (index == 3) {
                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;

                        ci = this_cpu_ci->info_list + index;
                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
                                if (!cpu_online(sibling))
                                        continue;
                                cpumask_set_cpu(sibling, &ci->shared_cpu_map);
                        }
                }
        } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                unsigned int apicid, nshared, first, last;

                nshared = id4->eax.split.num_threads_sharing + 1;
                apicid = cpu_data(cpu).topo.apicid;
                first = apicid - (apicid % nshared);
                last = first + nshared - 1;

                for_each_online_cpu(i) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;

                        apicid = cpu_data(i).topo.apicid;
                        if ((apicid < first) || (apicid > last))
                                continue;

                        ci = this_cpu_ci->info_list + index;

                        for_each_online_cpu(sibling) {
                                apicid = cpu_data(sibling).topo.apicid;
                                if ((apicid < first) || (apicid > last))
                                        continue;
                                cpumask_set_cpu(sibling, &ci->shared_cpu_map);
                        }
                }
        } else
                return 0;

        return 1;
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
 */
static void __cache_cpumap_setup(unsigned int cpu, int index,
                                 const struct _cpuid4_info *id4)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct cacheinfo *ci, *sibling_ci;
        unsigned long num_threads_sharing;
        int index_msb, i;

        if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
                if (__cache_amd_cpumap_setup(cpu, index, id4))
                        return;
        }

        ci = this_cpu_ci->info_list + index;
        num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

        cpumask_set_cpu(cpu, &ci->shared_cpu_map);
        if (num_threads_sharing == 1)
                return;

        index_msb = get_count_order(num_threads_sharing);

        for_each_online_cpu(i)
                if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

                        /* Skip if itself or no cacheinfo */
                        if (i == cpu || !sib_cpu_ci->info_list)
                                continue;

                        sibling_ci = sib_cpu_ci->info_list + index;
                        cpumask_set_cpu(i, &ci->shared_cpu_map);
                        cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
                }
}

static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
                         struct amd_northbridge *nb)
{
        ci->id = id4->id;
        ci->attributes = CACHE_ID;
        ci->level = id4->eax.split.level;
        ci->type = cache_type_map[id4->eax.split.type];
        ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
        ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
        ci->size = id4->size;
        ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
        ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
        ci->priv = nb;
}

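/*
 * Arch hooks for the generic cacheinfo framework: init_cache_level() only
 * checks that cache leaves were detected, while populate_cache_leaves()
 * fills one struct cacheinfo per CPUID leaf and sets up the shared CPU maps.
 */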
int init_cache_level(unsigned int cpu)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

        /* There should be at least one leaf. */
        if (!ci->num_leaves)
                return -ENOENT;

        return 0;
}

int populate_cache_leaves(unsigned int cpu)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *ci = this_cpu_ci->info_list;
        u8 cpu_vendor = boot_cpu_data.x86_vendor;
        u32 apicid = cpu_data(cpu).topo.apicid;
        struct amd_northbridge *nb = NULL;
        struct _cpuid4_info id4 = {};
        int idx, ret;

        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
                ret = fill_cpuid4_info(idx, &id4);
                if (ret)
                        return ret;

                id4.id = get_cache_id(apicid, &id4);

                if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
                        nb = amd_init_l3_cache(idx);

                ci_info_init(ci++, &id4, nb);
                __cache_cpumap_setup(cpu, idx, &id4);
        }

        this_cpu_ci->cpu_map_populated = true;
        return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts; they
 * would run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and are
 * re-enabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

/*
 * Cache flushing is the most time-consuming step when programming the
 * MTRRs. On many Intel CPUs without known errata, it can be skipped
 * if the CPU declares cache self-snooping support.
 */
static void maybe_flush_caches(void)
{
        if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
                wbinvd();
}

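/*
 * cache_disable() enters no-fill cache mode (CR0.CD=1, NW=0), flushes the
 * caches unless self-snoop makes that unnecessary, clears CR4.PGE and
 * flushes the TLB, and finally disables the MTRRs. cache_enable() undoes
 * these steps in roughly reverse order.
 */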
void cache_disable(void) __acquires(cache_disable_lock)
{
        unsigned long cr0;

        /*
         * This is not ideal since the cache is only flushed/disabled
         * for this CPU while the MTRRs are changed, but changing this
         * requires more invasive changes to the way the kernel boots.
         */
        raw_spin_lock(&cache_disable_lock);

        /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
        cr0 = read_cr0() | X86_CR0_CD;
        write_cr0(cr0);

        maybe_flush_caches();

        /* Save value of CR4 and clear Page Global Enable (bit 7) */
        if (cpu_feature_enabled(X86_FEATURE_PGE)) {
                saved_cr4 = __read_cr4();
                __write_cr4(saved_cr4 & ~X86_CR4_PGE);
        }

        /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        flush_tlb_local();

        if (cpu_feature_enabled(X86_FEATURE_MTRR))
                mtrr_disable();

        maybe_flush_caches();
}

void cache_enable(void) __releases(cache_disable_lock)
{
        /* Flush TLBs (no need to flush caches - they are disabled) */
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        flush_tlb_local();

        if (cpu_feature_enabled(X86_FEATURE_MTRR))
                mtrr_enable();

        /* Enable caches */
        write_cr0(read_cr0() & ~X86_CR0_CD);

        /* Restore value of CR4 */
        if (cpu_feature_enabled(X86_FEATURE_PGE))
                __write_cr4(saved_cr4);

        raw_spin_unlock(&cache_disable_lock);
}

static void cache_cpu_init(void)
{
        unsigned long flags;

        local_irq_save(flags);

        if (memory_caching_control & CACHE_MTRR) {
                cache_disable();
                mtrr_generic_set_state();
                cache_enable();
        }

        if (memory_caching_control & CACHE_PAT)
                pat_cpu_init();

        local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
        cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
        return cache_aps_delayed_init;
}

static int cache_rendezvous_handler(void *unused)
{
        if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
                cache_cpu_init();

        return 0;
}

void __init cache_bp_init(void)
{
        mtrr_bp_init();
        pat_bp_init();

        if (memory_caching_control)
                cache_cpu_init();
}

void cache_bp_restore(void)
{
        if (memory_caching_control)
                cache_cpu_init();
}

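/*
 * CPU hotplug callbacks, registered in cache_ap_register() below:
 * cache_ap_online() brings a newly starting AP's MTRR/PAT state in sync
 * with the rest of the system via stop_machine, unless AP init is delayed
 * until cache_aps_init().
 */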
static int cache_ap_online(unsigned int cpu)
{
        cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

        if (!memory_caching_control || get_cache_aps_delayed_init())
                return 0;

        /*
         * Ideally we should hold mtrr_mutex here to prevent MTRR entries
         * from being changed, but this routine is called at CPU boot time
         * and holding the lock would break that.
         *
         * This routine is called in two cases:
         *
         * 1. very early in software resume, when there are absolutely
         *    no MTRR entry changes;
         *
         * 2. CPU hotadd time. We let mtrr_add/del_page hold the cpuhotplug
         *    lock to prevent MTRR entry changes.
         */
        stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
                                       cpu_cacheinfo_mask);

        return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
        cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
        return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
        if (!memory_caching_control || !get_cache_aps_delayed_init())
                return;

        stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
        set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
        zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
        cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

        cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
                                  "x86/cachectrl:starting",
                                  cache_ap_online, cache_ap_offline);
        return 0;
}
early_initcall(cache_ap_register);