// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPU.
 *
 * Changes:
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd_nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
        CTYPE_NULL = 0,
        CTYPE_DATA = 1,
        CTYPE_INST = 2,
        CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
        struct {
                enum _cache_type type:5;
                unsigned int level:3;
                unsigned int is_self_initializing:1;
                unsigned int is_fully_associative:1;
                unsigned int reserved:4;
                unsigned int num_threads_sharing:12;
                unsigned int num_cores_on_die:6;
        } split;
        u32 full;
};

union _cpuid4_leaf_ebx {
        struct {
                unsigned int coherency_line_size:12;
                unsigned int physical_line_partition:10;
                unsigned int ways_of_associativity:10;
        } split;
        u32 full;
};

union _cpuid4_leaf_ecx {
        struct {
                unsigned int number_of_sets:32;
        } split;
        u32 full;
};

struct _cpuid4_info {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned int id;
        unsigned long size;
};

/*
 * Fallback AMD CPUID(4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 */

union l1_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:8;
                unsigned assoc:8;
                unsigned size_in_kb:8;
        };
        unsigned val;
};

union l2_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned size_in_kb:16;
        };
        unsigned val;
};

union l3_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned res:2;
                unsigned size_encoded:14;
        };
        unsigned val;
};

static const unsigned short assocs[] = {
        [1] = 1,
        [2] = 2,
        [4] = 4,
        [6] = 8,
        [8] = 16,
        [0xa] = 32,
        [0xb] = 48,
        [0xc] = 64,
        [0xd] = 96,
        [0xe] = 128,
        [0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
        [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
        [CTYPE_DATA] = CACHE_TYPE_DATA,
        [CTYPE_INST] = CACHE_TYPE_INST,
        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};
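
/*
 * Worked example of the legacy decode below (illustrative values, not from
 * any particular CPU): if CPUID(0x80000006) EDX describes an L3 with
 * size_encoded = 0x10 and assoc = 0x6, legacy_amd_cpuid4() derives
 *
 *      size_in_kb = 0x10 * 512 = 8192 KB
 *      assoc      = assocs[0x6] = 8 ways
 *
 * An assoc field of 0xf maps to 0xffff, i.e. fully associative.
 */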

static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
                              union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
        unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
        union l1_cache l1i, l1d;
        union l2_cache l2;
        union l3_cache l3;
        union l1_cache *l1 = &l1d;

        eax->full = 0;
        ebx->full = 0;
        ecx->full = 0;

        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

        switch (index) {
        case 1:
                l1 = &l1i;
                fallthrough;
        case 0:
                if (!l1->val)
                        return;
                assoc = assocs[l1->assoc];
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
                break;
        case 2:
                if (!l2.val)
                        return;
                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                /* cpu_data has errata corrections for K7 applied */
                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
                break;
        case 3:
                if (!l3.val)
                        return;
                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
                        size_in_kb = size_in_kb >> 1;
                        assoc = assoc >> 1;
                }
                break;
        default:
                return;
        }

        eax->split.is_self_initializing = 1;
        eax->split.type = types[index];
        eax->split.level = levels[index];
        eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = topology_num_cores_per_package();

        if (assoc == 0xffff)
                eax->split.is_fully_associative = 1;
        ebx->split.coherency_line_size = line_size - 1;
        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                (ebx->split.ways_of_associativity + 1) - 1;
}

static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
                                 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
        if (eax.split.type == CTYPE_NULL)
                return -EIO;

        id4->eax = eax;
        id4->ebx = ebx;
        id4->ecx = ecx;
        id4->size = (ecx.split.number_of_sets + 1) *
                    (ebx.split.coherency_line_size + 1) *
                    (ebx.split.physical_line_partition + 1) *
                    (ebx.split.ways_of_associativity + 1);

        return 0;
}
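
/*
 * Worked example of the size computation above (illustrative numbers): a
 * 32 KB, 8-way L1D with 64-byte lines and a single line partition is
 * reported by CPUID(4) as number_of_sets = 63, coherency_line_size = 63,
 * physical_line_partition = 0 and ways_of_associativity = 7, so
 * cpuid4_info_fill_done() computes
 *
 *      size = 64 * 64 * 1 * 8 = 32768 bytes
 */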

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        u32 ignored;

        if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
                cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
        else
                legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

        return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        u32 ignored;

        cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

        return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
        u8 cpu_vendor = boot_cpu_data.x86_vendor;

        return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
                amd_fill_cpuid4_info(index, id4) :
                intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
        unsigned int eax, ebx, ecx, edx, op;
        union _cpuid4_leaf_eax cache_eax;
        int i = -1;

        if (c->x86_vendor == X86_VENDOR_AMD ||
            c->x86_vendor == X86_VENDOR_HYGON)
                op = 0x8000001d;
        else
                op = 4;

        do {
                ++i;
                /* Do cpuid(op) loop to find out num_cache_leaves */
                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
        } while (cache_eax.split.type != CTYPE_NULL);
        return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
        /*
         * We may have multiple LLCs if L3 caches exist, so check if we
         * have an L3 cache by looking at the L3 cache CPUID leaf.
         */
        if (!cpuid_edx(0x80000006))
                return;

        if (c->x86 < 0x17) {
                /* LLC is at the node level. */
                c->topo.llc_id = die_id;
        } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
                /*
                 * LLC is at the core complex level.
                 * Core complex ID is ApicId[3] for these processors.
                 */
                c->topo.llc_id = c->topo.apicid >> 3;
        } else {
                /*
                 * LLC ID is calculated from the number of threads sharing the
                 * cache.
                 */
                u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
                u32 llc_index = find_num_cache_leaves(c) - 1;

                cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
                if (eax)
                        num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

                if (num_sharing_cache) {
                        int bits = get_count_order(num_sharing_cache);

                        c->topo.llc_id = c->topo.apicid >> bits;
                }
        }
}
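
/*
 * Example of the derivation above (hypothetical APIC IDs): if the L3 leaf of
 * CPUID(0x8000001d) reports EAX bits 25:14 = 15, then 16 threads share the
 * cache, get_count_order(16) = 4, and a CPU with APIC ID 0x1b gets
 * llc_id = 0x1b >> 4 = 1.
 */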

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
        /*
         * We may have multiple LLCs if L3 caches exist, so check if we
         * have an L3 cache by looking at the L3 cache CPUID leaf.
         */
        if (!cpuid_edx(0x80000006))
                return;

        /*
         * LLC is at the core complex level.
         * Core complex ID is ApicId[3] for these processors.
         */
        c->topo.llc_id = c->topo.apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                ci->num_leaves = find_num_cache_leaves(c);
        } else if (c->extended_cpuid_level >= 0x80000006) {
                if (cpuid_edx(0x80000006) & 0xf000)
                        ci->num_leaves = 4;
                else
                        ci->num_leaves = 3;
        }
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

        ci->num_leaves = find_num_cache_leaves(c);
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
        /* Cache sizes */
        unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

        if (c->cpuid_level > 3) {
                /*
                 * There should be at least one leaf. A non-zero value means
                 * that the number of leaves has been initialized.
                 */
                if (!ci->num_leaves)
                        ci->num_leaves = find_num_cache_leaves(c);

                /*
                 * Whenever possible use cpuid(4), the deterministic cache
                 * parameters CPUID leaf, to find the cache details.
                 */
                for (i = 0; i < ci->num_leaves; i++) {
                        struct _cpuid4_info id4 = {};
                        int retval;

                        retval = intel_fill_cpuid4_info(i, &id4);
                        if (retval < 0)
                                continue;

                        switch (id4.eax.split.level) {
                        case 1:
                                if (id4.eax.split.type == CTYPE_DATA)
                                        new_l1d = id4.size/1024;
                                else if (id4.eax.split.type == CTYPE_INST)
                                        new_l1i = id4.size/1024;
                                break;
                        case 2:
                                new_l2 = id4.size/1024;
                                num_threads_sharing = 1 + id4.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
                                break;
                        case 3:
                                new_l3 = id4.size/1024;
                                num_threads_sharing = 1 + id4.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
                                break;
                        default:
                                break;
                        }
                }
        }

        /* Don't use CPUID(2) if CPUID(4) is supported. */
        if (!ci->num_leaves && c->cpuid_level > 1) {
                const struct leaf_0x2_table *entry;
                union leaf_0x2_regs regs;
                u8 *ptr;

                cpuid_get_leaf_0x2_regs(&regs);
                for_each_leaf_0x2_entry(regs, ptr, entry) {
                        switch (entry->c_type) {
                        case CACHE_L1_INST: l1i += entry->c_size; break;
                        case CACHE_L1_DATA: l1d += entry->c_size; break;
                        case CACHE_L2:      l2  += entry->c_size; break;
                        case CACHE_L3:      l3  += entry->c_size; break;
                        }
                }
        }

        if (new_l1d)
                l1d = new_l1d;

        if (new_l1i)
                l1i = new_l1i;

        if (new_l2) {
                l2 = new_l2;
                c->topo.llc_id = l2_id;
                c->topo.l2c_id = l2_id;
        }

        if (new_l3) {
                l3 = new_l3;
                c->topo.llc_id = l3_id;
        }

        /*
         * If llc_id is not yet set, this means cpuid_level < 4, which in
         * turn means that the only possibility is SMT (as indicated in
         * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
         * that SMT shares all caches, we can unconditionally set cpu_llc_id to
         * c->topo.pkg_id.
         */
        if (c->topo.llc_id == BAD_APICID)
                c->topo.llc_id = c->topo.pkg_id;

        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

        if (!l2)
                cpu_detect_cache_sizes(c);
}
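
/*
 * Example of the l2_id/l3_id masking in init_intel_cacheinfo() (hypothetical
 * values): with num_threads_sharing = 16, index_msb = get_count_order(16) = 4,
 * so an APIC ID of 0x1b yields l3_id = 0x1b & ~0xf = 0x10; all threads whose
 * APIC IDs differ only in the low four bits share that L3.
 */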

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
                                    const struct _cpuid4_info *id4)
{
        struct cpu_cacheinfo *this_cpu_ci;
        struct cacheinfo *ci;
        int i, sibling;

        /*
         * For L3, always use the pre-calculated cpu_llc_shared_mask
         * to derive shared_cpu_map.
         */
        if (index == 3) {
                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;
                        ci = this_cpu_ci->info_list + index;
                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
                                if (!cpu_online(sibling))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &ci->shared_cpu_map);
                        }
                }
        } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                unsigned int apicid, nshared, first, last;

                nshared = id4->eax.split.num_threads_sharing + 1;
                apicid = cpu_data(cpu).topo.apicid;
                first = apicid - (apicid % nshared);
                last = first + nshared - 1;

                for_each_online_cpu(i) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;

                        apicid = cpu_data(i).topo.apicid;
                        if ((apicid < first) || (apicid > last))
                                continue;

                        ci = this_cpu_ci->info_list + index;

                        for_each_online_cpu(sibling) {
                                apicid = cpu_data(sibling).topo.apicid;
                                if ((apicid < first) || (apicid > last))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &ci->shared_cpu_map);
                        }
                }
        } else
                return 0;

        return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
                                 const struct _cpuid4_info *id4)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *ci, *sibling_ci;
        unsigned long num_threads_sharing;
        int index_msb, i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        if (c->x86_vendor == X86_VENDOR_AMD ||
            c->x86_vendor == X86_VENDOR_HYGON) {
                if (__cache_amd_cpumap_setup(cpu, index, id4))
                        return;
        }

        ci = this_cpu_ci->info_list + index;
        num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

        cpumask_set_cpu(cpu, &ci->shared_cpu_map);
        if (num_threads_sharing == 1)
                return;

        index_msb = get_count_order(num_threads_sharing);

        for_each_online_cpu(i)
                if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

                        if (i == cpu || !sib_cpu_ci->info_list)
                                continue; /* skip if itself or no cacheinfo */
                        sibling_ci = sib_cpu_ci->info_list + index;
                        cpumask_set_cpu(i, &ci->shared_cpu_map);
                        cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
                }
}
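
/*
 * Example of the TOPOEXT sibling-range computation in
 * __cache_amd_cpumap_setup() (hypothetical values): for a cache with
 * num_threads_sharing = 3 (i.e. nshared = 4) on a CPU with APIC ID 6,
 * first = 6 - (6 % 4) = 4 and last = 7, so all online CPUs with APIC IDs
 * 4..7 are marked in each other's shared_cpu_map.
 */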

static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
                         struct amd_northbridge *nb)
{
        ci->id = id4->id;
        ci->attributes = CACHE_ID;
        ci->level = id4->eax.split.level;
        ci->type = cache_type_map[id4->eax.split.type];
        ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
        ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
        ci->size = id4->size;
        ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
        ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
        ci->priv = nb;
}

int init_cache_level(unsigned int cpu)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

        /* There should be at least one leaf. */
        if (!ci->num_leaves)
                return -ENOENT;

        return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        unsigned long num_threads_sharing;
        int index_msb;

        num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
        index_msb = get_count_order(num_threads_sharing);
        id4->id = c->topo.apicid >> index_msb;
}

int populate_cache_leaves(unsigned int cpu)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *ci = this_cpu_ci->info_list;
        u8 cpu_vendor = boot_cpu_data.x86_vendor;
        struct amd_northbridge *nb = NULL;
        struct _cpuid4_info id4 = {};
        int idx, ret;

        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
                ret = fill_cpuid4_info(idx, &id4);
                if (ret)
                        return ret;

                get_cache_id(cpu, &id4);

                if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
                        nb = amd_init_l3_cache(idx);

                ci_info_init(ci++, &id4, nb);
                __cache_cpumap_setup(cpu, idx, &id4);
        }
        this_cpu_ci->cpu_map_populated = true;

        return 0;
}
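
/*
 * The leaves filled in above are consumed by the generic cacheinfo code
 * (drivers/base/cacheinfo.c), which exposes them under
 * /sys/devices/system/cpu/cpuN/cache/indexM/ as attributes such as level,
 * type, size, ways_of_associativity, coherency_line_size, number_of_sets,
 * shared_cpu_map and id; the exact attribute set is determined there, not
 * in this file.
 */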

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts: they would
 * run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

void cache_disable(void) __acquires(cache_disable_lock)
{
        unsigned long cr0;

        /*
         * Note that this is not ideal, since the cache is only
         * flushed/disabled for this CPU while the MTRRs are changed;
         * changing this would require more invasive changes to the way the
         * kernel boots.
         */

        raw_spin_lock(&cache_disable_lock);

        /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
        cr0 = read_cr0() | X86_CR0_CD;
        write_cr0(cr0);

        /*
         * Cache flushing is the most time-consuming step when programming
         * the MTRRs. Fortunately, as per the Intel Software Development
         * Manual, we can skip it if the processor supports cache self-
         * snooping.
         */
        if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
                wbinvd();

        /* Save value of CR4 and clear Page Global Enable (bit 7) */
        if (cpu_feature_enabled(X86_FEATURE_PGE)) {
                saved_cr4 = __read_cr4();
                __write_cr4(saved_cr4 & ~X86_CR4_PGE);
        }

        /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        flush_tlb_local();

        if (cpu_feature_enabled(X86_FEATURE_MTRR))
                mtrr_disable();

        /* Again, only flush caches if we have to. */
        if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
                wbinvd();
}

void cache_enable(void) __releases(cache_disable_lock)
{
        /* Flush TLBs (no need to flush caches - they are disabled) */
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        flush_tlb_local();

        if (cpu_feature_enabled(X86_FEATURE_MTRR))
                mtrr_enable();

        /* Enable caches */
        write_cr0(read_cr0() & ~X86_CR0_CD);

        /* Restore value of CR4 */
        if (cpu_feature_enabled(X86_FEATURE_PGE))
                __write_cr4(saved_cr4);

        raw_spin_unlock(&cache_disable_lock);
}

static void cache_cpu_init(void)
{
        unsigned long flags;

        local_irq_save(flags);

        if (memory_caching_control & CACHE_MTRR) {
                cache_disable();
                mtrr_generic_set_state();
                cache_enable();
        }

        if (memory_caching_control & CACHE_PAT)
                pat_cpu_init();

        local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
        cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
        return cache_aps_delayed_init;
}

static int cache_rendezvous_handler(void *unused)
{
        if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
                cache_cpu_init();

        return 0;
}

void __init cache_bp_init(void)
{
        mtrr_bp_init();
        pat_bp_init();

        if (memory_caching_control)
                cache_cpu_init();
}

void cache_bp_restore(void)
{
        if (memory_caching_control)
                cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
        cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

        if (!memory_caching_control || get_cache_aps_delayed_init())
                return 0;

        /*
         * Ideally we would hold mtrr_mutex here to prevent the MTRR entries
         * from changing, but this routine is called at CPU boot time and
         * holding the lock there breaks it.
         *
         * This routine is called in two cases:
         *
         * 1. very early during software resume, when there are absolutely
         *    no MTRR entry changes;
         *
         * 2. at CPU hot-add time. We let mtrr_add/del_page hold the CPU
         *    hotplug lock to prevent MTRR entry changes.
         */
        stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
                                       cpu_cacheinfo_mask);

        return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
        cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
        return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
        if (!memory_caching_control || !get_cache_aps_delayed_init())
                return;

        stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
        set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
        zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
        cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

        cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
                                  "x86/cachectrl:starting",
                                  cache_ap_online, cache_ap_offline);
        return 0;
}
early_initcall(cache_ap_register);