1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/pgtable.h> 4 5 #include <linux/string.h> 6 #include <linux/bitops.h> 7 #include <linux/smp.h> 8 #include <linux/sched.h> 9 #include <linux/sched/clock.h> 10 #include <linux/semaphore.h> 11 #include <linux/thread_info.h> 12 #include <linux/init.h> 13 #include <linux/uaccess.h> 14 #include <linux/workqueue.h> 15 #include <linux/delay.h> 16 #include <linux/cpuhotplug.h> 17 18 #include <asm/cpufeature.h> 19 #include <asm/msr.h> 20 #include <asm/bugs.h> 21 #include <asm/cpu.h> 22 #include <asm/intel-family.h> 23 #include <asm/microcode.h> 24 #include <asm/hwcap2.h> 25 #include <asm/elf.h> 26 #include <asm/cpu_device_id.h> 27 #include <asm/cmdline.h> 28 #include <asm/traps.h> 29 #include <asm/resctrl.h> 30 #include <asm/numa.h> 31 #include <asm/thermal.h> 32 33 #ifdef CONFIG_X86_64 34 #include <linux/topology.h> 35 #endif 36 37 #include "cpu.h" 38 39 #ifdef CONFIG_X86_LOCAL_APIC 40 #include <asm/mpspec.h> 41 #include <asm/apic.h> 42 #endif 43 44 enum split_lock_detect_state { 45 sld_off = 0, 46 sld_warn, 47 sld_fatal, 48 sld_ratelimit, 49 }; 50 51 /* 52 * Default to sld_off because most systems do not support split lock detection. 53 * sld_state_setup() will switch this to sld_warn on systems that support 54 * split lock/bus lock detect, unless there is a command line override. 55 */ 56 static enum split_lock_detect_state sld_state __ro_after_init = sld_off; 57 static u64 msr_test_ctrl_cache __ro_after_init; 58 59 /* 60 * With a name like MSR_TEST_CTL it should go without saying, but don't touch 61 * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it 62 * on CPUs that do not support SLD can cause fireworks, even when writing '0'. 63 */ 64 static bool cpu_model_supports_sld __ro_after_init; 65 66 /* 67 * Processors which have self-snooping capability can handle conflicting 68 * memory type across CPUs by snooping its own cache. However, there exists 69 * CPU models in which having conflicting memory types still leads to 70 * unpredictable behavior, machine check errors, or hangs. Clear this 71 * feature to prevent its use on machines with known erratas. 72 */ 73 static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) 74 { 75 switch (c->x86_model) { 76 case INTEL_FAM6_CORE_YONAH: 77 case INTEL_FAM6_CORE2_MEROM: 78 case INTEL_FAM6_CORE2_MEROM_L: 79 case INTEL_FAM6_CORE2_PENRYN: 80 case INTEL_FAM6_CORE2_DUNNINGTON: 81 case INTEL_FAM6_NEHALEM: 82 case INTEL_FAM6_NEHALEM_G: 83 case INTEL_FAM6_NEHALEM_EP: 84 case INTEL_FAM6_NEHALEM_EX: 85 case INTEL_FAM6_WESTMERE: 86 case INTEL_FAM6_WESTMERE_EP: 87 case INTEL_FAM6_SANDYBRIDGE: 88 setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); 89 } 90 } 91 92 static bool ring3mwait_disabled __read_mostly; 93 94 static int __init ring3mwait_disable(char *__unused) 95 { 96 ring3mwait_disabled = true; 97 return 1; 98 } 99 __setup("ring3mwait=disable", ring3mwait_disable); 100 101 static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) 102 { 103 /* 104 * Ring 3 MONITOR/MWAIT feature cannot be detected without 105 * cpu model and family comparison. 106 */ 107 if (c->x86 != 6) 108 return; 109 switch (c->x86_model) { 110 case INTEL_FAM6_XEON_PHI_KNL: 111 case INTEL_FAM6_XEON_PHI_KNM: 112 break; 113 default: 114 return; 115 } 116 117 if (ring3mwait_disabled) 118 return; 119 120 set_cpu_cap(c, X86_FEATURE_RING3MWAIT); 121 this_cpu_or(msr_misc_features_shadow, 122 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT); 123 124 if (c == &boot_cpu_data) 125 ELF_HWCAP2 |= HWCAP2_RING3MWAIT; 126 } 127 128 /* 129 * Early microcode releases for the Spectre v2 mitigation were broken. 130 * Information taken from; 131 * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf 132 * - https://kb.vmware.com/s/article/52345 133 * - Microcode revisions observed in the wild 134 * - Release note from 20180108 microcode release 135 */ 136 struct sku_microcode { 137 u8 model; 138 u8 stepping; 139 u32 microcode; 140 }; 141 static const struct sku_microcode spectre_bad_microcodes[] = { 142 { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 }, 143 { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 }, 144 { INTEL_FAM6_KABYLAKE, 0x09, 0x80 }, 145 { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 }, 146 { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 }, 147 { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, 148 { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, 149 { INTEL_FAM6_BROADWELL, 0x04, 0x28 }, 150 { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b }, 151 { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 }, 152 { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 }, 153 { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, 154 { INTEL_FAM6_HASWELL_L, 0x01, 0x21 }, 155 { INTEL_FAM6_HASWELL_G, 0x01, 0x18 }, 156 { INTEL_FAM6_HASWELL, 0x03, 0x23 }, 157 { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, 158 { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, 159 { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, 160 /* Observed in the wild */ 161 { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, 162 { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, 163 }; 164 165 static bool bad_spectre_microcode(struct cpuinfo_x86 *c) 166 { 167 int i; 168 169 /* 170 * We know that the hypervisor lie to us on the microcode version so 171 * we may as well hope that it is running the correct version. 172 */ 173 if (cpu_has(c, X86_FEATURE_HYPERVISOR)) 174 return false; 175 176 if (c->x86 != 6) 177 return false; 178 179 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { 180 if (c->x86_model == spectre_bad_microcodes[i].model && 181 c->x86_stepping == spectre_bad_microcodes[i].stepping) 182 return (c->microcode <= spectre_bad_microcodes[i].microcode); 183 } 184 return false; 185 } 186 187 static void early_init_intel(struct cpuinfo_x86 *c) 188 { 189 u64 misc_enable; 190 191 /* Unmask CPUID levels if masked: */ 192 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { 193 if (msr_clear_bit(MSR_IA32_MISC_ENABLE, 194 MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { 195 c->cpuid_level = cpuid_eax(0); 196 get_cpu_cap(c); 197 } 198 } 199 200 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 201 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 202 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 203 204 if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) 205 c->microcode = intel_get_microcode_revision(); 206 207 /* Now if any of them are set, check the blacklist and clear the lot */ 208 if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || 209 cpu_has(c, X86_FEATURE_INTEL_STIBP) || 210 cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || 211 cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { 212 pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); 213 setup_clear_cpu_cap(X86_FEATURE_IBRS); 214 setup_clear_cpu_cap(X86_FEATURE_IBPB); 215 setup_clear_cpu_cap(X86_FEATURE_STIBP); 216 setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); 217 setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); 218 setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); 219 setup_clear_cpu_cap(X86_FEATURE_SSBD); 220 setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); 221 } 222 223 /* 224 * Atom erratum AAE44/AAF40/AAG38/AAH41: 225 * 226 * A race condition between speculative fetches and invalidating 227 * a large page. This is worked around in microcode, but we 228 * need the microcode to have already been loaded... so if it is 229 * not, recommend a BIOS update and disable large pages. 230 */ 231 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && 232 c->microcode < 0x20e) { 233 pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); 234 clear_cpu_cap(c, X86_FEATURE_PSE); 235 } 236 237 #ifdef CONFIG_X86_64 238 set_cpu_cap(c, X86_FEATURE_SYSENTER32); 239 #else 240 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ 241 if (c->x86 == 15 && c->x86_cache_alignment == 64) 242 c->x86_cache_alignment = 128; 243 #endif 244 245 /* CPUID workaround for 0F33/0F34 CPU */ 246 if (c->x86 == 0xF && c->x86_model == 0x3 247 && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) 248 c->x86_phys_bits = 36; 249 250 /* 251 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 252 * with P/T states and does not stop in deep C-states. 253 * 254 * It is also reliable across cores and sockets. (but not across 255 * cabinets - we turn it off in that case explicitly.) 256 */ 257 if (c->x86_power & (1 << 8)) { 258 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 259 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 260 } 261 262 /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ 263 if (c->x86 == 6) { 264 switch (c->x86_model) { 265 case INTEL_FAM6_ATOM_SALTWELL_MID: 266 case INTEL_FAM6_ATOM_SALTWELL_TABLET: 267 case INTEL_FAM6_ATOM_SILVERMONT_MID: 268 case INTEL_FAM6_ATOM_AIRMONT_NP: 269 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); 270 break; 271 default: 272 break; 273 } 274 } 275 276 /* 277 * There is a known erratum on Pentium III and Core Solo 278 * and Core Duo CPUs. 279 * " Page with PAT set to WC while associated MTRR is UC 280 * may consolidate to UC " 281 * Because of this erratum, it is better to stick with 282 * setting WC in MTRR rather than using PAT on these CPUs. 283 * 284 * Enable PAT WC only on P4, Core 2 or later CPUs. 285 */ 286 if (c->x86 == 6 && c->x86_model < 15) 287 clear_cpu_cap(c, X86_FEATURE_PAT); 288 289 /* 290 * If fast string is not enabled in IA32_MISC_ENABLE for any reason, 291 * clear the fast string and enhanced fast string CPU capabilities. 292 */ 293 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { 294 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); 295 if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { 296 pr_info("Disabled fast string operations\n"); 297 setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); 298 setup_clear_cpu_cap(X86_FEATURE_ERMS); 299 } 300 } 301 302 /* 303 * Intel Quark Core DevMan_001.pdf section 6.4.11 304 * "The operating system also is required to invalidate (i.e., flush) 305 * the TLB when any changes are made to any of the page table entries. 306 * The operating system must reload CR3 to cause the TLB to be flushed" 307 * 308 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h 309 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE 310 * to be modified. 311 */ 312 if (c->x86 == 5 && c->x86_model == 9) { 313 pr_info("Disabling PGE capability bit\n"); 314 setup_clear_cpu_cap(X86_FEATURE_PGE); 315 } 316 317 if (c->cpuid_level >= 0x00000001) { 318 u32 eax, ebx, ecx, edx; 319 320 cpuid(0x00000001, &eax, &ebx, &ecx, &edx); 321 /* 322 * If HTT (EDX[28]) is set EBX[16:23] contain the number of 323 * apicids which are reserved per package. Store the resulting 324 * shift value for the package management code. 325 */ 326 if (edx & (1U << 28)) 327 c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); 328 } 329 330 check_memory_type_self_snoop_errata(c); 331 332 /* 333 * Get the number of SMT siblings early from the extended topology 334 * leaf, if available. Otherwise try the legacy SMT detection. 335 */ 336 if (detect_extended_topology_early(c) < 0) 337 detect_ht_early(c); 338 } 339 340 static void bsp_init_intel(struct cpuinfo_x86 *c) 341 { 342 resctrl_cpu_detect(c); 343 } 344 345 #ifdef CONFIG_X86_32 346 /* 347 * Early probe support logic for ppro memory erratum #50 348 * 349 * This is called before we do cpu ident work 350 */ 351 352 int ppro_with_ram_bug(void) 353 { 354 /* Uses data from early_cpu_detect now */ 355 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 356 boot_cpu_data.x86 == 6 && 357 boot_cpu_data.x86_model == 1 && 358 boot_cpu_data.x86_stepping < 8) { 359 pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); 360 return 1; 361 } 362 return 0; 363 } 364 365 static void intel_smp_check(struct cpuinfo_x86 *c) 366 { 367 /* calling is from identify_secondary_cpu() ? */ 368 if (!c->cpu_index) 369 return; 370 371 /* 372 * Mask B, Pentium, but not Pentium MMX 373 */ 374 if (c->x86 == 5 && 375 c->x86_stepping >= 1 && c->x86_stepping <= 4 && 376 c->x86_model <= 3) { 377 /* 378 * Remember we have B step Pentia with bugs 379 */ 380 WARN_ONCE(1, "WARNING: SMP operation may be unreliable" 381 "with B stepping processors.\n"); 382 } 383 } 384 385 static int forcepae; 386 static int __init forcepae_setup(char *__unused) 387 { 388 forcepae = 1; 389 return 1; 390 } 391 __setup("forcepae", forcepae_setup); 392 393 static void intel_workarounds(struct cpuinfo_x86 *c) 394 { 395 #ifdef CONFIG_X86_F00F_BUG 396 /* 397 * All models of Pentium and Pentium with MMX technology CPUs 398 * have the F0 0F bug, which lets nonprivileged users lock up the 399 * system. Announce that the fault handler will be checking for it. 400 * The Quark is also family 5, but does not have the same bug. 401 */ 402 clear_cpu_bug(c, X86_BUG_F00F); 403 if (c->x86 == 5 && c->x86_model < 9) { 404 static int f00f_workaround_enabled; 405 406 set_cpu_bug(c, X86_BUG_F00F); 407 if (!f00f_workaround_enabled) { 408 pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n"); 409 f00f_workaround_enabled = 1; 410 } 411 } 412 #endif 413 414 /* 415 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until 416 * model 3 mask 3 417 */ 418 if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) 419 clear_cpu_cap(c, X86_FEATURE_SEP); 420 421 /* 422 * PAE CPUID issue: many Pentium M report no PAE but may have a 423 * functionally usable PAE implementation. 424 * Forcefully enable PAE if kernel parameter "forcepae" is present. 425 */ 426 if (forcepae) { 427 pr_warn("PAE forced!\n"); 428 set_cpu_cap(c, X86_FEATURE_PAE); 429 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); 430 } 431 432 /* 433 * P4 Xeon erratum 037 workaround. 434 * Hardware prefetcher may cause stale data to be loaded into the cache. 435 */ 436 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { 437 if (msr_set_bit(MSR_IA32_MISC_ENABLE, 438 MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { 439 pr_info("CPU: C0 stepping P4 Xeon detected.\n"); 440 pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); 441 } 442 } 443 444 /* 445 * See if we have a good local APIC by checking for buggy Pentia, 446 * i.e. all B steppings and the C2 stepping of P54C when using their 447 * integrated APIC (see 11AP erratum in "Pentium Processor 448 * Specification Update"). 449 */ 450 if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && 451 (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) 452 set_cpu_bug(c, X86_BUG_11AP); 453 454 455 #ifdef CONFIG_X86_INTEL_USERCOPY 456 /* 457 * Set up the preferred alignment for movsl bulk memory moves 458 */ 459 switch (c->x86) { 460 case 4: /* 486: untested */ 461 break; 462 case 5: /* Old Pentia: untested */ 463 break; 464 case 6: /* PII/PIII only like movsl with 8-byte alignment */ 465 movsl_mask.mask = 7; 466 break; 467 case 15: /* P4 is OK down to 8-byte alignment */ 468 movsl_mask.mask = 7; 469 break; 470 } 471 #endif 472 473 intel_smp_check(c); 474 } 475 #else 476 static void intel_workarounds(struct cpuinfo_x86 *c) 477 { 478 } 479 #endif 480 481 static void srat_detect_node(struct cpuinfo_x86 *c) 482 { 483 #ifdef CONFIG_NUMA 484 unsigned node; 485 int cpu = smp_processor_id(); 486 487 /* Don't do the funky fallback heuristics the AMD version employs 488 for now. */ 489 node = numa_cpu_node(cpu); 490 if (node == NUMA_NO_NODE || !node_online(node)) { 491 /* reuse the value from init_cpu_to_node() */ 492 node = cpu_to_node(cpu); 493 } 494 numa_set_node(cpu, node); 495 #endif 496 } 497 498 #define MSR_IA32_TME_ACTIVATE 0x982 499 500 /* Helpers to access TME_ACTIVATE MSR */ 501 #define TME_ACTIVATE_LOCKED(x) (x & 0x1) 502 #define TME_ACTIVATE_ENABLED(x) (x & 0x2) 503 504 #define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ 505 #define TME_ACTIVATE_POLICY_AES_XTS_128 0 506 507 #define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ 508 509 #define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ 510 #define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 511 512 /* Values for mktme_status (SW only construct) */ 513 #define MKTME_ENABLED 0 514 #define MKTME_DISABLED 1 515 #define MKTME_UNINITIALIZED 2 516 static int mktme_status = MKTME_UNINITIALIZED; 517 518 static void detect_tme(struct cpuinfo_x86 *c) 519 { 520 u64 tme_activate, tme_policy, tme_crypto_algs; 521 int keyid_bits = 0, nr_keyids = 0; 522 static u64 tme_activate_cpu0 = 0; 523 524 rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); 525 526 if (mktme_status != MKTME_UNINITIALIZED) { 527 if (tme_activate != tme_activate_cpu0) { 528 /* Broken BIOS? */ 529 pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); 530 pr_err_once("x86/tme: MKTME is not usable\n"); 531 mktme_status = MKTME_DISABLED; 532 533 /* Proceed. We may need to exclude bits from x86_phys_bits. */ 534 } 535 } else { 536 tme_activate_cpu0 = tme_activate; 537 } 538 539 if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { 540 pr_info_once("x86/tme: not enabled by BIOS\n"); 541 mktme_status = MKTME_DISABLED; 542 return; 543 } 544 545 if (mktme_status != MKTME_UNINITIALIZED) 546 goto detect_keyid_bits; 547 548 pr_info("x86/tme: enabled by BIOS\n"); 549 550 tme_policy = TME_ACTIVATE_POLICY(tme_activate); 551 if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) 552 pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); 553 554 tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); 555 if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { 556 pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", 557 tme_crypto_algs); 558 mktme_status = MKTME_DISABLED; 559 } 560 detect_keyid_bits: 561 keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); 562 nr_keyids = (1UL << keyid_bits) - 1; 563 if (nr_keyids) { 564 pr_info_once("x86/mktme: enabled by BIOS\n"); 565 pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); 566 } else { 567 pr_info_once("x86/mktme: disabled by BIOS\n"); 568 } 569 570 if (mktme_status == MKTME_UNINITIALIZED) { 571 /* MKTME is usable */ 572 mktme_status = MKTME_ENABLED; 573 } 574 575 /* 576 * KeyID bits effectively lower the number of physical address 577 * bits. Update cpuinfo_x86::x86_phys_bits accordingly. 578 */ 579 c->x86_phys_bits -= keyid_bits; 580 } 581 582 static void init_cpuid_fault(struct cpuinfo_x86 *c) 583 { 584 u64 msr; 585 586 if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) { 587 if (msr & MSR_PLATFORM_INFO_CPUID_FAULT) 588 set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); 589 } 590 } 591 592 static void init_intel_misc_features(struct cpuinfo_x86 *c) 593 { 594 u64 msr; 595 596 if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr)) 597 return; 598 599 /* Clear all MISC features */ 600 this_cpu_write(msr_misc_features_shadow, 0); 601 602 /* Check features and update capabilities and shadow control bits */ 603 init_cpuid_fault(c); 604 probe_xeon_phi_r3mwait(c); 605 606 msr = this_cpu_read(msr_misc_features_shadow); 607 wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); 608 } 609 610 static void split_lock_init(void); 611 static void bus_lock_init(void); 612 613 static void init_intel(struct cpuinfo_x86 *c) 614 { 615 early_init_intel(c); 616 617 intel_workarounds(c); 618 619 /* 620 * Detect the extended topology information if available. This 621 * will reinitialise the initial_apicid which will be used 622 * in init_intel_cacheinfo() 623 */ 624 detect_extended_topology(c); 625 626 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 627 /* 628 * let's use the legacy cpuid vector 0x1 and 0x4 for topology 629 * detection. 630 */ 631 detect_num_cpu_cores(c); 632 #ifdef CONFIG_X86_32 633 detect_ht(c); 634 #endif 635 } 636 637 init_intel_cacheinfo(c); 638 639 if (c->cpuid_level > 9) { 640 unsigned eax = cpuid_eax(10); 641 /* Check for version and the number of counters */ 642 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) 643 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 644 } 645 646 if (cpu_has(c, X86_FEATURE_XMM2)) 647 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 648 649 if (boot_cpu_has(X86_FEATURE_DS)) { 650 unsigned int l1, l2; 651 652 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); 653 if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)) 654 set_cpu_cap(c, X86_FEATURE_BTS); 655 if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL)) 656 set_cpu_cap(c, X86_FEATURE_PEBS); 657 } 658 659 if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && 660 (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) 661 set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); 662 663 if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && 664 ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) 665 set_cpu_bug(c, X86_BUG_MONITOR); 666 667 #ifdef CONFIG_X86_64 668 if (c->x86 == 15) 669 c->x86_cache_alignment = c->x86_clflush_size * 2; 670 if (c->x86 == 6) 671 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 672 #else 673 /* 674 * Names for the Pentium II/Celeron processors 675 * detectable only by also checking the cache size. 676 * Dixon is NOT a Celeron. 677 */ 678 if (c->x86 == 6) { 679 unsigned int l2 = c->x86_cache_size; 680 char *p = NULL; 681 682 switch (c->x86_model) { 683 case 5: 684 if (l2 == 0) 685 p = "Celeron (Covington)"; 686 else if (l2 == 256) 687 p = "Mobile Pentium II (Dixon)"; 688 break; 689 690 case 6: 691 if (l2 == 128) 692 p = "Celeron (Mendocino)"; 693 else if (c->x86_stepping == 0 || c->x86_stepping == 5) 694 p = "Celeron-A"; 695 break; 696 697 case 8: 698 if (l2 == 128) 699 p = "Celeron (Coppermine)"; 700 break; 701 } 702 703 if (p) 704 strcpy(c->x86_model_id, p); 705 } 706 707 if (c->x86 == 15) 708 set_cpu_cap(c, X86_FEATURE_P4); 709 if (c->x86 == 6) 710 set_cpu_cap(c, X86_FEATURE_P3); 711 #endif 712 713 /* Work around errata */ 714 srat_detect_node(c); 715 716 init_ia32_feat_ctl(c); 717 718 if (cpu_has(c, X86_FEATURE_TME)) 719 detect_tme(c); 720 721 init_intel_misc_features(c); 722 723 split_lock_init(); 724 bus_lock_init(); 725 726 intel_init_thermal(c); 727 } 728 729 #ifdef CONFIG_X86_32 730 static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 731 { 732 /* 733 * Intel PIII Tualatin. This comes in two flavours. 734 * One has 256kb of cache, the other 512. We have no way 735 * to determine which, so we use a boottime override 736 * for the 512kb model, and assume 256 otherwise. 737 */ 738 if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) 739 size = 256; 740 741 /* 742 * Intel Quark SoC X1000 contains a 4-way set associative 743 * 16K cache with a 16 byte cache line and 256 lines per tag 744 */ 745 if ((c->x86 == 5) && (c->x86_model == 9)) 746 size = 16; 747 return size; 748 } 749 #endif 750 751 #define TLB_INST_4K 0x01 752 #define TLB_INST_4M 0x02 753 #define TLB_INST_2M_4M 0x03 754 755 #define TLB_INST_ALL 0x05 756 #define TLB_INST_1G 0x06 757 758 #define TLB_DATA_4K 0x11 759 #define TLB_DATA_4M 0x12 760 #define TLB_DATA_2M_4M 0x13 761 #define TLB_DATA_4K_4M 0x14 762 763 #define TLB_DATA_1G 0x16 764 765 #define TLB_DATA0_4K 0x21 766 #define TLB_DATA0_4M 0x22 767 #define TLB_DATA0_2M_4M 0x23 768 769 #define STLB_4K 0x41 770 #define STLB_4K_2M 0x42 771 772 static const struct _tlb_table intel_tlb_table[] = { 773 { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, 774 { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, 775 { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, 776 { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, 777 { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, 778 { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, 779 { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, 780 { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, 781 { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, 782 { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, 783 { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, 784 { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, 785 { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, 786 { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, 787 { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, 788 { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, 789 { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, 790 { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, 791 { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, 792 { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, 793 { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, 794 { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, 795 { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, 796 { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, 797 { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, 798 { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, 799 { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, 800 { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, 801 { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, 802 { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, 803 { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, 804 { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, 805 { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, 806 { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, 807 { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, 808 { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, 809 { 0x00, 0, 0 } 810 }; 811 812 static void intel_tlb_lookup(const unsigned char desc) 813 { 814 unsigned char k; 815 if (desc == 0) 816 return; 817 818 /* look up this descriptor in the table */ 819 for (k = 0; intel_tlb_table[k].descriptor != desc && 820 intel_tlb_table[k].descriptor != 0; k++) 821 ; 822 823 if (intel_tlb_table[k].tlb_type == 0) 824 return; 825 826 switch (intel_tlb_table[k].tlb_type) { 827 case STLB_4K: 828 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) 829 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; 830 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) 831 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; 832 break; 833 case STLB_4K_2M: 834 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) 835 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; 836 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) 837 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; 838 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) 839 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; 840 if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) 841 tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; 842 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) 843 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; 844 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) 845 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; 846 break; 847 case TLB_INST_ALL: 848 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) 849 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; 850 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) 851 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; 852 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) 853 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; 854 break; 855 case TLB_INST_4K: 856 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) 857 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; 858 break; 859 case TLB_INST_4M: 860 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) 861 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; 862 break; 863 case TLB_INST_2M_4M: 864 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) 865 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; 866 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) 867 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; 868 break; 869 case TLB_DATA_4K: 870 case TLB_DATA0_4K: 871 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) 872 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; 873 break; 874 case TLB_DATA_4M: 875 case TLB_DATA0_4M: 876 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) 877 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; 878 break; 879 case TLB_DATA_2M_4M: 880 case TLB_DATA0_2M_4M: 881 if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) 882 tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; 883 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) 884 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; 885 break; 886 case TLB_DATA_4K_4M: 887 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) 888 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; 889 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) 890 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; 891 break; 892 case TLB_DATA_1G: 893 if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) 894 tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; 895 break; 896 } 897 } 898 899 static void intel_detect_tlb(struct cpuinfo_x86 *c) 900 { 901 int i, j, n; 902 unsigned int regs[4]; 903 unsigned char *desc = (unsigned char *)regs; 904 905 if (c->cpuid_level < 2) 906 return; 907 908 /* Number of times to iterate */ 909 n = cpuid_eax(2) & 0xFF; 910 911 for (i = 0 ; i < n ; i++) { 912 cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); 913 914 /* If bit 31 is set, this is an unknown format */ 915 for (j = 0 ; j < 3 ; j++) 916 if (regs[j] & (1 << 31)) 917 regs[j] = 0; 918 919 /* Byte 0 is level count, not a descriptor */ 920 for (j = 1 ; j < 16 ; j++) 921 intel_tlb_lookup(desc[j]); 922 } 923 } 924 925 static const struct cpu_dev intel_cpu_dev = { 926 .c_vendor = "Intel", 927 .c_ident = { "GenuineIntel" }, 928 #ifdef CONFIG_X86_32 929 .legacy_models = { 930 { .family = 4, .model_names = 931 { 932 [0] = "486 DX-25/33", 933 [1] = "486 DX-50", 934 [2] = "486 SX", 935 [3] = "486 DX/2", 936 [4] = "486 SL", 937 [5] = "486 SX/2", 938 [7] = "486 DX/2-WB", 939 [8] = "486 DX/4", 940 [9] = "486 DX/4-WB" 941 } 942 }, 943 { .family = 5, .model_names = 944 { 945 [0] = "Pentium 60/66 A-step", 946 [1] = "Pentium 60/66", 947 [2] = "Pentium 75 - 200", 948 [3] = "OverDrive PODP5V83", 949 [4] = "Pentium MMX", 950 [7] = "Mobile Pentium 75 - 200", 951 [8] = "Mobile Pentium MMX", 952 [9] = "Quark SoC X1000", 953 } 954 }, 955 { .family = 6, .model_names = 956 { 957 [0] = "Pentium Pro A-step", 958 [1] = "Pentium Pro", 959 [3] = "Pentium II (Klamath)", 960 [4] = "Pentium II (Deschutes)", 961 [5] = "Pentium II (Deschutes)", 962 [6] = "Mobile Pentium II", 963 [7] = "Pentium III (Katmai)", 964 [8] = "Pentium III (Coppermine)", 965 [10] = "Pentium III (Cascades)", 966 [11] = "Pentium III (Tualatin)", 967 } 968 }, 969 { .family = 15, .model_names = 970 { 971 [0] = "Pentium 4 (Unknown)", 972 [1] = "Pentium 4 (Willamette)", 973 [2] = "Pentium 4 (Northwood)", 974 [4] = "Pentium 4 (Foster)", 975 [5] = "Pentium 4 (Foster)", 976 } 977 }, 978 }, 979 .legacy_cache_size = intel_size_cache, 980 #endif 981 .c_detect_tlb = intel_detect_tlb, 982 .c_early_init = early_init_intel, 983 .c_bsp_init = bsp_init_intel, 984 .c_init = init_intel, 985 .c_x86_vendor = X86_VENDOR_INTEL, 986 }; 987 988 cpu_dev_register(intel_cpu_dev); 989 990 #undef pr_fmt 991 #define pr_fmt(fmt) "x86/split lock detection: " fmt 992 993 static const struct { 994 const char *option; 995 enum split_lock_detect_state state; 996 } sld_options[] __initconst = { 997 { "off", sld_off }, 998 { "warn", sld_warn }, 999 { "fatal", sld_fatal }, 1000 { "ratelimit:", sld_ratelimit }, 1001 }; 1002 1003 static struct ratelimit_state bld_ratelimit; 1004 1005 static unsigned int sysctl_sld_mitigate = 1; 1006 static DEFINE_SEMAPHORE(buslock_sem, 1); 1007 1008 #ifdef CONFIG_PROC_SYSCTL 1009 static struct ctl_table sld_sysctls[] = { 1010 { 1011 .procname = "split_lock_mitigate", 1012 .data = &sysctl_sld_mitigate, 1013 .maxlen = sizeof(unsigned int), 1014 .mode = 0644, 1015 .proc_handler = proc_douintvec_minmax, 1016 .extra1 = SYSCTL_ZERO, 1017 .extra2 = SYSCTL_ONE, 1018 }, 1019 {} 1020 }; 1021 1022 static int __init sld_mitigate_sysctl_init(void) 1023 { 1024 register_sysctl_init("kernel", sld_sysctls); 1025 return 0; 1026 } 1027 1028 late_initcall(sld_mitigate_sysctl_init); 1029 #endif 1030 1031 static inline bool match_option(const char *arg, int arglen, const char *opt) 1032 { 1033 int len = strlen(opt), ratelimit; 1034 1035 if (strncmp(arg, opt, len)) 1036 return false; 1037 1038 /* 1039 * Min ratelimit is 1 bus lock/sec. 1040 * Max ratelimit is 1000 bus locks/sec. 1041 */ 1042 if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && 1043 ratelimit > 0 && ratelimit <= 1000) { 1044 ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); 1045 ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); 1046 return true; 1047 } 1048 1049 return len == arglen; 1050 } 1051 1052 static bool split_lock_verify_msr(bool on) 1053 { 1054 u64 ctrl, tmp; 1055 1056 if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl)) 1057 return false; 1058 if (on) 1059 ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; 1060 else 1061 ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT; 1062 if (wrmsrl_safe(MSR_TEST_CTRL, ctrl)) 1063 return false; 1064 rdmsrl(MSR_TEST_CTRL, tmp); 1065 return ctrl == tmp; 1066 } 1067 1068 static void __init sld_state_setup(void) 1069 { 1070 enum split_lock_detect_state state = sld_warn; 1071 char arg[20]; 1072 int i, ret; 1073 1074 if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && 1075 !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) 1076 return; 1077 1078 ret = cmdline_find_option(boot_command_line, "split_lock_detect", 1079 arg, sizeof(arg)); 1080 if (ret >= 0) { 1081 for (i = 0; i < ARRAY_SIZE(sld_options); i++) { 1082 if (match_option(arg, ret, sld_options[i].option)) { 1083 state = sld_options[i].state; 1084 break; 1085 } 1086 } 1087 } 1088 sld_state = state; 1089 } 1090 1091 static void __init __split_lock_setup(void) 1092 { 1093 if (!split_lock_verify_msr(false)) { 1094 pr_info("MSR access failed: Disabled\n"); 1095 return; 1096 } 1097 1098 rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); 1099 1100 if (!split_lock_verify_msr(true)) { 1101 pr_info("MSR access failed: Disabled\n"); 1102 return; 1103 } 1104 1105 /* Restore the MSR to its cached value. */ 1106 wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); 1107 1108 setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); 1109 } 1110 1111 /* 1112 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking 1113 * is not implemented as one thread could undo the setting of the other 1114 * thread immediately after dropping the lock anyway. 1115 */ 1116 static void sld_update_msr(bool on) 1117 { 1118 u64 test_ctrl_val = msr_test_ctrl_cache; 1119 1120 if (on) 1121 test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; 1122 1123 wrmsrl(MSR_TEST_CTRL, test_ctrl_val); 1124 } 1125 1126 static void split_lock_init(void) 1127 { 1128 /* 1129 * #DB for bus lock handles ratelimit and #AC for split lock is 1130 * disabled. 1131 */ 1132 if (sld_state == sld_ratelimit) { 1133 split_lock_verify_msr(false); 1134 return; 1135 } 1136 1137 if (cpu_model_supports_sld) 1138 split_lock_verify_msr(sld_state != sld_off); 1139 } 1140 1141 static void __split_lock_reenable_unlock(struct work_struct *work) 1142 { 1143 sld_update_msr(true); 1144 up(&buslock_sem); 1145 } 1146 1147 static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); 1148 1149 static void __split_lock_reenable(struct work_struct *work) 1150 { 1151 sld_update_msr(true); 1152 } 1153 static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); 1154 1155 /* 1156 * If a CPU goes offline with pending delayed work to re-enable split lock 1157 * detection then the delayed work will be executed on some other CPU. That 1158 * handles releasing the buslock_sem, but because it executes on a 1159 * different CPU probably won't re-enable split lock detection. This is a 1160 * problem on HT systems since the sibling CPU on the same core may then be 1161 * left running with split lock detection disabled. 1162 * 1163 * Unconditionally re-enable detection here. 1164 */ 1165 static int splitlock_cpu_offline(unsigned int cpu) 1166 { 1167 sld_update_msr(true); 1168 1169 return 0; 1170 } 1171 1172 static void split_lock_warn(unsigned long ip) 1173 { 1174 struct delayed_work *work; 1175 int cpu; 1176 1177 if (!current->reported_split_lock) 1178 pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", 1179 current->comm, current->pid, ip); 1180 current->reported_split_lock = 1; 1181 1182 if (sysctl_sld_mitigate) { 1183 /* 1184 * misery factor #1: 1185 * sleep 10ms before trying to execute split lock. 1186 */ 1187 if (msleep_interruptible(10) > 0) 1188 return; 1189 /* 1190 * Misery factor #2: 1191 * only allow one buslocked disabled core at a time. 1192 */ 1193 if (down_interruptible(&buslock_sem) == -EINTR) 1194 return; 1195 work = &sl_reenable_unlock; 1196 } else { 1197 work = &sl_reenable; 1198 } 1199 1200 cpu = get_cpu(); 1201 schedule_delayed_work_on(cpu, work, 2); 1202 1203 /* Disable split lock detection on this CPU to make progress */ 1204 sld_update_msr(false); 1205 put_cpu(); 1206 } 1207 1208 bool handle_guest_split_lock(unsigned long ip) 1209 { 1210 if (sld_state == sld_warn) { 1211 split_lock_warn(ip); 1212 return true; 1213 } 1214 1215 pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n", 1216 current->comm, current->pid, 1217 sld_state == sld_fatal ? "fatal" : "bogus", ip); 1218 1219 current->thread.error_code = 0; 1220 current->thread.trap_nr = X86_TRAP_AC; 1221 force_sig_fault(SIGBUS, BUS_ADRALN, NULL); 1222 return false; 1223 } 1224 EXPORT_SYMBOL_GPL(handle_guest_split_lock); 1225 1226 static void bus_lock_init(void) 1227 { 1228 u64 val; 1229 1230 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) 1231 return; 1232 1233 rdmsrl(MSR_IA32_DEBUGCTLMSR, val); 1234 1235 if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && 1236 (sld_state == sld_warn || sld_state == sld_fatal)) || 1237 sld_state == sld_off) { 1238 /* 1239 * Warn and fatal are handled by #AC for split lock if #AC for 1240 * split lock is supported. 1241 */ 1242 val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; 1243 } else { 1244 val |= DEBUGCTLMSR_BUS_LOCK_DETECT; 1245 } 1246 1247 wrmsrl(MSR_IA32_DEBUGCTLMSR, val); 1248 } 1249 1250 bool handle_user_split_lock(struct pt_regs *regs, long error_code) 1251 { 1252 if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) 1253 return false; 1254 split_lock_warn(regs->ip); 1255 return true; 1256 } 1257 1258 void handle_bus_lock(struct pt_regs *regs) 1259 { 1260 switch (sld_state) { 1261 case sld_off: 1262 break; 1263 case sld_ratelimit: 1264 /* Enforce no more than bld_ratelimit bus locks/sec. */ 1265 while (!__ratelimit(&bld_ratelimit)) 1266 msleep(20); 1267 /* Warn on the bus lock. */ 1268 fallthrough; 1269 case sld_warn: 1270 pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", 1271 current->comm, current->pid, regs->ip); 1272 break; 1273 case sld_fatal: 1274 force_sig_fault(SIGBUS, BUS_ADRALN, NULL); 1275 break; 1276 } 1277 } 1278 1279 /* 1280 * CPU models that are known to have the per-core split-lock detection 1281 * feature even though they do not enumerate IA32_CORE_CAPABILITIES. 1282 */ 1283 static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { 1284 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), 1285 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), 1286 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), 1287 {} 1288 }; 1289 1290 static void __init split_lock_setup(struct cpuinfo_x86 *c) 1291 { 1292 const struct x86_cpu_id *m; 1293 u64 ia32_core_caps; 1294 1295 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1296 return; 1297 1298 /* Check for CPUs that have support but do not enumerate it: */ 1299 m = x86_match_cpu(split_lock_cpu_ids); 1300 if (m) 1301 goto supported; 1302 1303 if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) 1304 return; 1305 1306 /* 1307 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but 1308 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set 1309 * it have split lock detection. 1310 */ 1311 rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); 1312 if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) 1313 goto supported; 1314 1315 /* CPU is not in the model list and does not have the MSR bit: */ 1316 return; 1317 1318 supported: 1319 cpu_model_supports_sld = true; 1320 __split_lock_setup(); 1321 } 1322 1323 static void sld_state_show(void) 1324 { 1325 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && 1326 !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) 1327 return; 1328 1329 switch (sld_state) { 1330 case sld_off: 1331 pr_info("disabled\n"); 1332 break; 1333 case sld_warn: 1334 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { 1335 pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); 1336 if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, 1337 "x86/splitlock", NULL, splitlock_cpu_offline) < 0) 1338 pr_warn("No splitlock CPU offline handler\n"); 1339 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { 1340 pr_info("#DB: warning on user-space bus_locks\n"); 1341 } 1342 break; 1343 case sld_fatal: 1344 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { 1345 pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); 1346 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { 1347 pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", 1348 boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? 1349 " from non-WB" : ""); 1350 } 1351 break; 1352 case sld_ratelimit: 1353 if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) 1354 pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); 1355 break; 1356 } 1357 } 1358 1359 void __init sld_setup(struct cpuinfo_x86 *c) 1360 { 1361 split_lock_setup(c); 1362 sld_state_setup(); 1363 sld_state_show(); 1364 } 1365 1366 #define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 1367 1368 /** 1369 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU 1370 * 1371 * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in 1372 * a hybrid processor. If the processor is not hybrid, returns 0. 1373 */ 1374 u8 get_this_hybrid_cpu_type(void) 1375 { 1376 if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) 1377 return 0; 1378 1379 return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; 1380 } 1381