// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that do support CMCI but it's disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by all CPUs in the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

/* Linux non-storm CMCI threshold (may be overridden by BIOS) */
#define CMCI_THRESHOLD		1

/*
 * MCi_CTL2 threshold for each bank when there is no storm.
 * Default value for each bank may have been set by BIOS.
 */
static u16 cmci_threshold[MAX_NR_BANKS];

/*
 * High threshold to limit CMCI rate during storms. Max supported is
 * 0x7FFF. Use this slightly smaller value so it has a distinctive
 * signature when someone asks "Why am I not seeing all corrected errors?"
 * A high threshold is used instead of just disabling CMCI for a
 * bank because both corrected and uncorrected errors may be logged
 * in the same bank and signalled with CMCI. The threshold only applies
 * to corrected errors, so keeping CMCI enabled means that uncorrected
 * errors will still be processed in a timely fashion.
 */
#define CMCI_STORM_THRESHOLD	32749
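
/*
 * Both thresholds above end up in the IA32_MCi_CTL2 MSRs. Per the SDM,
 * bits 14:0 of that register hold the corrected error count threshold
 * (MCI_CTL2_CMCI_THRESHOLD_MASK) and bit 30 is the CMCI enable bit
 * (MCI_CTL2_CMCI_EN). So writing (MCI_CTL2_CMCI_EN | CMCI_THRESHOLD)
 * signals every corrected error, while substituting CMCI_STORM_THRESHOLD
 * effectively throttles interrupts until the storm subsides.
 */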

static bool cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return false;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this makes sure none of the backdoors
	 * are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return false;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

/*
 * Set a new CMCI threshold value. Preserve the state of the
 * MCI_CTL2_CMCI_EN bit in case this happens during a
 * cmci_rediscover() operation.
 */
static void cmci_set_threshold(int bank, int thresh)
{
	unsigned long flags;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

void mce_intel_handle_storm(int bank, bool on)
{
	if (on)
		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
	else
		cmci_set_threshold(bank, cmci_threshold[bank]);
}
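
/*
 * mce_intel_handle_storm() is the Intel hook used by the common MCE storm
 * tracking code (see internal.h): when a storm is detected on a bank the
 * threshold is raised to CMCI_STORM_THRESHOLD, and when the storm subsides
 * it is restored to the value saved in cmci_threshold[].
 */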

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Check all the reasons why the current CPU cannot claim
 * ownership of a bank.
 * 1: CPU already owns this bank
 * 2: BIOS owns this bank
 * 3: Some other CPU owns this bank
 */
static bool cmci_skip_bank(int bank, u64 *val)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);

	if (test_bit(bank, owned))
		return true;

	/* Skip banks in firmware first mode */
	if (test_bit(bank, mce_banks_ce_disabled))
		return true;

	rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);

	/* Already owned by someone else? */
	if (*val & MCI_CTL2_CMCI_EN) {
		clear_bit(bank, owned);
		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
		return true;
	}

	return false;
}

/*
 * Decide which CMCI interrupt threshold to use:
 * 1: If this bank is in storm mode from whichever CPU was
 *    the previous owner, stay in storm mode.
 * 2: If ignoring any threshold set by BIOS, set the Linux default.
 * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
 */
static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
{
	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
		return val;

	if (!mca_cfg.bios_cmci_threshold) {
		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
		val |= CMCI_THRESHOLD;
	} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
		/*
		 * If bios_cmci_threshold boot option was specified
		 * but the threshold is zero, we'll try to initialize
		 * it to 1.
		 */
		*bios_zero_thresh = 1;
		val |= CMCI_THRESHOLD;
	}

	return val;
}

/*
 * Try to claim ownership of a bank.
 */
static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
{
	struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);

	val |= MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

	/* If the enable bit did not stick, this bank should be polled. */
	if (!(val & MCI_CTL2_CMCI_EN)) {
		WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
		storm->banks[bank].poll_only = true;
		return;
	}

	/* This CPU successfully set the enable bit. */
	set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));

	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
		pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
		mce_inherit_storm(bank);
		cmci_storm_begin(bank);
	} else {
		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
	}

	/*
	 * We are able to set thresholds for some banks that
	 * had a threshold of 0. This means the BIOS has not
	 * set the thresholds properly or does not work with
	 * this boot option. Note down now and report later.
	 */
	if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
	    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
		*bios_wrong_thresh = 1;

	/* Save default threshold for each bank */
	if (cmci_threshold[bank] == 0)
		cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks. Called during initial bootstrap, and also for hotplug CPU operations
 * to rediscover/reassign machine check banks.
 */
static void cmci_discover(int banks)
{
	int bios_wrong_thresh = 0;
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (cmci_skip_bank(i, &val))
			continue;

		val = cmci_pick_threshold(val, &bios_zero_thresh);
		cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}
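
/*
 * Ownership of a shared bank is arbitrated through the bank's MCi_CTL2 MSR
 * itself rather than through any kernel data structure: the first CPU whose
 * cmci_claim_bank() sets CMCI_EN and reads it back owns the bank, and every
 * other CPU sharing that bank subsequently sees CMCI_EN already set in
 * cmci_skip_bank() and leaves the bank alone.
 */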

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));

	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
		cmci_storm_end(bank);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same number of banks */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
	spin_lock(&cmci_poll_lock);
	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
	spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks)) {
		mc_poll_banks = cmci_mc_poll_banks;
		return;
	}

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * not to miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}
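
/*
 * intel_init_cmci() above wires up both halves of CMCI delivery: the local
 * APIC LVT CMCI entry is programmed with fixed delivery of
 * THRESHOLD_APIC_VECTOR, and the threshold interrupt behind that vector
 * dispatches through mce_threshold_vector (see threshold.c), which was
 * pointed at intel_threshold_interrupt().
 */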

void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
	u64 error_control;

	switch (c->x86_vfm) {
	case INTEL_SANDYBRIDGE_X:
	case INTEL_IVYBRIDGE_X:
	case INTEL_HASWELL_X:
		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
			return;
		/* Bit 1 of MSR_ERROR_CONTROL turns on the additional iMC error logging */
		error_control |= 2;
		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
		break;
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}

bool intel_filter_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/*
	 * MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37: these parts may
	 * log a spurious corrected error in bank 0 (VAL=1, UC=0, MSCOD 0x00f,
	 * MCACOD 0x0005), which is exactly what the mask and value below
	 * match. Filter those out as they carry no useful information.
	 */
	if ((c->x86_vfm == INTEL_HASWELL ||
	     c->x86_vfm == INTEL_HASWELL_L ||
	     c->x86_vfm == INTEL_BROADWELL ||
	     c->x86_vfm == INTEL_HASWELL_G ||
	     c->x86_vfm == INTEL_SKYLAKE_X) &&
	    (m->bank == 0) &&
	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
		return true;

	return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_MISCV))
		return false;

	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return false;

	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return false;

	return true;
}