1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Intel specific MCE features. 4 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> 5 * Copyright (C) 2008, 2009 Intel Corporation 6 * Author: Andi Kleen 7 */ 8 9 #include <linux/gfp.h> 10 #include <linux/interrupt.h> 11 #include <linux/percpu.h> 12 #include <linux/sched.h> 13 #include <linux/cpumask.h> 14 #include <asm/apic.h> 15 #include <asm/cpufeature.h> 16 #include <asm/cpu_device_id.h> 17 #include <asm/processor.h> 18 #include <asm/msr.h> 19 #include <asm/mce.h> 20 21 #include "internal.h" 22 23 /* 24 * Support for Intel Correct Machine Check Interrupts. This allows 25 * the CPU to raise an interrupt when a corrected machine check happened. 26 * Normally we pick those up using a regular polling timer. 27 * Also supports reliable discovery of shared banks. 28 */ 29 30 /* 31 * CMCI can be delivered to multiple cpus that share a machine check bank 32 * so we need to designate a single cpu to process errors logged in each bank 33 * in the interrupt handler (otherwise we would have many races and potential 34 * double reporting of the same error). 35 * Note that this can change when a cpu is offlined or brought online since 36 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear() 37 * disables CMCI on all banks owned by the cpu and clears this bitfield. At 38 * this point, cmci_rediscover() kicks in and a different cpu may end up 39 * taking ownership of some of the shared MCA banks that were previously 40 * owned by the offlined cpu. 41 */ 42 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); 43 44 /* 45 * cmci_discover_lock protects against parallel discovery attempts 46 * which could race against each other. 47 */ 48 static DEFINE_RAW_SPINLOCK(cmci_discover_lock); 49 50 /* 51 * On systems that do support CMCI but it's disabled, polling for MCEs can 52 * cause the same event to be reported multiple times because IA32_MCi_STATUS 53 * is shared by the same package. 54 */ 55 static DEFINE_SPINLOCK(cmci_poll_lock); 56 57 /* Linux non-storm CMCI threshold (may be overridden by BIOS) */ 58 #define CMCI_THRESHOLD 1 59 60 /* 61 * MCi_CTL2 threshold for each bank when there is no storm. 62 * Default value for each bank may have been set by BIOS. 63 */ 64 static u16 cmci_threshold[MAX_NR_BANKS]; 65 66 /* 67 * High threshold to limit CMCI rate during storms. Max supported is 68 * 0x7FFF. Use this slightly smaller value so it has a distinctive 69 * signature when some asks "Why am I not seeing all corrected errors?" 70 * A high threshold is used instead of just disabling CMCI for a 71 * bank because both corrected and uncorrected errors may be logged 72 * in the same bank and signalled with CMCI. The threshold only applies 73 * to corrected errors, so keeping CMCI enabled means that uncorrected 74 * errors will still be processed in a timely fashion. 75 */ 76 #define CMCI_STORM_THRESHOLD 32749 77 78 static int cmci_supported(int *banks) 79 { 80 u64 cap; 81 82 if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) 83 return 0; 84 85 /* 86 * Vendor check is not strictly needed, but the initial 87 * initialization is vendor keyed and this 88 * makes sure none of the backdoors are entered otherwise. 89 */ 90 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && 91 boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) 92 return 0; 93 94 if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) 95 return 0; 96 rdmsrl(MSR_IA32_MCG_CAP, cap); 97 *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK); 98 return !!(cap & MCG_CMCI_P); 99 } 100 101 static bool lmce_supported(void) 102 { 103 u64 tmp; 104 105 if (mca_cfg.lmce_disabled) 106 return false; 107 108 rdmsrl(MSR_IA32_MCG_CAP, tmp); 109 110 /* 111 * LMCE depends on recovery support in the processor. Hence both 112 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP. 113 */ 114 if ((tmp & (MCG_SER_P | MCG_LMCE_P)) != 115 (MCG_SER_P | MCG_LMCE_P)) 116 return false; 117 118 /* 119 * BIOS should indicate support for LMCE by setting bit 20 in 120 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP 121 * fault. The MSR must also be locked for LMCE_ENABLED to take effect. 122 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally 123 * locks the MSR in the event that it wasn't already locked by BIOS. 124 */ 125 rdmsrl(MSR_IA32_FEAT_CTL, tmp); 126 if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED))) 127 return false; 128 129 return tmp & FEAT_CTL_LMCE_ENABLED; 130 } 131 132 /* 133 * Set a new CMCI threshold value. Preserve the state of the 134 * MCI_CTL2_CMCI_EN bit in case this happens during a 135 * cmci_rediscover() operation. 136 */ 137 static void cmci_set_threshold(int bank, int thresh) 138 { 139 unsigned long flags; 140 u64 val; 141 142 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 143 rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 144 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 145 wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh); 146 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 147 } 148 149 void mce_intel_handle_storm(int bank, bool on) 150 { 151 if (on) 152 cmci_set_threshold(bank, CMCI_STORM_THRESHOLD); 153 else 154 cmci_set_threshold(bank, cmci_threshold[bank]); 155 } 156 157 /* 158 * The interrupt handler. This is called on every event. 159 * Just call the poller directly to log any events. 160 * This could in theory increase the threshold under high load, 161 * but doesn't for now. 162 */ 163 static void intel_threshold_interrupt(void) 164 { 165 machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); 166 } 167 168 /* 169 * Check all the reasons why current CPU cannot claim 170 * ownership of a bank. 171 * 1: CPU already owns this bank 172 * 2: BIOS owns this bank 173 * 3: Some other CPU owns this bank 174 */ 175 static bool cmci_skip_bank(int bank, u64 *val) 176 { 177 unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); 178 179 if (test_bit(bank, owned)) 180 return true; 181 182 /* Skip banks in firmware first mode */ 183 if (test_bit(bank, mce_banks_ce_disabled)) 184 return true; 185 186 rdmsrl(MSR_IA32_MCx_CTL2(bank), *val); 187 188 /* Already owned by someone else? */ 189 if (*val & MCI_CTL2_CMCI_EN) { 190 clear_bit(bank, owned); 191 __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 192 return true; 193 } 194 195 return false; 196 } 197 198 /* 199 * Decide which CMCI interrupt threshold to use: 200 * 1: If this bank is in storm mode from whichever CPU was 201 * the previous owner, stay in storm mode. 202 * 2: If ignoring any threshold set by BIOS, set Linux default 203 * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero). 204 */ 205 static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh) 206 { 207 if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) 208 return val; 209 210 if (!mca_cfg.bios_cmci_threshold) { 211 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 212 val |= CMCI_THRESHOLD; 213 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { 214 /* 215 * If bios_cmci_threshold boot option was specified 216 * but the threshold is zero, we'll try to initialize 217 * it to 1. 218 */ 219 *bios_zero_thresh = 1; 220 val |= CMCI_THRESHOLD; 221 } 222 223 return val; 224 } 225 226 /* 227 * Try to claim ownership of a bank. 228 */ 229 static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh) 230 { 231 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 232 233 val |= MCI_CTL2_CMCI_EN; 234 wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 235 rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 236 237 /* If the enable bit did not stick, this bank should be polled. */ 238 if (!(val & MCI_CTL2_CMCI_EN)) { 239 WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks))); 240 storm->banks[bank].poll_only = true; 241 return; 242 } 243 244 /* This CPU successfully set the enable bit. */ 245 set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned)); 246 247 if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) { 248 pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank); 249 mce_inherit_storm(bank); 250 cmci_storm_begin(bank); 251 } else { 252 __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 253 } 254 255 /* 256 * We are able to set thresholds for some banks that 257 * had a threshold of 0. This means the BIOS has not 258 * set the thresholds properly or does not work with 259 * this boot option. Note down now and report later. 260 */ 261 if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && 262 (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) 263 *bios_wrong_thresh = 1; 264 265 /* Save default threshold for each bank */ 266 if (cmci_threshold[bank] == 0) 267 cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK; 268 } 269 270 /* 271 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks 272 * on this CPU. Use the algorithm recommended in the SDM to discover shared 273 * banks. Called during initial bootstrap, and also for hotplug CPU operations 274 * to rediscover/reassign machine check banks. 275 */ 276 static void cmci_discover(int banks) 277 { 278 int bios_wrong_thresh = 0; 279 unsigned long flags; 280 int i; 281 282 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 283 for (i = 0; i < banks; i++) { 284 u64 val; 285 int bios_zero_thresh = 0; 286 287 if (cmci_skip_bank(i, &val)) 288 continue; 289 290 val = cmci_pick_threshold(val, &bios_zero_thresh); 291 cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh); 292 } 293 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 294 if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { 295 pr_info_once( 296 "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); 297 pr_info_once( 298 "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); 299 } 300 } 301 302 /* 303 * Just in case we missed an event during initialization check 304 * all the CMCI owned banks. 305 */ 306 void cmci_recheck(void) 307 { 308 unsigned long flags; 309 int banks; 310 311 if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) 312 return; 313 314 local_irq_save(flags); 315 machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)); 316 local_irq_restore(flags); 317 } 318 319 /* Caller must hold the lock on cmci_discover_lock */ 320 static void __cmci_disable_bank(int bank) 321 { 322 u64 val; 323 324 if (!test_bit(bank, this_cpu_ptr(mce_banks_owned))) 325 return; 326 rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 327 val &= ~MCI_CTL2_CMCI_EN; 328 wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 329 __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); 330 331 if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) 332 cmci_storm_end(bank); 333 } 334 335 /* 336 * Disable CMCI on this CPU for all banks it owns when it goes down. 337 * This allows other CPUs to claim the banks on rediscovery. 338 */ 339 void cmci_clear(void) 340 { 341 unsigned long flags; 342 int i; 343 int banks; 344 345 if (!cmci_supported(&banks)) 346 return; 347 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 348 for (i = 0; i < banks; i++) 349 __cmci_disable_bank(i); 350 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 351 } 352 353 static void cmci_rediscover_work_func(void *arg) 354 { 355 int banks; 356 357 /* Recheck banks in case CPUs don't all have the same */ 358 if (cmci_supported(&banks)) 359 cmci_discover(banks); 360 } 361 362 /* After a CPU went down cycle through all the others and rediscover */ 363 void cmci_rediscover(void) 364 { 365 int banks; 366 367 if (!cmci_supported(&banks)) 368 return; 369 370 on_each_cpu(cmci_rediscover_work_func, NULL, 1); 371 } 372 373 /* 374 * Reenable CMCI on this CPU in case a CPU down failed. 375 */ 376 void cmci_reenable(void) 377 { 378 int banks; 379 if (cmci_supported(&banks)) 380 cmci_discover(banks); 381 } 382 383 void cmci_disable_bank(int bank) 384 { 385 int banks; 386 unsigned long flags; 387 388 if (!cmci_supported(&banks)) 389 return; 390 391 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 392 __cmci_disable_bank(bank); 393 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 394 } 395 396 /* Bank polling function when CMCI is disabled. */ 397 static void cmci_mc_poll_banks(void) 398 { 399 spin_lock(&cmci_poll_lock); 400 machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); 401 spin_unlock(&cmci_poll_lock); 402 } 403 404 void intel_init_cmci(void) 405 { 406 int banks; 407 408 if (!cmci_supported(&banks)) { 409 mc_poll_banks = cmci_mc_poll_banks; 410 return; 411 } 412 413 mce_threshold_vector = intel_threshold_interrupt; 414 cmci_discover(banks); 415 /* 416 * For CPU #0 this runs with still disabled APIC, but that's 417 * ok because only the vector is set up. We still do another 418 * check for the banks later for CPU #0 just to make sure 419 * to not miss any events. 420 */ 421 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); 422 cmci_recheck(); 423 } 424 425 void intel_init_lmce(void) 426 { 427 u64 val; 428 429 if (!lmce_supported()) 430 return; 431 432 rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 433 434 if (!(val & MCG_EXT_CTL_LMCE_EN)) 435 wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); 436 } 437 438 void intel_clear_lmce(void) 439 { 440 u64 val; 441 442 if (!lmce_supported()) 443 return; 444 445 rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 446 val &= ~MCG_EXT_CTL_LMCE_EN; 447 wrmsrl(MSR_IA32_MCG_EXT_CTL, val); 448 } 449 450 /* 451 * Enable additional error logs from the integrated 452 * memory controller on processors that support this. 453 */ 454 static void intel_imc_init(struct cpuinfo_x86 *c) 455 { 456 u64 error_control; 457 458 switch (c->x86_vfm) { 459 case INTEL_SANDYBRIDGE_X: 460 case INTEL_IVYBRIDGE_X: 461 case INTEL_HASWELL_X: 462 if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control)) 463 return; 464 error_control |= 2; 465 wrmsrl_safe(MSR_ERROR_CONTROL, error_control); 466 break; 467 } 468 } 469 470 void mce_intel_feature_init(struct cpuinfo_x86 *c) 471 { 472 intel_init_cmci(); 473 intel_init_lmce(); 474 intel_imc_init(c); 475 } 476 477 void mce_intel_feature_clear(struct cpuinfo_x86 *c) 478 { 479 intel_clear_lmce(); 480 } 481 482 bool intel_filter_mce(struct mce *m) 483 { 484 struct cpuinfo_x86 *c = &boot_cpu_data; 485 486 /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */ 487 if ((c->x86_vfm == INTEL_HASWELL || 488 c->x86_vfm == INTEL_HASWELL_L || 489 c->x86_vfm == INTEL_BROADWELL || 490 c->x86_vfm == INTEL_HASWELL_G || 491 c->x86_vfm == INTEL_SKYLAKE_X) && 492 (m->bank == 0) && 493 ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) 494 return true; 495 496 return false; 497 } 498 499 /* 500 * Check if the address reported by the CPU is in a format we can parse. 501 * It would be possible to add code for most other cases, but all would 502 * be somewhat complicated (e.g. segment offset would require an instruction 503 * parser). So only support physical addresses up to page granularity for now. 504 */ 505 bool intel_mce_usable_address(struct mce *m) 506 { 507 if (!(m->status & MCI_STATUS_MISCV)) 508 return false; 509 510 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) 511 return false; 512 513 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) 514 return false; 515 516 return true; 517 } 518