xref: /linux/arch/x86/kernel/cpu/mce/intel.c (revision 621cde16e49b3ecf7d59a8106a20aaebfb4a59a9)
121afaf18SBorislav Petkov // SPDX-License-Identifier: GPL-2.0
221afaf18SBorislav Petkov /*
321afaf18SBorislav Petkov  * Intel specific MCE features.
421afaf18SBorislav Petkov  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
521afaf18SBorislav Petkov  * Copyright (C) 2008, 2009 Intel Corporation
621afaf18SBorislav Petkov  * Author: Andi Kleen
721afaf18SBorislav Petkov  */
821afaf18SBorislav Petkov 
921afaf18SBorislav Petkov #include <linux/gfp.h>
1021afaf18SBorislav Petkov #include <linux/interrupt.h>
1121afaf18SBorislav Petkov #include <linux/percpu.h>
1221afaf18SBorislav Petkov #include <linux/sched.h>
1321afaf18SBorislav Petkov #include <linux/cpumask.h>
1421afaf18SBorislav Petkov #include <asm/apic.h>
1521afaf18SBorislav Petkov #include <asm/cpufeature.h>
16*4a5f2dd1STony Luck #include <asm/cpu_device_id.h>
1721afaf18SBorislav Petkov #include <asm/processor.h>
1821afaf18SBorislav Petkov #include <asm/msr.h>
1921afaf18SBorislav Petkov #include <asm/mce.h>
2021afaf18SBorislav Petkov 
2121afaf18SBorislav Petkov #include "internal.h"
2221afaf18SBorislav Petkov 
2321afaf18SBorislav Petkov /*
2421afaf18SBorislav Petkov  * Support for Intel Correct Machine Check Interrupts. This allows
2521afaf18SBorislav Petkov  * the CPU to raise an interrupt when a corrected machine check happened.
2621afaf18SBorislav Petkov  * Normally we pick those up using a regular polling timer.
2721afaf18SBorislav Petkov  * Also supports reliable discovery of shared banks.
2821afaf18SBorislav Petkov  */
2921afaf18SBorislav Petkov 
3021afaf18SBorislav Petkov /*
3121afaf18SBorislav Petkov  * CMCI can be delivered to multiple cpus that share a machine check bank
3221afaf18SBorislav Petkov  * so we need to designate a single cpu to process errors logged in each bank
3321afaf18SBorislav Petkov  * in the interrupt handler (otherwise we would have many races and potential
3421afaf18SBorislav Petkov  * double reporting of the same error).
3521afaf18SBorislav Petkov  * Note that this can change when a cpu is offlined or brought online since
3621afaf18SBorislav Petkov  * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
3721afaf18SBorislav Petkov  * disables CMCI on all banks owned by the cpu and clears this bitfield. At
3821afaf18SBorislav Petkov  * this point, cmci_rediscover() kicks in and a different cpu may end up
3921afaf18SBorislav Petkov  * taking ownership of some of the shared MCA banks that were previously
4021afaf18SBorislav Petkov  * owned by the offlined cpu.
4121afaf18SBorislav Petkov  */
4221afaf18SBorislav Petkov static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
4321afaf18SBorislav Petkov 
4421afaf18SBorislav Petkov /*
4521afaf18SBorislav Petkov  * cmci_discover_lock protects against parallel discovery attempts
4621afaf18SBorislav Petkov  * which could race against each other.
4721afaf18SBorislav Petkov  */
4821afaf18SBorislav Petkov static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
4921afaf18SBorislav Petkov 
50c3629dd7SBorislav Petkov (AMD) /*
51c3629dd7SBorislav Petkov (AMD)  * On systems that do support CMCI but it's disabled, polling for MCEs can
52c3629dd7SBorislav Petkov (AMD)  * cause the same event to be reported multiple times because IA32_MCi_STATUS
53c3629dd7SBorislav Petkov (AMD)  * is shared by the same package.
54c3629dd7SBorislav Petkov (AMD)  */
55c3629dd7SBorislav Petkov (AMD) static DEFINE_SPINLOCK(cmci_poll_lock);
56c3629dd7SBorislav Petkov (AMD) 
571f68ce2aSTony Luck /* Linux non-storm CMCI threshold (may be overridden by BIOS) */
5821afaf18SBorislav Petkov #define CMCI_THRESHOLD		1
5921afaf18SBorislav Petkov 
601f68ce2aSTony Luck /*
611f68ce2aSTony Luck  * MCi_CTL2 threshold for each bank when there is no storm.
621f68ce2aSTony Luck  * Default value for each bank may have been set by BIOS.
631f68ce2aSTony Luck  */
641f68ce2aSTony Luck static u16 cmci_threshold[MAX_NR_BANKS];
651f68ce2aSTony Luck 
661f68ce2aSTony Luck /*
671f68ce2aSTony Luck  * High threshold to limit CMCI rate during storms. Max supported is
681f68ce2aSTony Luck  * 0x7FFF. Use this slightly smaller value so it has a distinctive
691f68ce2aSTony Luck  * signature when someone asks "Why am I not seeing all corrected errors?"
701f68ce2aSTony Luck  * A high threshold is used instead of just disabling CMCI for a
711f68ce2aSTony Luck  * bank because both corrected and uncorrected errors may be logged
721f68ce2aSTony Luck  * in the same bank and signalled with CMCI. The threshold only applies
731f68ce2aSTony Luck  * to corrected errors, so keeping CMCI enabled means that uncorrected
741f68ce2aSTony Luck  * errors will still be processed in a timely fashion.
751f68ce2aSTony Luck  */
761f68ce2aSTony Luck #define CMCI_STORM_THRESHOLD	32749
771f68ce2aSTony Luck 
cmci_supported(int * banks)7821afaf18SBorislav Petkov static int cmci_supported(int *banks)
7921afaf18SBorislav Petkov {
8021afaf18SBorislav Petkov 	u64 cap;
8121afaf18SBorislav Petkov 
8221afaf18SBorislav Petkov 	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
8321afaf18SBorislav Petkov 		return 0;
8421afaf18SBorislav Petkov 
8521afaf18SBorislav Petkov 	/*
8621afaf18SBorislav Petkov 	 * Vendor check is not strictly needed, but the initial
8721afaf18SBorislav Petkov 	 * initialization is vendor keyed and this
8821afaf18SBorislav Petkov 	 * makes sure none of the backdoors are entered otherwise.
8921afaf18SBorislav Petkov 	 */
905a3d56a0STony W Wang-oc 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
915a3d56a0STony W Wang-oc 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
9221afaf18SBorislav Petkov 		return 0;
935a3d56a0STony W Wang-oc 
9421afaf18SBorislav Petkov 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
9521afaf18SBorislav Petkov 		return 0;
9621afaf18SBorislav Petkov 	rdmsrl(MSR_IA32_MCG_CAP, cap);
9721afaf18SBorislav Petkov 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
9821afaf18SBorislav Petkov 	return !!(cap & MCG_CMCI_P);
9921afaf18SBorislav Petkov }
10021afaf18SBorislav Petkov 
lmce_supported(void)10121afaf18SBorislav Petkov static bool lmce_supported(void)
10221afaf18SBorislav Petkov {
10321afaf18SBorislav Petkov 	u64 tmp;
10421afaf18SBorislav Petkov 
10521afaf18SBorislav Petkov 	if (mca_cfg.lmce_disabled)
10621afaf18SBorislav Petkov 		return false;
10721afaf18SBorislav Petkov 
10821afaf18SBorislav Petkov 	rdmsrl(MSR_IA32_MCG_CAP, tmp);
10921afaf18SBorislav Petkov 
11021afaf18SBorislav Petkov 	/*
11121afaf18SBorislav Petkov 	 * LMCE depends on recovery support in the processor. Hence both
11221afaf18SBorislav Petkov 	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
11321afaf18SBorislav Petkov 	 */
11421afaf18SBorislav Petkov 	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
11521afaf18SBorislav Petkov 		   (MCG_SER_P | MCG_LMCE_P))
11621afaf18SBorislav Petkov 		return false;
11721afaf18SBorislav Petkov 
11821afaf18SBorislav Petkov 	/*
11921afaf18SBorislav Petkov 	 * BIOS should indicate support for LMCE by setting bit 20 in
12032ad73dbSSean Christopherson 	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
1216d527cebSSean Christopherson 	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
1226d527cebSSean Christopherson 	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
1236d527cebSSean Christopherson 	 * locks the MSR in the event that it wasn't already locked by BIOS.
12421afaf18SBorislav Petkov 	 */
12532ad73dbSSean Christopherson 	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
1266d527cebSSean Christopherson 	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
12721afaf18SBorislav Petkov 		return false;
1286d527cebSSean Christopherson 
1296d527cebSSean Christopherson 	return tmp & FEAT_CTL_LMCE_ENABLED;
13021afaf18SBorislav Petkov }
13121afaf18SBorislav Petkov 
13221afaf18SBorislav Petkov /*
1331f68ce2aSTony Luck  * Set a new CMCI threshold value. Preserve the state of the
1341f68ce2aSTony Luck  * MCI_CTL2_CMCI_EN bit in case this happens during a
1351f68ce2aSTony Luck  * cmci_rediscover() operation.
1361f68ce2aSTony Luck  */
cmci_set_threshold(int bank,int thresh)1371f68ce2aSTony Luck static void cmci_set_threshold(int bank, int thresh)
1381f68ce2aSTony Luck {
1391f68ce2aSTony Luck 	unsigned long flags;
1401f68ce2aSTony Luck 	u64 val;
1411f68ce2aSTony Luck 
1421f68ce2aSTony Luck 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
1431f68ce2aSTony Luck 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
1441f68ce2aSTony Luck 	val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
1451f68ce2aSTony Luck 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
1461f68ce2aSTony Luck 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
1471f68ce2aSTony Luck }
1481f68ce2aSTony Luck 
/*
 * Switch the CMCI threshold of @bank for a storm transition.
 *
 * @bank: machine check bank to reprogram
 * @on:   true selects CMCI_STORM_THRESHOLD, false selects the value
 *        recorded for this bank in cmci_threshold[]
 */
void mce_intel_handle_storm(int bank, bool on)
{
	int thresh = on ? CMCI_STORM_THRESHOLD : cmci_threshold[bank];

	cmci_set_threshold(bank, thresh);
}
1561f68ce2aSTony Luck 
1571f68ce2aSTony Luck /*
15821afaf18SBorislav Petkov  * The interrupt handler. This is called on every event.
15921afaf18SBorislav Petkov  * Just call the poller directly to log any events.
16021afaf18SBorislav Petkov  * This could in theory increase the threshold under high load,
16121afaf18SBorislav Petkov  * but doesn't for now.
16221afaf18SBorislav Petkov  */
intel_threshold_interrupt(void)16321afaf18SBorislav Petkov static void intel_threshold_interrupt(void)
16421afaf18SBorislav Petkov {
16521afaf18SBorislav Petkov 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
16621afaf18SBorislav Petkov }
16721afaf18SBorislav Petkov 
16821afaf18SBorislav Petkov /*
1691f68ce2aSTony Luck  * Check all the reasons why current CPU cannot claim
1701f68ce2aSTony Luck  * ownership of a bank.
1711f68ce2aSTony Luck  * 1: CPU already owns this bank
1721f68ce2aSTony Luck  * 2: BIOS owns this bank
1731f68ce2aSTony Luck  * 3: Some other CPU owns this bank
17421afaf18SBorislav Petkov  */
cmci_skip_bank(int bank,u64 * val)1751f68ce2aSTony Luck static bool cmci_skip_bank(int bank, u64 *val)
17621afaf18SBorislav Petkov {
17721afaf18SBorislav Petkov 	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
17821afaf18SBorislav Petkov 
1791f68ce2aSTony Luck 	if (test_bit(bank, owned))
1801f68ce2aSTony Luck 		return true;
18121afaf18SBorislav Petkov 
18221afaf18SBorislav Petkov 	/* Skip banks in firmware first mode */
1831f68ce2aSTony Luck 	if (test_bit(bank, mce_banks_ce_disabled))
1841f68ce2aSTony Luck 		return true;
18521afaf18SBorislav Petkov 
1861f68ce2aSTony Luck 	rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
18721afaf18SBorislav Petkov 
18821afaf18SBorislav Petkov 	/* Already owned by someone else? */
1891f68ce2aSTony Luck 	if (*val & MCI_CTL2_CMCI_EN) {
1901f68ce2aSTony Luck 		clear_bit(bank, owned);
1911f68ce2aSTony Luck 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
1921f68ce2aSTony Luck 		return true;
19321afaf18SBorislav Petkov 	}
19421afaf18SBorislav Petkov 
1951f68ce2aSTony Luck 	return false;
1961f68ce2aSTony Luck }
1971f68ce2aSTony Luck 
1981f68ce2aSTony Luck /*
1991f68ce2aSTony Luck  * Decide which CMCI interrupt threshold to use:
2001f68ce2aSTony Luck  * 1: If this bank is in storm mode from whichever CPU was
2011f68ce2aSTony Luck  *    the previous owner, stay in storm mode.
2021f68ce2aSTony Luck  * 2: If ignoring any threshold set by BIOS, set Linux default
2031f68ce2aSTony Luck  * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
2041f68ce2aSTony Luck  */
cmci_pick_threshold(u64 val,int * bios_zero_thresh)2051f68ce2aSTony Luck static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
2061f68ce2aSTony Luck {
2071f68ce2aSTony Luck 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
2081f68ce2aSTony Luck 		return val;
2091f68ce2aSTony Luck 
21021afaf18SBorislav Petkov 	if (!mca_cfg.bios_cmci_threshold) {
21121afaf18SBorislav Petkov 		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
21221afaf18SBorislav Petkov 		val |= CMCI_THRESHOLD;
21321afaf18SBorislav Petkov 	} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
21421afaf18SBorislav Petkov 		/*
21521afaf18SBorislav Petkov 		 * If bios_cmci_threshold boot option was specified
21621afaf18SBorislav Petkov 		 * but the threshold is zero, we'll try to initialize
21721afaf18SBorislav Petkov 		 * it to 1.
21821afaf18SBorislav Petkov 		 */
2191f68ce2aSTony Luck 		*bios_zero_thresh = 1;
22021afaf18SBorislav Petkov 		val |= CMCI_THRESHOLD;
22121afaf18SBorislav Petkov 	}
22221afaf18SBorislav Petkov 
2231f68ce2aSTony Luck 	return val;
2241f68ce2aSTony Luck }
22521afaf18SBorislav Petkov 
2261f68ce2aSTony Luck /*
2271f68ce2aSTony Luck  * Try to claim ownership of a bank.
2281f68ce2aSTony Luck  */
cmci_claim_bank(int bank,u64 val,int bios_zero_thresh,int * bios_wrong_thresh)2291f68ce2aSTony Luck static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
2301f68ce2aSTony Luck {
2311f68ce2aSTony Luck 	struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
2321f68ce2aSTony Luck 
2331f68ce2aSTony Luck 	val |= MCI_CTL2_CMCI_EN;
2341f68ce2aSTony Luck 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
2351f68ce2aSTony Luck 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
2361f68ce2aSTony Luck 
2371f68ce2aSTony Luck 	/* If the enable bit did not stick, this bank should be polled. */
2381f68ce2aSTony Luck 	if (!(val & MCI_CTL2_CMCI_EN)) {
2391f68ce2aSTony Luck 		WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
2401f68ce2aSTony Luck 		storm->banks[bank].poll_only = true;
2411f68ce2aSTony Luck 		return;
2421f68ce2aSTony Luck 	}
2431f68ce2aSTony Luck 
2441f68ce2aSTony Luck 	/* This CPU successfully set the enable bit. */
2451f68ce2aSTony Luck 	set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));
2461f68ce2aSTony Luck 
2471f68ce2aSTony Luck 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
2481f68ce2aSTony Luck 		pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
2491f68ce2aSTony Luck 		mce_inherit_storm(bank);
2501f68ce2aSTony Luck 		cmci_storm_begin(bank);
2511f68ce2aSTony Luck 	} else {
2521f68ce2aSTony Luck 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
2531f68ce2aSTony Luck 	}
2541f68ce2aSTony Luck 
25521afaf18SBorislav Petkov 	/*
25621afaf18SBorislav Petkov 	 * We are able to set thresholds for some banks that
25721afaf18SBorislav Petkov 	 * had a threshold of 0. This means the BIOS has not
25821afaf18SBorislav Petkov 	 * set the thresholds properly or does not work with
25921afaf18SBorislav Petkov 	 * this boot option. Note down now and report later.
26021afaf18SBorislav Petkov 	 */
26121afaf18SBorislav Petkov 	if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
26221afaf18SBorislav Petkov 	    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
2631f68ce2aSTony Luck 		*bios_wrong_thresh = 1;
2641f68ce2aSTony Luck 
2651f68ce2aSTony Luck 	/* Save default threshold for each bank */
2661f68ce2aSTony Luck 	if (cmci_threshold[bank] == 0)
2671f68ce2aSTony Luck 		cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
26821afaf18SBorislav Petkov }
2691f68ce2aSTony Luck 
2701f68ce2aSTony Luck /*
2711f68ce2aSTony Luck  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
2721f68ce2aSTony Luck  * on this CPU. Use the algorithm recommended in the SDM to discover shared
2731f68ce2aSTony Luck  * banks. Called during initial bootstrap, and also for hotplug CPU operations
2741f68ce2aSTony Luck  * to rediscover/reassign machine check banks.
2751f68ce2aSTony Luck  */
cmci_discover(int banks)2761f68ce2aSTony Luck static void cmci_discover(int banks)
2771f68ce2aSTony Luck {
2781f68ce2aSTony Luck 	int bios_wrong_thresh = 0;
2791f68ce2aSTony Luck 	unsigned long flags;
2801f68ce2aSTony Luck 	int i;
2811f68ce2aSTony Luck 
2821f68ce2aSTony Luck 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
2831f68ce2aSTony Luck 	for (i = 0; i < banks; i++) {
2841f68ce2aSTony Luck 		u64 val;
2851f68ce2aSTony Luck 		int bios_zero_thresh = 0;
2861f68ce2aSTony Luck 
2871f68ce2aSTony Luck 		if (cmci_skip_bank(i, &val))
2881f68ce2aSTony Luck 			continue;
2891f68ce2aSTony Luck 
2901f68ce2aSTony Luck 		val = cmci_pick_threshold(val, &bios_zero_thresh);
2911f68ce2aSTony Luck 		cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
29221afaf18SBorislav Petkov 	}
29321afaf18SBorislav Petkov 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
29421afaf18SBorislav Petkov 	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
29521afaf18SBorislav Petkov 		pr_info_once(
29621afaf18SBorislav Petkov 			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
29721afaf18SBorislav Petkov 		pr_info_once(
29821afaf18SBorislav Petkov 			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
29921afaf18SBorislav Petkov 	}
30021afaf18SBorislav Petkov }
30121afaf18SBorislav Petkov 
30221afaf18SBorislav Petkov /*
30321afaf18SBorislav Petkov  * Just in case we missed an event during initialization check
30421afaf18SBorislav Petkov  * all the CMCI owned banks.
30521afaf18SBorislav Petkov  */
cmci_recheck(void)30621afaf18SBorislav Petkov void cmci_recheck(void)
30721afaf18SBorislav Petkov {
30821afaf18SBorislav Petkov 	unsigned long flags;
30921afaf18SBorislav Petkov 	int banks;
31021afaf18SBorislav Petkov 
31121afaf18SBorislav Petkov 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
31221afaf18SBorislav Petkov 		return;
31321afaf18SBorislav Petkov 
31421afaf18SBorislav Petkov 	local_irq_save(flags);
31521afaf18SBorislav Petkov 	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
31621afaf18SBorislav Petkov 	local_irq_restore(flags);
31721afaf18SBorislav Petkov }
31821afaf18SBorislav Petkov 
31921afaf18SBorislav Petkov /* Caller must hold the lock on cmci_discover_lock */
__cmci_disable_bank(int bank)32021afaf18SBorislav Petkov static void __cmci_disable_bank(int bank)
32121afaf18SBorislav Petkov {
32221afaf18SBorislav Petkov 	u64 val;
32321afaf18SBorislav Petkov 
32421afaf18SBorislav Petkov 	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
32521afaf18SBorislav Petkov 		return;
32621afaf18SBorislav Petkov 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
32721afaf18SBorislav Petkov 	val &= ~MCI_CTL2_CMCI_EN;
32821afaf18SBorislav Petkov 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
32921afaf18SBorislav Petkov 	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
3301f68ce2aSTony Luck 
3311f68ce2aSTony Luck 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
3321f68ce2aSTony Luck 		cmci_storm_end(bank);
33321afaf18SBorislav Petkov }
33421afaf18SBorislav Petkov 
33521afaf18SBorislav Petkov /*
33621afaf18SBorislav Petkov  * Disable CMCI on this CPU for all banks it owns when it goes down.
33721afaf18SBorislav Petkov  * This allows other CPUs to claim the banks on rediscovery.
33821afaf18SBorislav Petkov  */
cmci_clear(void)33921afaf18SBorislav Petkov void cmci_clear(void)
34021afaf18SBorislav Petkov {
34121afaf18SBorislav Petkov 	unsigned long flags;
34221afaf18SBorislav Petkov 	int i;
34321afaf18SBorislav Petkov 	int banks;
34421afaf18SBorislav Petkov 
34521afaf18SBorislav Petkov 	if (!cmci_supported(&banks))
34621afaf18SBorislav Petkov 		return;
34721afaf18SBorislav Petkov 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
34821afaf18SBorislav Petkov 	for (i = 0; i < banks; i++)
34921afaf18SBorislav Petkov 		__cmci_disable_bank(i);
35021afaf18SBorislav Petkov 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
35121afaf18SBorislav Petkov }
35221afaf18SBorislav Petkov 
/*
 * Per-CPU callback used by cmci_rediscover(). @arg is unused.
 *
 * The bank count is re-read on each CPU in case CPUs do not all have the
 * same number of banks.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	cmci_discover(banks);
}
36121afaf18SBorislav Petkov 
36221afaf18SBorislav Petkov /* After a CPU went down cycle through all the others and rediscover */
cmci_rediscover(void)36321afaf18SBorislav Petkov void cmci_rediscover(void)
36421afaf18SBorislav Petkov {
36521afaf18SBorislav Petkov 	int banks;
36621afaf18SBorislav Petkov 
36721afaf18SBorislav Petkov 	if (!cmci_supported(&banks))
36821afaf18SBorislav Petkov 		return;
36921afaf18SBorislav Petkov 
37021afaf18SBorislav Petkov 	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
37121afaf18SBorislav Petkov }
37221afaf18SBorislav Petkov 
/*
 * Re-enable CMCI on this CPU by running discovery again, for the case
 * where taking a CPU down failed. No-op when CMCI is not supported.
 */
void cmci_reenable(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	cmci_discover(banks);
}
38221afaf18SBorislav Petkov 
cmci_disable_bank(int bank)38321afaf18SBorislav Petkov void cmci_disable_bank(int bank)
38421afaf18SBorislav Petkov {
38521afaf18SBorislav Petkov 	int banks;
38621afaf18SBorislav Petkov 	unsigned long flags;
38721afaf18SBorislav Petkov 
38821afaf18SBorislav Petkov 	if (!cmci_supported(&banks))
38921afaf18SBorislav Petkov 		return;
39021afaf18SBorislav Petkov 
39121afaf18SBorislav Petkov 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
39221afaf18SBorislav Petkov 	__cmci_disable_bank(bank);
39321afaf18SBorislav Petkov 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
39421afaf18SBorislav Petkov }
39521afaf18SBorislav Petkov 
396c3629dd7SBorislav Petkov (AMD) /* Bank polling function when CMCI is disabled. */
cmci_mc_poll_banks(void)397c3629dd7SBorislav Petkov (AMD) static void cmci_mc_poll_banks(void)
398c3629dd7SBorislav Petkov (AMD) {
399c3629dd7SBorislav Petkov (AMD) 	spin_lock(&cmci_poll_lock);
400c3629dd7SBorislav Petkov (AMD) 	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
401c3629dd7SBorislav Petkov (AMD) 	spin_unlock(&cmci_poll_lock);
402c3629dd7SBorislav Petkov (AMD) }
403c3629dd7SBorislav Petkov (AMD) 
intel_init_cmci(void)4045a3d56a0STony W Wang-oc void intel_init_cmci(void)
40521afaf18SBorislav Petkov {
40621afaf18SBorislav Petkov 	int banks;
40721afaf18SBorislav Petkov 
408c3629dd7SBorislav Petkov (AMD) 	if (!cmci_supported(&banks)) {
409c3629dd7SBorislav Petkov (AMD) 		mc_poll_banks = cmci_mc_poll_banks;
41021afaf18SBorislav Petkov 		return;
411c3629dd7SBorislav Petkov (AMD) 	}
41221afaf18SBorislav Petkov 
41321afaf18SBorislav Petkov 	mce_threshold_vector = intel_threshold_interrupt;
41421afaf18SBorislav Petkov 	cmci_discover(banks);
41521afaf18SBorislav Petkov 	/*
41621afaf18SBorislav Petkov 	 * For CPU #0 this runs with still disabled APIC, but that's
41721afaf18SBorislav Petkov 	 * ok because only the vector is set up. We still do another
41821afaf18SBorislav Petkov 	 * check for the banks later for CPU #0 just to make sure
41921afaf18SBorislav Petkov 	 * to not miss any events.
42021afaf18SBorislav Petkov 	 */
42121afaf18SBorislav Petkov 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
42221afaf18SBorislav Petkov 	cmci_recheck();
42321afaf18SBorislav Petkov }
42421afaf18SBorislav Petkov 
intel_init_lmce(void)42570f0c230STony W Wang-oc void intel_init_lmce(void)
42621afaf18SBorislav Petkov {
42721afaf18SBorislav Petkov 	u64 val;
42821afaf18SBorislav Petkov 
42921afaf18SBorislav Petkov 	if (!lmce_supported())
43021afaf18SBorislav Petkov 		return;
43121afaf18SBorislav Petkov 
43221afaf18SBorislav Petkov 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
43321afaf18SBorislav Petkov 
43421afaf18SBorislav Petkov 	if (!(val & MCG_EXT_CTL_LMCE_EN))
43521afaf18SBorislav Petkov 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
43621afaf18SBorislav Petkov }
43721afaf18SBorislav Petkov 
intel_clear_lmce(void)43870f0c230STony W Wang-oc void intel_clear_lmce(void)
43921afaf18SBorislav Petkov {
44021afaf18SBorislav Petkov 	u64 val;
44121afaf18SBorislav Petkov 
44221afaf18SBorislav Petkov 	if (!lmce_supported())
44321afaf18SBorislav Petkov 		return;
44421afaf18SBorislav Petkov 
44521afaf18SBorislav Petkov 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
44621afaf18SBorislav Petkov 	val &= ~MCG_EXT_CTL_LMCE_EN;
44721afaf18SBorislav Petkov 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
44821afaf18SBorislav Petkov }
44921afaf18SBorislav Petkov 
45068299a42STony Luck /*
45168299a42STony Luck  * Enable additional error logs from the integrated
45268299a42STony Luck  * memory controller on processors that support this.
45368299a42STony Luck  */
intel_imc_init(struct cpuinfo_x86 * c)45468299a42STony Luck static void intel_imc_init(struct cpuinfo_x86 *c)
45568299a42STony Luck {
45668299a42STony Luck 	u64 error_control;
45768299a42STony Luck 
458*4a5f2dd1STony Luck 	switch (c->x86_vfm) {
459*4a5f2dd1STony Luck 	case INTEL_SANDYBRIDGE_X:
460*4a5f2dd1STony Luck 	case INTEL_IVYBRIDGE_X:
461*4a5f2dd1STony Luck 	case INTEL_HASWELL_X:
462098416e6STony Luck 		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
463098416e6STony Luck 			return;
46468299a42STony Luck 		error_control |= 2;
465098416e6STony Luck 		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
46668299a42STony Luck 		break;
46768299a42STony Luck 	}
46868299a42STony Luck }
46968299a42STony Luck 
/*
 * Initialize the Intel-specific MCE features for a CPU, in this order:
 * CMCI, then LMCE, then the extra memory controller logging.
 *
 * @c: CPU description; only the memory controller step looks at it.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}
47621afaf18SBorislav Petkov 
/*
 * Undo the Intel-specific MCE feature setup that needs undoing. Only LMCE
 * is cleared here and @c is not used.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
4812976908eSPrarit Bhargava 
intel_filter_mce(struct mce * m)4822976908eSPrarit Bhargava bool intel_filter_mce(struct mce *m)
4832976908eSPrarit Bhargava {
4842976908eSPrarit Bhargava 	struct cpuinfo_x86 *c = &boot_cpu_data;
4852976908eSPrarit Bhargava 
486e629fc14SDave Jones 	/* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
487*4a5f2dd1STony Luck 	if ((c->x86_vfm == INTEL_HASWELL ||
488*4a5f2dd1STony Luck 	     c->x86_vfm == INTEL_HASWELL_L ||
489*4a5f2dd1STony Luck 	     c->x86_vfm == INTEL_BROADWELL ||
490*4a5f2dd1STony Luck 	     c->x86_vfm == INTEL_HASWELL_G ||
491*4a5f2dd1STony Luck 	     c->x86_vfm == INTEL_SKYLAKE_X) &&
4922976908eSPrarit Bhargava 	    (m->bank == 0) &&
4932976908eSPrarit Bhargava 	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
4942976908eSPrarit Bhargava 		return true;
4952976908eSPrarit Bhargava 
4962976908eSPrarit Bhargava 	return false;
4972976908eSPrarit Bhargava }
4981bae0cfeSYazen Ghannam 
4991bae0cfeSYazen Ghannam /*
5001bae0cfeSYazen Ghannam  * Check if the address reported by the CPU is in a format we can parse.
5011bae0cfeSYazen Ghannam  * It would be possible to add code for most other cases, but all would
5021bae0cfeSYazen Ghannam  * be somewhat complicated (e.g. segment offset would require an instruction
5031bae0cfeSYazen Ghannam  * parser). So only support physical addresses up to page granularity for now.
5041bae0cfeSYazen Ghannam  */
intel_mce_usable_address(struct mce * m)5051bae0cfeSYazen Ghannam bool intel_mce_usable_address(struct mce *m)
5061bae0cfeSYazen Ghannam {
5071bae0cfeSYazen Ghannam 	if (!(m->status & MCI_STATUS_MISCV))
5081bae0cfeSYazen Ghannam 		return false;
5091bae0cfeSYazen Ghannam 
5101bae0cfeSYazen Ghannam 	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
5111bae0cfeSYazen Ghannam 		return false;
5121bae0cfeSYazen Ghannam 
5131bae0cfeSYazen Ghannam 	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
5141bae0cfeSYazen Ghannam 		return false;
5151bae0cfeSYazen Ghannam 
5161bae0cfeSYazen Ghannam 	return true;
5171bae0cfeSYazen Ghannam }
518