121afaf18SBorislav Petkov // SPDX-License-Identifier: GPL-2.0
221afaf18SBorislav Petkov /*
321afaf18SBorislav Petkov * Intel specific MCE features.
421afaf18SBorislav Petkov * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
521afaf18SBorislav Petkov * Copyright (C) 2008, 2009 Intel Corporation
621afaf18SBorislav Petkov * Author: Andi Kleen
721afaf18SBorislav Petkov */
821afaf18SBorislav Petkov
921afaf18SBorislav Petkov #include <linux/gfp.h>
1021afaf18SBorislav Petkov #include <linux/interrupt.h>
1121afaf18SBorislav Petkov #include <linux/percpu.h>
1221afaf18SBorislav Petkov #include <linux/sched.h>
1321afaf18SBorislav Petkov #include <linux/cpumask.h>
1421afaf18SBorislav Petkov #include <asm/apic.h>
1521afaf18SBorislav Petkov #include <asm/cpufeature.h>
16*4a5f2dd1STony Luck #include <asm/cpu_device_id.h>
1721afaf18SBorislav Petkov #include <asm/processor.h>
1821afaf18SBorislav Petkov #include <asm/msr.h>
1921afaf18SBorislav Petkov #include <asm/mce.h>
2021afaf18SBorislav Petkov
2121afaf18SBorislav Petkov #include "internal.h"
2221afaf18SBorislav Petkov
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check has happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */
2921afaf18SBorislav Petkov
3021afaf18SBorislav Petkov /*
3121afaf18SBorislav Petkov * CMCI can be delivered to multiple cpus that share a machine check bank
3221afaf18SBorislav Petkov * so we need to designate a single cpu to process errors logged in each bank
3321afaf18SBorislav Petkov * in the interrupt handler (otherwise we would have many races and potential
3421afaf18SBorislav Petkov * double reporting of the same error).
3521afaf18SBorislav Petkov * Note that this can change when a cpu is offlined or brought online since
3621afaf18SBorislav Petkov * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
3721afaf18SBorislav Petkov * disables CMCI on all banks owned by the cpu and clears this bitfield. At
3821afaf18SBorislav Petkov * this point, cmci_rediscover() kicks in and a different cpu may end up
3921afaf18SBorislav Petkov * taking ownership of some of the shared MCA banks that were previously
4021afaf18SBorislav Petkov * owned by the offlined cpu.
4121afaf18SBorislav Petkov */
4221afaf18SBorislav Petkov static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
4321afaf18SBorislav Petkov
4421afaf18SBorislav Petkov /*
4521afaf18SBorislav Petkov * cmci_discover_lock protects against parallel discovery attempts
4621afaf18SBorislav Petkov * which could race against each other.
4721afaf18SBorislav Petkov */
4821afaf18SBorislav Petkov static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
4921afaf18SBorislav Petkov
50c3629dd7SBorislav Petkov (AMD) /*
51c3629dd7SBorislav Petkov (AMD) * On systems that do support CMCI but it's disabled, polling for MCEs can
52c3629dd7SBorislav Petkov (AMD) * cause the same event to be reported multiple times because IA32_MCi_STATUS
53c3629dd7SBorislav Petkov (AMD) * is shared by the same package.
54c3629dd7SBorislav Petkov (AMD) */
55c3629dd7SBorislav Petkov (AMD) static DEFINE_SPINLOCK(cmci_poll_lock);
56c3629dd7SBorislav Petkov (AMD)
571f68ce2aSTony Luck /* Linux non-storm CMCI threshold (may be overridden by BIOS) */
5821afaf18SBorislav Petkov #define CMCI_THRESHOLD 1
5921afaf18SBorislav Petkov
601f68ce2aSTony Luck /*
611f68ce2aSTony Luck * MCi_CTL2 threshold for each bank when there is no storm.
621f68ce2aSTony Luck * Default value for each bank may have been set by BIOS.
631f68ce2aSTony Luck */
641f68ce2aSTony Luck static u16 cmci_threshold[MAX_NR_BANKS];
651f68ce2aSTony Luck
661f68ce2aSTony Luck /*
671f68ce2aSTony Luck * High threshold to limit CMCI rate during storms. Max supported is
681f68ce2aSTony Luck * 0x7FFF. Use this slightly smaller value so it has a distinctive
691f68ce2aSTony Luck * signature when some asks "Why am I not seeing all corrected errors?"
701f68ce2aSTony Luck * A high threshold is used instead of just disabling CMCI for a
711f68ce2aSTony Luck * bank because both corrected and uncorrected errors may be logged
721f68ce2aSTony Luck * in the same bank and signalled with CMCI. The threshold only applies
731f68ce2aSTony Luck * to corrected errors, so keeping CMCI enabled means that uncorrected
741f68ce2aSTony Luck * errors will still be processed in a timely fashion.
751f68ce2aSTony Luck */
761f68ce2aSTony Luck #define CMCI_STORM_THRESHOLD 32749
771f68ce2aSTony Luck
cmci_supported(int * banks)7821afaf18SBorislav Petkov static int cmci_supported(int *banks)
7921afaf18SBorislav Petkov {
8021afaf18SBorislav Petkov u64 cap;
8121afaf18SBorislav Petkov
8221afaf18SBorislav Petkov if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
8321afaf18SBorislav Petkov return 0;
8421afaf18SBorislav Petkov
8521afaf18SBorislav Petkov /*
8621afaf18SBorislav Petkov * Vendor check is not strictly needed, but the initial
8721afaf18SBorislav Petkov * initialization is vendor keyed and this
8821afaf18SBorislav Petkov * makes sure none of the backdoors are entered otherwise.
8921afaf18SBorislav Petkov */
905a3d56a0STony W Wang-oc if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
915a3d56a0STony W Wang-oc boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
9221afaf18SBorislav Petkov return 0;
935a3d56a0STony W Wang-oc
9421afaf18SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
9521afaf18SBorislav Petkov return 0;
9621afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_CAP, cap);
9721afaf18SBorislav Petkov *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
9821afaf18SBorislav Petkov return !!(cap & MCG_CMCI_P);
9921afaf18SBorislav Petkov }
10021afaf18SBorislav Petkov
lmce_supported(void)10121afaf18SBorislav Petkov static bool lmce_supported(void)
10221afaf18SBorislav Petkov {
10321afaf18SBorislav Petkov u64 tmp;
10421afaf18SBorislav Petkov
10521afaf18SBorislav Petkov if (mca_cfg.lmce_disabled)
10621afaf18SBorislav Petkov return false;
10721afaf18SBorislav Petkov
10821afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_CAP, tmp);
10921afaf18SBorislav Petkov
11021afaf18SBorislav Petkov /*
11121afaf18SBorislav Petkov * LMCE depends on recovery support in the processor. Hence both
11221afaf18SBorislav Petkov * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
11321afaf18SBorislav Petkov */
11421afaf18SBorislav Petkov if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
11521afaf18SBorislav Petkov (MCG_SER_P | MCG_LMCE_P))
11621afaf18SBorislav Petkov return false;
11721afaf18SBorislav Petkov
11821afaf18SBorislav Petkov /*
11921afaf18SBorislav Petkov * BIOS should indicate support for LMCE by setting bit 20 in
12032ad73dbSSean Christopherson * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
1216d527cebSSean Christopherson * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
1226d527cebSSean Christopherson * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
1236d527cebSSean Christopherson * locks the MSR in the event that it wasn't already locked by BIOS.
12421afaf18SBorislav Petkov */
12532ad73dbSSean Christopherson rdmsrl(MSR_IA32_FEAT_CTL, tmp);
1266d527cebSSean Christopherson if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
12721afaf18SBorislav Petkov return false;
1286d527cebSSean Christopherson
1296d527cebSSean Christopherson return tmp & FEAT_CTL_LMCE_ENABLED;
13021afaf18SBorislav Petkov }
13121afaf18SBorislav Petkov
13221afaf18SBorislav Petkov /*
1331f68ce2aSTony Luck * Set a new CMCI threshold value. Preserve the state of the
1341f68ce2aSTony Luck * MCI_CTL2_CMCI_EN bit in case this happens during a
1351f68ce2aSTony Luck * cmci_rediscover() operation.
1361f68ce2aSTony Luck */
cmci_set_threshold(int bank,int thresh)1371f68ce2aSTony Luck static void cmci_set_threshold(int bank, int thresh)
1381f68ce2aSTony Luck {
1391f68ce2aSTony Luck unsigned long flags;
1401f68ce2aSTony Luck u64 val;
1411f68ce2aSTony Luck
1421f68ce2aSTony Luck raw_spin_lock_irqsave(&cmci_discover_lock, flags);
1431f68ce2aSTony Luck rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
1441f68ce2aSTony Luck val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
1451f68ce2aSTony Luck wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
1461f68ce2aSTony Luck raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
1471f68ce2aSTony Luck }
1481f68ce2aSTony Luck
/*
 * Switch a bank's CMCI threshold for storm handling.
 *
 * @bank: bank whose threshold is changed
 * @on:   true selects CMCI_STORM_THRESHOLD, false restores the value saved
 *        for this bank in cmci_threshold[]
 */
void mce_intel_handle_storm(int bank, bool on)
{
	int thresh = on ? CMCI_STORM_THRESHOLD : cmci_threshold[bank];

	cmci_set_threshold(bank, thresh);
}
1561f68ce2aSTony Luck
1571f68ce2aSTony Luck /*
15821afaf18SBorislav Petkov * The interrupt handler. This is called on every event.
15921afaf18SBorislav Petkov * Just call the poller directly to log any events.
16021afaf18SBorislav Petkov * This could in theory increase the threshold under high load,
16121afaf18SBorislav Petkov * but doesn't for now.
16221afaf18SBorislav Petkov */
intel_threshold_interrupt(void)16321afaf18SBorislav Petkov static void intel_threshold_interrupt(void)
16421afaf18SBorislav Petkov {
16521afaf18SBorislav Petkov machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
16621afaf18SBorislav Petkov }
16721afaf18SBorislav Petkov
16821afaf18SBorislav Petkov /*
1691f68ce2aSTony Luck * Check all the reasons why current CPU cannot claim
1701f68ce2aSTony Luck * ownership of a bank.
1711f68ce2aSTony Luck * 1: CPU already owns this bank
1721f68ce2aSTony Luck * 2: BIOS owns this bank
1731f68ce2aSTony Luck * 3: Some other CPU owns this bank
17421afaf18SBorislav Petkov */
cmci_skip_bank(int bank,u64 * val)1751f68ce2aSTony Luck static bool cmci_skip_bank(int bank, u64 *val)
17621afaf18SBorislav Petkov {
17721afaf18SBorislav Petkov unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
17821afaf18SBorislav Petkov
1791f68ce2aSTony Luck if (test_bit(bank, owned))
1801f68ce2aSTony Luck return true;
18121afaf18SBorislav Petkov
18221afaf18SBorislav Petkov /* Skip banks in firmware first mode */
1831f68ce2aSTony Luck if (test_bit(bank, mce_banks_ce_disabled))
1841f68ce2aSTony Luck return true;
18521afaf18SBorislav Petkov
1861f68ce2aSTony Luck rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
18721afaf18SBorislav Petkov
18821afaf18SBorislav Petkov /* Already owned by someone else? */
1891f68ce2aSTony Luck if (*val & MCI_CTL2_CMCI_EN) {
1901f68ce2aSTony Luck clear_bit(bank, owned);
1911f68ce2aSTony Luck __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
1921f68ce2aSTony Luck return true;
19321afaf18SBorislav Petkov }
19421afaf18SBorislav Petkov
1951f68ce2aSTony Luck return false;
1961f68ce2aSTony Luck }
1971f68ce2aSTony Luck
1981f68ce2aSTony Luck /*
1991f68ce2aSTony Luck * Decide which CMCI interrupt threshold to use:
2001f68ce2aSTony Luck * 1: If this bank is in storm mode from whichever CPU was
2011f68ce2aSTony Luck * the previous owner, stay in storm mode.
2021f68ce2aSTony Luck * 2: If ignoring any threshold set by BIOS, set Linux default
2031f68ce2aSTony Luck * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
2041f68ce2aSTony Luck */
cmci_pick_threshold(u64 val,int * bios_zero_thresh)2051f68ce2aSTony Luck static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
2061f68ce2aSTony Luck {
2071f68ce2aSTony Luck if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
2081f68ce2aSTony Luck return val;
2091f68ce2aSTony Luck
21021afaf18SBorislav Petkov if (!mca_cfg.bios_cmci_threshold) {
21121afaf18SBorislav Petkov val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
21221afaf18SBorislav Petkov val |= CMCI_THRESHOLD;
21321afaf18SBorislav Petkov } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
21421afaf18SBorislav Petkov /*
21521afaf18SBorislav Petkov * If bios_cmci_threshold boot option was specified
21621afaf18SBorislav Petkov * but the threshold is zero, we'll try to initialize
21721afaf18SBorislav Petkov * it to 1.
21821afaf18SBorislav Petkov */
2191f68ce2aSTony Luck *bios_zero_thresh = 1;
22021afaf18SBorislav Petkov val |= CMCI_THRESHOLD;
22121afaf18SBorislav Petkov }
22221afaf18SBorislav Petkov
2231f68ce2aSTony Luck return val;
2241f68ce2aSTony Luck }
22521afaf18SBorislav Petkov
2261f68ce2aSTony Luck /*
2271f68ce2aSTony Luck * Try to claim ownership of a bank.
2281f68ce2aSTony Luck */
cmci_claim_bank(int bank,u64 val,int bios_zero_thresh,int * bios_wrong_thresh)2291f68ce2aSTony Luck static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
2301f68ce2aSTony Luck {
2311f68ce2aSTony Luck struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
2321f68ce2aSTony Luck
2331f68ce2aSTony Luck val |= MCI_CTL2_CMCI_EN;
2341f68ce2aSTony Luck wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
2351f68ce2aSTony Luck rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
2361f68ce2aSTony Luck
2371f68ce2aSTony Luck /* If the enable bit did not stick, this bank should be polled. */
2381f68ce2aSTony Luck if (!(val & MCI_CTL2_CMCI_EN)) {
2391f68ce2aSTony Luck WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
2401f68ce2aSTony Luck storm->banks[bank].poll_only = true;
2411f68ce2aSTony Luck return;
2421f68ce2aSTony Luck }
2431f68ce2aSTony Luck
2441f68ce2aSTony Luck /* This CPU successfully set the enable bit. */
2451f68ce2aSTony Luck set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));
2461f68ce2aSTony Luck
2471f68ce2aSTony Luck if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
2481f68ce2aSTony Luck pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
2491f68ce2aSTony Luck mce_inherit_storm(bank);
2501f68ce2aSTony Luck cmci_storm_begin(bank);
2511f68ce2aSTony Luck } else {
2521f68ce2aSTony Luck __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
2531f68ce2aSTony Luck }
2541f68ce2aSTony Luck
25521afaf18SBorislav Petkov /*
25621afaf18SBorislav Petkov * We are able to set thresholds for some banks that
25721afaf18SBorislav Petkov * had a threshold of 0. This means the BIOS has not
25821afaf18SBorislav Petkov * set the thresholds properly or does not work with
25921afaf18SBorislav Petkov * this boot option. Note down now and report later.
26021afaf18SBorislav Petkov */
26121afaf18SBorislav Petkov if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
26221afaf18SBorislav Petkov (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
2631f68ce2aSTony Luck *bios_wrong_thresh = 1;
2641f68ce2aSTony Luck
2651f68ce2aSTony Luck /* Save default threshold for each bank */
2661f68ce2aSTony Luck if (cmci_threshold[bank] == 0)
2671f68ce2aSTony Luck cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
26821afaf18SBorislav Petkov }
2691f68ce2aSTony Luck
2701f68ce2aSTony Luck /*
2711f68ce2aSTony Luck * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
2721f68ce2aSTony Luck * on this CPU. Use the algorithm recommended in the SDM to discover shared
2731f68ce2aSTony Luck * banks. Called during initial bootstrap, and also for hotplug CPU operations
2741f68ce2aSTony Luck * to rediscover/reassign machine check banks.
2751f68ce2aSTony Luck */
cmci_discover(int banks)2761f68ce2aSTony Luck static void cmci_discover(int banks)
2771f68ce2aSTony Luck {
2781f68ce2aSTony Luck int bios_wrong_thresh = 0;
2791f68ce2aSTony Luck unsigned long flags;
2801f68ce2aSTony Luck int i;
2811f68ce2aSTony Luck
2821f68ce2aSTony Luck raw_spin_lock_irqsave(&cmci_discover_lock, flags);
2831f68ce2aSTony Luck for (i = 0; i < banks; i++) {
2841f68ce2aSTony Luck u64 val;
2851f68ce2aSTony Luck int bios_zero_thresh = 0;
2861f68ce2aSTony Luck
2871f68ce2aSTony Luck if (cmci_skip_bank(i, &val))
2881f68ce2aSTony Luck continue;
2891f68ce2aSTony Luck
2901f68ce2aSTony Luck val = cmci_pick_threshold(val, &bios_zero_thresh);
2911f68ce2aSTony Luck cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
29221afaf18SBorislav Petkov }
29321afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
29421afaf18SBorislav Petkov if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
29521afaf18SBorislav Petkov pr_info_once(
29621afaf18SBorislav Petkov "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
29721afaf18SBorislav Petkov pr_info_once(
29821afaf18SBorislav Petkov "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
29921afaf18SBorislav Petkov }
30021afaf18SBorislav Petkov }
30121afaf18SBorislav Petkov
30221afaf18SBorislav Petkov /*
30321afaf18SBorislav Petkov * Just in case we missed an event during initialization check
30421afaf18SBorislav Petkov * all the CMCI owned banks.
30521afaf18SBorislav Petkov */
cmci_recheck(void)30621afaf18SBorislav Petkov void cmci_recheck(void)
30721afaf18SBorislav Petkov {
30821afaf18SBorislav Petkov unsigned long flags;
30921afaf18SBorislav Petkov int banks;
31021afaf18SBorislav Petkov
31121afaf18SBorislav Petkov if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
31221afaf18SBorislav Petkov return;
31321afaf18SBorislav Petkov
31421afaf18SBorislav Petkov local_irq_save(flags);
31521afaf18SBorislav Petkov machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
31621afaf18SBorislav Petkov local_irq_restore(flags);
31721afaf18SBorislav Petkov }
31821afaf18SBorislav Petkov
31921afaf18SBorislav Petkov /* Caller must hold the lock on cmci_discover_lock */
__cmci_disable_bank(int bank)32021afaf18SBorislav Petkov static void __cmci_disable_bank(int bank)
32121afaf18SBorislav Petkov {
32221afaf18SBorislav Petkov u64 val;
32321afaf18SBorislav Petkov
32421afaf18SBorislav Petkov if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
32521afaf18SBorislav Petkov return;
32621afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
32721afaf18SBorislav Petkov val &= ~MCI_CTL2_CMCI_EN;
32821afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
32921afaf18SBorislav Petkov __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
3301f68ce2aSTony Luck
3311f68ce2aSTony Luck if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
3321f68ce2aSTony Luck cmci_storm_end(bank);
33321afaf18SBorislav Petkov }
33421afaf18SBorislav Petkov
33521afaf18SBorislav Petkov /*
33621afaf18SBorislav Petkov * Disable CMCI on this CPU for all banks it owns when it goes down.
33721afaf18SBorislav Petkov * This allows other CPUs to claim the banks on rediscovery.
33821afaf18SBorislav Petkov */
cmci_clear(void)33921afaf18SBorislav Petkov void cmci_clear(void)
34021afaf18SBorislav Petkov {
34121afaf18SBorislav Petkov unsigned long flags;
34221afaf18SBorislav Petkov int i;
34321afaf18SBorislav Petkov int banks;
34421afaf18SBorislav Petkov
34521afaf18SBorislav Petkov if (!cmci_supported(&banks))
34621afaf18SBorislav Petkov return;
34721afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags);
34821afaf18SBorislav Petkov for (i = 0; i < banks; i++)
34921afaf18SBorislav Petkov __cmci_disable_bank(i);
35021afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
35121afaf18SBorislav Petkov }
35221afaf18SBorislav Petkov
/*
 * Per-CPU callback used by cmci_rediscover(): run bank discovery on the
 * CPU executing it. @arg is unused.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	/* Recheck banks in case CPUs don't all have the same */
	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
36121afaf18SBorislav Petkov
36221afaf18SBorislav Petkov /* After a CPU went down cycle through all the others and rediscover */
cmci_rediscover(void)36321afaf18SBorislav Petkov void cmci_rediscover(void)
36421afaf18SBorislav Petkov {
36521afaf18SBorislav Petkov int banks;
36621afaf18SBorislav Petkov
36721afaf18SBorislav Petkov if (!cmci_supported(&banks))
36821afaf18SBorislav Petkov return;
36921afaf18SBorislav Petkov
37021afaf18SBorislav Petkov on_each_cpu(cmci_rediscover_work_func, NULL, 1);
37121afaf18SBorislav Petkov }
37221afaf18SBorislav Petkov
/*
 * Reenable CMCI on this CPU in case a CPU down failed, by running bank
 * discovery again.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
38221afaf18SBorislav Petkov
cmci_disable_bank(int bank)38321afaf18SBorislav Petkov void cmci_disable_bank(int bank)
38421afaf18SBorislav Petkov {
38521afaf18SBorislav Petkov int banks;
38621afaf18SBorislav Petkov unsigned long flags;
38721afaf18SBorislav Petkov
38821afaf18SBorislav Petkov if (!cmci_supported(&banks))
38921afaf18SBorislav Petkov return;
39021afaf18SBorislav Petkov
39121afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags);
39221afaf18SBorislav Petkov __cmci_disable_bank(bank);
39321afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
39421afaf18SBorislav Petkov }
39521afaf18SBorislav Petkov
396c3629dd7SBorislav Petkov (AMD) /* Bank polling function when CMCI is disabled. */
cmci_mc_poll_banks(void)397c3629dd7SBorislav Petkov (AMD) static void cmci_mc_poll_banks(void)
398c3629dd7SBorislav Petkov (AMD) {
399c3629dd7SBorislav Petkov (AMD) spin_lock(&cmci_poll_lock);
400c3629dd7SBorislav Petkov (AMD) machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
401c3629dd7SBorislav Petkov (AMD) spin_unlock(&cmci_poll_lock);
402c3629dd7SBorislav Petkov (AMD) }
403c3629dd7SBorislav Petkov (AMD)
intel_init_cmci(void)4045a3d56a0STony W Wang-oc void intel_init_cmci(void)
40521afaf18SBorislav Petkov {
40621afaf18SBorislav Petkov int banks;
40721afaf18SBorislav Petkov
408c3629dd7SBorislav Petkov (AMD) if (!cmci_supported(&banks)) {
409c3629dd7SBorislav Petkov (AMD) mc_poll_banks = cmci_mc_poll_banks;
41021afaf18SBorislav Petkov return;
411c3629dd7SBorislav Petkov (AMD) }
41221afaf18SBorislav Petkov
41321afaf18SBorislav Petkov mce_threshold_vector = intel_threshold_interrupt;
41421afaf18SBorislav Petkov cmci_discover(banks);
41521afaf18SBorislav Petkov /*
41621afaf18SBorislav Petkov * For CPU #0 this runs with still disabled APIC, but that's
41721afaf18SBorislav Petkov * ok because only the vector is set up. We still do another
41821afaf18SBorislav Petkov * check for the banks later for CPU #0 just to make sure
41921afaf18SBorislav Petkov * to not miss any events.
42021afaf18SBorislav Petkov */
42121afaf18SBorislav Petkov apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
42221afaf18SBorislav Petkov cmci_recheck();
42321afaf18SBorislav Petkov }
42421afaf18SBorislav Petkov
intel_init_lmce(void)42570f0c230STony W Wang-oc void intel_init_lmce(void)
42621afaf18SBorislav Petkov {
42721afaf18SBorislav Petkov u64 val;
42821afaf18SBorislav Petkov
42921afaf18SBorislav Petkov if (!lmce_supported())
43021afaf18SBorislav Petkov return;
43121afaf18SBorislav Petkov
43221afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
43321afaf18SBorislav Petkov
43421afaf18SBorislav Petkov if (!(val & MCG_EXT_CTL_LMCE_EN))
43521afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
43621afaf18SBorislav Petkov }
43721afaf18SBorislav Petkov
intel_clear_lmce(void)43870f0c230STony W Wang-oc void intel_clear_lmce(void)
43921afaf18SBorislav Petkov {
44021afaf18SBorislav Petkov u64 val;
44121afaf18SBorislav Petkov
44221afaf18SBorislav Petkov if (!lmce_supported())
44321afaf18SBorislav Petkov return;
44421afaf18SBorislav Petkov
44521afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
44621afaf18SBorislav Petkov val &= ~MCG_EXT_CTL_LMCE_EN;
44721afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
44821afaf18SBorislav Petkov }
44921afaf18SBorislav Petkov
45068299a42STony Luck /*
45168299a42STony Luck * Enable additional error logs from the integrated
45268299a42STony Luck * memory controller on processors that support this.
45368299a42STony Luck */
intel_imc_init(struct cpuinfo_x86 * c)45468299a42STony Luck static void intel_imc_init(struct cpuinfo_x86 *c)
45568299a42STony Luck {
45668299a42STony Luck u64 error_control;
45768299a42STony Luck
458*4a5f2dd1STony Luck switch (c->x86_vfm) {
459*4a5f2dd1STony Luck case INTEL_SANDYBRIDGE_X:
460*4a5f2dd1STony Luck case INTEL_IVYBRIDGE_X:
461*4a5f2dd1STony Luck case INTEL_HASWELL_X:
462098416e6STony Luck if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
463098416e6STony Luck return;
46468299a42STony Luck error_control |= 2;
465098416e6STony Luck wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
46668299a42STony Luck break;
46768299a42STony Luck }
46868299a42STony Luck }
46968299a42STony Luck
/*
 * Initialize the Intel specific MCE features for a CPU: CMCI first, then
 * LMCE, then the integrated memory controller error logging.
 *
 * @c: CPU description, used only to select the IMC setup by model
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}
47621afaf18SBorislav Petkov
/*
 * Undo the Intel specific MCE feature setup that needs undoing; only LMCE
 * is turned off here. @c is unused.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
4812976908eSPrarit Bhargava
intel_filter_mce(struct mce * m)4822976908eSPrarit Bhargava bool intel_filter_mce(struct mce *m)
4832976908eSPrarit Bhargava {
4842976908eSPrarit Bhargava struct cpuinfo_x86 *c = &boot_cpu_data;
4852976908eSPrarit Bhargava
486e629fc14SDave Jones /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
487*4a5f2dd1STony Luck if ((c->x86_vfm == INTEL_HASWELL ||
488*4a5f2dd1STony Luck c->x86_vfm == INTEL_HASWELL_L ||
489*4a5f2dd1STony Luck c->x86_vfm == INTEL_BROADWELL ||
490*4a5f2dd1STony Luck c->x86_vfm == INTEL_HASWELL_G ||
491*4a5f2dd1STony Luck c->x86_vfm == INTEL_SKYLAKE_X) &&
4922976908eSPrarit Bhargava (m->bank == 0) &&
4932976908eSPrarit Bhargava ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
4942976908eSPrarit Bhargava return true;
4952976908eSPrarit Bhargava
4962976908eSPrarit Bhargava return false;
4972976908eSPrarit Bhargava }
4981bae0cfeSYazen Ghannam
4991bae0cfeSYazen Ghannam /*
5001bae0cfeSYazen Ghannam * Check if the address reported by the CPU is in a format we can parse.
5011bae0cfeSYazen Ghannam * It would be possible to add code for most other cases, but all would
5021bae0cfeSYazen Ghannam * be somewhat complicated (e.g. segment offset would require an instruction
5031bae0cfeSYazen Ghannam * parser). So only support physical addresses up to page granularity for now.
5041bae0cfeSYazen Ghannam */
intel_mce_usable_address(struct mce * m)5051bae0cfeSYazen Ghannam bool intel_mce_usable_address(struct mce *m)
5061bae0cfeSYazen Ghannam {
5071bae0cfeSYazen Ghannam if (!(m->status & MCI_STATUS_MISCV))
5081bae0cfeSYazen Ghannam return false;
5091bae0cfeSYazen Ghannam
5101bae0cfeSYazen Ghannam if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
5111bae0cfeSYazen Ghannam return false;
5121bae0cfeSYazen Ghannam
5131bae0cfeSYazen Ghannam if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
5141bae0cfeSYazen Ghannam return false;
5151bae0cfeSYazen Ghannam
5161bae0cfeSYazen Ghannam return true;
5171bae0cfeSYazen Ghannam }
518