xref: /linux/arch/x86/kernel/cpu/mce/intel.c (revision 86941382508850d58c11bdafe0fec646dfd31b09)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Intel specific MCE features.
4  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
5  * Copyright (C) 2008, 2009 Intel Corporation
6  * Author: Andi Kleen
7  */
8 
9 #include <linux/gfp.h>
10 #include <linux/interrupt.h>
11 #include <linux/percpu.h>
12 #include <linux/sched.h>
13 #include <linux/cpumask.h>
14 #include <asm/apic.h>
15 #include <asm/cpufeature.h>
16 #include <asm/cpu_device_id.h>
17 #include <asm/processor.h>
18 #include <asm/msr.h>
19 #include <asm/mce.h>
20 
21 #include "internal.h"
22 
23 /*
24  * Support for Intel Corrected Machine Check Interrupts. This allows
25  * the CPU to raise an interrupt when a corrected machine check happened.
26  * Normally we pick those up using a regular polling timer.
27  * Also supports reliable discovery of shared banks.
28  */
29 
30 /*
31  * CMCI can be delivered to multiple cpus that share a machine check bank
32  * so we need to designate a single cpu to process errors logged in each bank
33  * in the interrupt handler (otherwise we would have many races and potential
34  * double reporting of the same error).
35  * Note that this can change when a cpu is offlined or brought online since
36  * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
37  * disables CMCI on all banks owned by the cpu and clears this bitfield. At
38  * this point, cmci_rediscover() kicks in and a different cpu may end up
39  * taking ownership of some of the shared MCA banks that were previously
40  * owned by the offlined cpu.
41  */
42 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
43 
44 /*
45  * cmci_discover_lock protects against parallel discovery attempts
46  * which could race against each other.
47  */
48 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
49 
50 /*
51  * On systems that do support CMCI but it's disabled, polling for MCEs can
52  * cause the same event to be reported multiple times because IA32_MCi_STATUS
53  * is shared by the same package; cmci_mc_poll_banks() takes this lock while polling.
54  */
55 static DEFINE_SPINLOCK(cmci_poll_lock);
56 
57 /* Linux non-storm CMCI threshold (may be overridden by BIOS) */
58 #define CMCI_THRESHOLD		1
59 
60 /*
61  * MCi_CTL2 threshold for each bank when there is no storm.
62  * Default value for each bank may have been set by BIOS; saved by cmci_claim_bank().
63  */
64 static u16 cmci_threshold[MAX_NR_BANKS];
65 
66 /*
67  * High threshold to limit CMCI rate during storms. Max supported is
68  * 0x7FFF. Use this slightly smaller value so it has a distinctive
69  * signature when someone asks "Why am I not seeing all corrected errors?"
70  * A high threshold is used instead of just disabling CMCI for a
71  * bank because both corrected and uncorrected errors may be logged
72  * in the same bank and signalled with CMCI. The threshold only applies
73  * to corrected errors, so keeping CMCI enabled means that uncorrected
74  * errors will still be processed in a timely fashion.
75  */
76 #define CMCI_STORM_THRESHOLD	32749
77 
78 static bool cmci_supported(int *banks)
79 {
80 	u64 cap;
81 
82 	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
83 		return false;
84 
85 	/*
86 	 * Vendor check is not strictly needed, but the initial
87 	 * initialization is vendor keyed and this
88 	 * makes sure none of the backdoors are entered otherwise.
89 	 */
90 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
91 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
92 		return false;
93 
94 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
95 		return false;
96 
97 	rdmsrq(MSR_IA32_MCG_CAP, cap);
98 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
99 	return !!(cap & MCG_CMCI_P);
100 }
101 
102 static bool lmce_supported(void)
103 {
104 	u64 tmp;
105 
106 	if (mca_cfg.lmce_disabled)
107 		return false;
108 
109 	rdmsrq(MSR_IA32_MCG_CAP, tmp);
110 
111 	/*
112 	 * LMCE depends on recovery support in the processor. Hence both
113 	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
114 	 */
115 	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
116 		   (MCG_SER_P | MCG_LMCE_P))
117 		return false;
118 
119 	/*
120 	 * BIOS should indicate support for LMCE by setting bit 20 in
121 	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
122 	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
123 	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
124 	 * locks the MSR in the event that it wasn't already locked by BIOS.
125 	 */
126 	rdmsrq(MSR_IA32_FEAT_CTL, tmp);
127 	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
128 		return false;
129 
130 	return tmp & FEAT_CTL_LMCE_ENABLED;
131 }
132 
133 /*
134  * Set a new CMCI threshold value. Preserve the state of the
135  * MCI_CTL2_CMCI_EN bit in case this happens during a
136  * cmci_rediscover() operation.
137  */
138 static void cmci_set_threshold(int bank, int thresh)
139 {
140 	unsigned long flags;
141 	u64 val;
142 
143 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
144 	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
145 	val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
146 	wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
147 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
148 }
149 
150 void mce_intel_handle_storm(int bank, bool on)
151 {
152 	if (on)
153 		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
154 	else
155 		cmci_set_threshold(bank, cmci_threshold[bank]);
156 }
157 
158 /*
159  * The interrupt handler. This is called on every event.
160  * Just call the poller directly to log any events.
161  * This could in theory increase the threshold under high load,
162  * but doesn't for now.
163  */
164 static void intel_threshold_interrupt(void)
165 {
166 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
167 }
168 
169 /*
170  * Check all the reasons why current CPU cannot claim
171  * ownership of a bank.
172  * 1: CPU already owns this bank
173  * 2: BIOS owns this bank
174  * 3: Some other CPU owns this bank
175  */
176 static bool cmci_skip_bank(int bank, u64 *val)
177 {
178 	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
179 
180 	if (test_bit(bank, owned))
181 		return true;
182 
183 	/* Skip banks in firmware first mode */
184 	if (test_bit(bank, mce_banks_ce_disabled))
185 		return true;
186 
187 	rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);
188 
189 	/* Already owned by someone else? */
190 	if (*val & MCI_CTL2_CMCI_EN) {
191 		clear_bit(bank, owned);
192 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
193 		return true;
194 	}
195 
196 	return false;
197 }
198 
199 /*
200  * Decide which CMCI interrupt threshold to use:
201  * 1: If this bank is in storm mode from whichever CPU was
202  *    the previous owner, stay in storm mode.
203  * 2: If ignoring any threshold set by BIOS, set Linux default
204  * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
205  */
206 static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
207 {
208 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
209 		return val;
210 
211 	if (!mca_cfg.bios_cmci_threshold) {
212 		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
213 		val |= CMCI_THRESHOLD;
214 	} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
215 		/*
216 		 * If bios_cmci_threshold boot option was specified
217 		 * but the threshold is zero, we'll try to initialize
218 		 * it to 1.
219 		 */
220 		*bios_zero_thresh = 1;
221 		val |= CMCI_THRESHOLD;
222 	}
223 
224 	return val;
225 }
226 
227 /*
228  * Try to claim ownership of a bank.
229  */
230 static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
231 {
232 	struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
233 
234 	val |= MCI_CTL2_CMCI_EN;
235 	wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
236 	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
237 
238 	/* If the enable bit did not stick, this bank should be polled. */
239 	if (!(val & MCI_CTL2_CMCI_EN)) {
240 		WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
241 		storm->banks[bank].poll_only = true;
242 		return;
243 	}
244 
245 	/* This CPU successfully set the enable bit. */
246 	set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));
247 
248 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
249 		pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
250 		mce_inherit_storm(bank);
251 		cmci_storm_begin(bank);
252 	} else {
253 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
254 	}
255 
256 	/*
257 	 * We are able to set thresholds for some banks that
258 	 * had a threshold of 0. This means the BIOS has not
259 	 * set the thresholds properly or does not work with
260 	 * this boot option. Note down now and report later.
261 	 */
262 	if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
263 	    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
264 		*bios_wrong_thresh = 1;
265 
266 	/* Save default threshold for each bank */
267 	if (cmci_threshold[bank] == 0)
268 		cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
269 }
270 
271 /*
272  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
273  * on this CPU. Use the algorithm recommended in the SDM to discover shared
274  * banks. Called during initial bootstrap, and also for hotplug CPU operations
275  * to rediscover/reassign machine check banks.
276  */
277 static void cmci_discover(int banks)
278 {
279 	int bios_wrong_thresh = 0;
280 	unsigned long flags;
281 	int i;
282 
283 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
284 	for (i = 0; i < banks; i++) {
285 		u64 val;
286 		int bios_zero_thresh = 0;
287 
288 		if (cmci_skip_bank(i, &val))
289 			continue;
290 
291 		val = cmci_pick_threshold(val, &bios_zero_thresh);
292 		cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
293 	}
294 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
295 	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
296 		pr_info_once(
297 			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
298 		pr_info_once(
299 			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
300 	}
301 }
302 
303 /*
304  * Just in case we missed an event during initialization check
305  * all the CMCI owned banks.
306  */
307 void cmci_recheck(void)
308 {
309 	unsigned long flags;
310 	int banks;
311 
312 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
313 		return;
314 
315 	local_irq_save(flags);
316 	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
317 	local_irq_restore(flags);
318 }
319 
320 /* Caller must hold the lock on cmci_discover_lock */
321 static void __cmci_disable_bank(int bank)
322 {
323 	u64 val;
324 
325 	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
326 		return;
327 	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
328 	val &= ~MCI_CTL2_CMCI_EN;
329 	wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
330 	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
331 
332 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
333 		cmci_storm_end(bank);
334 }
335 
336 /*
337  * Disable CMCI on this CPU for all banks it owns when it goes down.
338  * This allows other CPUs to claim the banks on rediscovery.
339  */
340 void cmci_clear(void)
341 {
342 	unsigned long flags;
343 	int i;
344 	int banks;
345 
346 	if (!cmci_supported(&banks))
347 		return;
348 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
349 	for (i = 0; i < banks; i++)
350 		__cmci_disable_bank(i);
351 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
352 }
353 
/*
 * Per-CPU callback used by cmci_rediscover(). @arg is unused.
 *
 * CMCI support and the bank count are re-evaluated on the executing CPU
 * rather than passed in, in case CPUs don't all have the same banks.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
362 
363 /* After a CPU went down cycle through all the others and rediscover */
364 void cmci_rediscover(void)
365 {
366 	int banks;
367 
368 	if (!cmci_supported(&banks))
369 		return;
370 
371 	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
372 }
373 
/*
 * Re-run bank discovery on this CPU to turn CMCI back on, for the case
 * where taking the CPU down failed.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
383 
384 void cmci_disable_bank(int bank)
385 {
386 	int banks;
387 	unsigned long flags;
388 
389 	if (!cmci_supported(&banks))
390 		return;
391 
392 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
393 	__cmci_disable_bank(bank);
394 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
395 }
396 
397 /* Bank polling function when CMCI is disabled. */
398 static void cmci_mc_poll_banks(void)
399 {
400 	spin_lock(&cmci_poll_lock);
401 	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
402 	spin_unlock(&cmci_poll_lock);
403 }
404 
405 void intel_init_cmci(void)
406 {
407 	int banks;
408 
409 	if (!cmci_supported(&banks)) {
410 		mc_poll_banks = cmci_mc_poll_banks;
411 		return;
412 	}
413 
414 	mce_threshold_vector = intel_threshold_interrupt;
415 	cmci_discover(banks);
416 	/*
417 	 * For CPU #0 this runs with still disabled APIC, but that's
418 	 * ok because only the vector is set up. We still do another
419 	 * check for the banks later for CPU #0 just to make sure
420 	 * to not miss any events.
421 	 */
422 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
423 	cmci_recheck();
424 }
425 
426 void intel_init_lmce(void)
427 {
428 	u64 val;
429 
430 	if (!lmce_supported())
431 		return;
432 
433 	rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
434 
435 	if (!(val & MCG_EXT_CTL_LMCE_EN))
436 		wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
437 }
438 
439 void intel_clear_lmce(void)
440 {
441 	u64 val;
442 
443 	if (!lmce_supported())
444 		return;
445 
446 	rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
447 	val &= ~MCG_EXT_CTL_LMCE_EN;
448 	wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
449 }
450 
451 /*
452  * Enable additional error logs from the integrated
453  * memory controller on processors that support this.
454  */
455 static void intel_imc_init(struct cpuinfo_x86 *c)
456 {
457 	u64 error_control;
458 
459 	switch (c->x86_vfm) {
460 	case INTEL_SANDYBRIDGE_X:
461 	case INTEL_IVYBRIDGE_X:
462 	case INTEL_HASWELL_X:
463 		if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
464 			return;
465 		error_control |= 2;
466 		wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
467 		break;
468 	}
469 }
470 
/*
 * Bring up the Intel-specific MCE features, in this order: CMCI, LMCE,
 * then the extra integrated-memory-controller logging for @c's model.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();

	intel_init_lmce();

	intel_imc_init(c);
}
477 
/*
 * Tear down the Intel-specific MCE features: LMCE is switched off
 * first, then CMCI is disabled on the banks this CPU owns. @c is not
 * used.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();

	cmci_clear();
}
483 
484 bool intel_filter_mce(struct mce *m)
485 {
486 	struct cpuinfo_x86 *c = &boot_cpu_data;
487 
488 	/* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
489 	if ((c->x86_vfm == INTEL_HASWELL ||
490 	     c->x86_vfm == INTEL_HASWELL_L ||
491 	     c->x86_vfm == INTEL_BROADWELL ||
492 	     c->x86_vfm == INTEL_HASWELL_G ||
493 	     c->x86_vfm == INTEL_SKYLAKE_X) &&
494 	    (m->bank == 0) &&
495 	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
496 		return true;
497 
498 	return false;
499 }
500 
501 /*
502  * Check if the address reported by the CPU is in a format we can parse.
503  * It would be possible to add code for most other cases, but all would
504  * be somewhat complicated (e.g. segment offset would require an instruction
505  * parser). So only support physical addresses up to page granularity for now.
506  */
507 bool intel_mce_usable_address(struct mce *m)
508 {
509 	if (!(m->status & MCI_STATUS_MISCV))
510 		return false;
511 
512 	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
513 		return false;
514 
515 	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
516 		return false;
517 
518 	return true;
519 }
520