xref: /linux/arch/x86/kernel/cpu/mce/intel.c (revision 0b364cf53b20204e92bac7c6ebd1ee7d3ec62931)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Intel specific MCE features.
4  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
5  * Copyright (C) 2008, 2009 Intel Corporation
6  * Author: Andi Kleen
7  */
8 
9 #include <linux/gfp.h>
10 #include <linux/interrupt.h>
11 #include <linux/percpu.h>
12 #include <linux/sched.h>
13 #include <linux/cpumask.h>
14 #include <asm/apic.h>
15 #include <asm/cpufeature.h>
16 #include <asm/cpu_device_id.h>
17 #include <asm/processor.h>
18 #include <asm/msr.h>
19 #include <asm/mce.h>
20 
21 #include "internal.h"
22 
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check has happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */
29 
/*
 * CMCI can be delivered to multiple CPUs that share a machine check bank,
 * so we need to designate a single CPU to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a CPU is offlined or brought online since
 * some MCA banks are shared across CPUs. When a CPU is offlined, cmci_clear()
 * disables CMCI on all banks owned by the CPU and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different CPU may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined CPU.
 *
 * intel_threshold_interrupt() and cmci_recheck() poll exactly the banks
 * whose bits are set in this per-CPU bitmap.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
43 
/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 *
 * Besides cmci_discover(), it is also taken (with interrupts disabled) by
 * cmci_set_threshold(), cmci_clear() and cmci_disable_bank() around their
 * updates of the per-bank MCi_CTL2 registers.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
49 
/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by the same package.
 *
 * cmci_mc_poll_banks() holds this lock around its machine_check_poll() call.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);
56 
/*
 * Linux non-storm CMCI threshold (may be overridden by BIOS).
 * cmci_pick_threshold() programs this value when BIOS thresholds are not
 * being honoured, or when the BIOS-provided threshold field is zero.
 */
#define CMCI_THRESHOLD		1
59 
/*
 * MCi_CTL2 threshold for each bank when there is no storm.
 * Default value for each bank may have been set by BIOS.
 *
 * An entry is recorded by cmci_claim_bank() while it is still zero, and is
 * written back to the bank by mce_intel_handle_storm() when called with
 * on == false.
 */
static u16 cmci_threshold[MAX_NR_BANKS];
65 
/*
 * High threshold to limit CMCI rate during storms. Max supported is
 * 0x7FFF. Use this slightly smaller value so it has a distinctive
 * signature when someone asks "Why am I not seeing all corrected errors?"
 * A high threshold is used instead of just disabling CMCI for a
 * bank because both corrected and uncorrected errors may be logged
 * in the same bank and signalled with CMCI. The threshold only applies
 * to corrected errors, so keeping CMCI enabled means that uncorrected
 * errors will still be processed in a timely fashion.
 *
 * The code in this file also uses this exact value, read back from the
 * MCi_CTL2 threshold field, to recognise a bank that is in storm mode.
 */
#define CMCI_STORM_THRESHOLD	32749
77 
78 static int cmci_supported(int *banks)
79 {
80 	u64 cap;
81 
82 	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
83 		return 0;
84 
85 	/*
86 	 * Vendor check is not strictly needed, but the initial
87 	 * initialization is vendor keyed and this
88 	 * makes sure none of the backdoors are entered otherwise.
89 	 */
90 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
91 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
92 		return 0;
93 
94 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
95 		return 0;
96 	rdmsrl(MSR_IA32_MCG_CAP, cap);
97 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
98 	return !!(cap & MCG_CMCI_P);
99 }
100 
101 static bool lmce_supported(void)
102 {
103 	u64 tmp;
104 
105 	if (mca_cfg.lmce_disabled)
106 		return false;
107 
108 	rdmsrl(MSR_IA32_MCG_CAP, tmp);
109 
110 	/*
111 	 * LMCE depends on recovery support in the processor. Hence both
112 	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
113 	 */
114 	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
115 		   (MCG_SER_P | MCG_LMCE_P))
116 		return false;
117 
118 	/*
119 	 * BIOS should indicate support for LMCE by setting bit 20 in
120 	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
121 	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
122 	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
123 	 * locks the MSR in the event that it wasn't already locked by BIOS.
124 	 */
125 	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
126 	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
127 		return false;
128 
129 	return tmp & FEAT_CTL_LMCE_ENABLED;
130 }
131 
132 /*
133  * Set a new CMCI threshold value. Preserve the state of the
134  * MCI_CTL2_CMCI_EN bit in case this happens during a
135  * cmci_rediscover() operation.
136  */
137 static void cmci_set_threshold(int bank, int thresh)
138 {
139 	unsigned long flags;
140 	u64 val;
141 
142 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
143 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
144 	val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
145 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
146 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
147 }
148 
149 void mce_intel_handle_storm(int bank, bool on)
150 {
151 	if (on)
152 		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
153 	else
154 		cmci_set_threshold(bank, cmci_threshold[bank]);
155 }
156 
157 /*
158  * The interrupt handler. This is called on every event.
159  * Just call the poller directly to log any events.
160  * This could in theory increase the threshold under high load,
161  * but doesn't for now.
162  */
163 static void intel_threshold_interrupt(void)
164 {
165 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
166 }
167 
168 /*
169  * Check all the reasons why current CPU cannot claim
170  * ownership of a bank.
171  * 1: CPU already owns this bank
172  * 2: BIOS owns this bank
173  * 3: Some other CPU owns this bank
174  */
175 static bool cmci_skip_bank(int bank, u64 *val)
176 {
177 	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
178 
179 	if (test_bit(bank, owned))
180 		return true;
181 
182 	/* Skip banks in firmware first mode */
183 	if (test_bit(bank, mce_banks_ce_disabled))
184 		return true;
185 
186 	rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
187 
188 	/* Already owned by someone else? */
189 	if (*val & MCI_CTL2_CMCI_EN) {
190 		clear_bit(bank, owned);
191 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
192 		return true;
193 	}
194 
195 	return false;
196 }
197 
198 /*
199  * Decide which CMCI interrupt threshold to use:
200  * 1: If this bank is in storm mode from whichever CPU was
201  *    the previous owner, stay in storm mode.
202  * 2: If ignoring any threshold set by BIOS, set Linux default
203  * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
204  */
205 static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
206 {
207 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
208 		return val;
209 
210 	if (!mca_cfg.bios_cmci_threshold) {
211 		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
212 		val |= CMCI_THRESHOLD;
213 	} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
214 		/*
215 		 * If bios_cmci_threshold boot option was specified
216 		 * but the threshold is zero, we'll try to initialize
217 		 * it to 1.
218 		 */
219 		*bios_zero_thresh = 1;
220 		val |= CMCI_THRESHOLD;
221 	}
222 
223 	return val;
224 }
225 
226 /*
227  * Try to claim ownership of a bank.
228  */
229 static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
230 {
231 	struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
232 
233 	val |= MCI_CTL2_CMCI_EN;
234 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
235 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
236 
237 	/* If the enable bit did not stick, this bank should be polled. */
238 	if (!(val & MCI_CTL2_CMCI_EN)) {
239 		WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
240 		storm->banks[bank].poll_only = true;
241 		return;
242 	}
243 
244 	/* This CPU successfully set the enable bit. */
245 	set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));
246 
247 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
248 		pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
249 		mce_inherit_storm(bank);
250 		cmci_storm_begin(bank);
251 	} else {
252 		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
253 	}
254 
255 	/*
256 	 * We are able to set thresholds for some banks that
257 	 * had a threshold of 0. This means the BIOS has not
258 	 * set the thresholds properly or does not work with
259 	 * this boot option. Note down now and report later.
260 	 */
261 	if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
262 	    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
263 		*bios_wrong_thresh = 1;
264 
265 	/* Save default threshold for each bank */
266 	if (cmci_threshold[bank] == 0)
267 		cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
268 }
269 
270 /*
271  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
272  * on this CPU. Use the algorithm recommended in the SDM to discover shared
273  * banks. Called during initial bootstrap, and also for hotplug CPU operations
274  * to rediscover/reassign machine check banks.
275  */
276 static void cmci_discover(int banks)
277 {
278 	int bios_wrong_thresh = 0;
279 	unsigned long flags;
280 	int i;
281 
282 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
283 	for (i = 0; i < banks; i++) {
284 		u64 val;
285 		int bios_zero_thresh = 0;
286 
287 		if (cmci_skip_bank(i, &val))
288 			continue;
289 
290 		val = cmci_pick_threshold(val, &bios_zero_thresh);
291 		cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
292 	}
293 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
294 	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
295 		pr_info_once(
296 			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
297 		pr_info_once(
298 			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
299 	}
300 }
301 
302 /*
303  * Just in case we missed an event during initialization check
304  * all the CMCI owned banks.
305  */
306 void cmci_recheck(void)
307 {
308 	unsigned long flags;
309 	int banks;
310 
311 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
312 		return;
313 
314 	local_irq_save(flags);
315 	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
316 	local_irq_restore(flags);
317 }
318 
319 /* Caller must hold the lock on cmci_discover_lock */
320 static void __cmci_disable_bank(int bank)
321 {
322 	u64 val;
323 
324 	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
325 		return;
326 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
327 	val &= ~MCI_CTL2_CMCI_EN;
328 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
329 	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
330 
331 	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
332 		cmci_storm_end(bank);
333 }
334 
335 /*
336  * Disable CMCI on this CPU for all banks it owns when it goes down.
337  * This allows other CPUs to claim the banks on rediscovery.
338  */
339 void cmci_clear(void)
340 {
341 	unsigned long flags;
342 	int i;
343 	int banks;
344 
345 	if (!cmci_supported(&banks))
346 		return;
347 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
348 	for (i = 0; i < banks; i++)
349 		__cmci_disable_bank(i);
350 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
351 }
352 
/*
 * Per-CPU callback used by cmci_rediscover(). @arg is unused.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	/* Query the bank count again: CPUs may not all have the same. */
	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
361 
362 /* After a CPU went down cycle through all the others and rediscover */
363 void cmci_rediscover(void)
364 {
365 	int banks;
366 
367 	if (!cmci_supported(&banks))
368 		return;
369 
370 	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
371 }
372 
/*
 * Turn CMCI back on for this CPU after a failed CPU down attempt by
 * running bank discovery again.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
382 
383 void cmci_disable_bank(int bank)
384 {
385 	int banks;
386 	unsigned long flags;
387 
388 	if (!cmci_supported(&banks))
389 		return;
390 
391 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
392 	__cmci_disable_bank(bank);
393 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
394 }
395 
396 /* Bank polling function when CMCI is disabled. */
397 static void cmci_mc_poll_banks(void)
398 {
399 	spin_lock(&cmci_poll_lock);
400 	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
401 	spin_unlock(&cmci_poll_lock);
402 }
403 
404 void intel_init_cmci(void)
405 {
406 	int banks;
407 
408 	if (!cmci_supported(&banks)) {
409 		mc_poll_banks = cmci_mc_poll_banks;
410 		return;
411 	}
412 
413 	mce_threshold_vector = intel_threshold_interrupt;
414 	cmci_discover(banks);
415 	/*
416 	 * For CPU #0 this runs with still disabled APIC, but that's
417 	 * ok because only the vector is set up. We still do another
418 	 * check for the banks later for CPU #0 just to make sure
419 	 * to not miss any events.
420 	 */
421 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
422 	cmci_recheck();
423 }
424 
425 void intel_init_lmce(void)
426 {
427 	u64 val;
428 
429 	if (!lmce_supported())
430 		return;
431 
432 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
433 
434 	if (!(val & MCG_EXT_CTL_LMCE_EN))
435 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
436 }
437 
438 void intel_clear_lmce(void)
439 {
440 	u64 val;
441 
442 	if (!lmce_supported())
443 		return;
444 
445 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
446 	val &= ~MCG_EXT_CTL_LMCE_EN;
447 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
448 }
449 
450 /*
451  * Enable additional error logs from the integrated
452  * memory controller on processors that support this.
453  */
454 static void intel_imc_init(struct cpuinfo_x86 *c)
455 {
456 	u64 error_control;
457 
458 	switch (c->x86_vfm) {
459 	case INTEL_SANDYBRIDGE_X:
460 	case INTEL_IVYBRIDGE_X:
461 	case INTEL_HASWELL_X:
462 		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
463 			return;
464 		error_control |= 2;
465 		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
466 		break;
467 	}
468 }
469 
/*
 * Initialise the Intel-specific MCE features handled in this file, in
 * order: CMCI, LMCE, then the integrated memory controller error logging.
 *
 * @c: CPU description; only passed on to intel_imc_init()
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}
476 
/*
 * Undo the Intel-specific MCE feature setup. Only LMCE is cleared here;
 * @c is not used by this function.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
481 
482 bool intel_filter_mce(struct mce *m)
483 {
484 	struct cpuinfo_x86 *c = &boot_cpu_data;
485 
486 	/* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
487 	if ((c->x86_vfm == INTEL_HASWELL ||
488 	     c->x86_vfm == INTEL_HASWELL_L ||
489 	     c->x86_vfm == INTEL_BROADWELL ||
490 	     c->x86_vfm == INTEL_HASWELL_G ||
491 	     c->x86_vfm == INTEL_SKYLAKE_X) &&
492 	    (m->bank == 0) &&
493 	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
494 		return true;
495 
496 	return false;
497 }
498 
499 /*
500  * Check if the address reported by the CPU is in a format we can parse.
501  * It would be possible to add code for most other cases, but all would
502  * be somewhat complicated (e.g. segment offset would require an instruction
503  * parser). So only support physical addresses up to page granularity for now.
504  */
505 bool intel_mce_usable_address(struct mce *m)
506 {
507 	if (!(m->status & MCI_STATUS_MISCV))
508 		return false;
509 
510 	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
511 		return false;
512 
513 	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
514 		return false;
515 
516 	return true;
517 }
518