xref: /linux/arch/x86/kernel/cpu/mce/intel.c (revision 70ab9ec9166db90ab8980aff4f7083512ecddd1f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Intel specific MCE features.
4  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
5  * Copyright (C) 2008, 2009 Intel Corporation
6  * Author: Andi Kleen
7  */
8 
9 #include <linux/gfp.h>
10 #include <linux/interrupt.h>
11 #include <linux/percpu.h>
12 #include <linux/sched.h>
13 #include <linux/cpumask.h>
14 #include <asm/apic.h>
15 #include <asm/cpufeature.h>
16 #include <asm/intel-family.h>
17 #include <asm/processor.h>
18 #include <asm/msr.h>
19 #include <asm/mce.h>
20 
21 #include "internal.h"
22 
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Without CMCI we pick those events up using a regular polling timer.
 * CMCI also supports reliable discovery of shared banks.
 */
29 
30 /*
31  * CMCI can be delivered to multiple cpus that share a machine check bank
32  * so we need to designate a single cpu to process errors logged in each bank
33  * in the interrupt handler (otherwise we would have many races and potential
34  * double reporting of the same error).
35  * Note that this can change when a cpu is offlined or brought online since
36  * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
37  * disables CMCI on all banks owned by the cpu and clears this bitfield. At
38  * this point, cmci_rediscover() kicks in and a different cpu may end up
39  * taking ownership of some of the shared MCA banks that were previously
40  * owned by the offlined cpu.
41  */
42 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
43 
44 /*
45  * CMCI storm detection backoff counter
46  *
47  * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
48  * encountered an error. If not, we decrement it by one. We signal the end of
49  * the CMCI storm when it reaches 0.
50  */
51 static DEFINE_PER_CPU(int, cmci_backoff_cnt);
52 
53 /*
54  * cmci_discover_lock protects against parallel discovery attempts
55  * which could race against each other.
56  */
57 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
58 
59 /*
60  * On systems that do support CMCI but it's disabled, polling for MCEs can
61  * cause the same event to be reported multiple times because IA32_MCi_STATUS
62  * is shared by the same package.
63  */
64 static DEFINE_SPINLOCK(cmci_poll_lock);
65 
66 #define CMCI_THRESHOLD		1
67 #define CMCI_POLL_INTERVAL	(30 * HZ)
68 #define CMCI_STORM_INTERVAL	(HZ)
69 #define CMCI_STORM_THRESHOLD	15
70 
71 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
72 static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
73 static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
74 
75 enum {
76 	CMCI_STORM_NONE,
77 	CMCI_STORM_ACTIVE,
78 	CMCI_STORM_SUBSIDED,
79 };
80 
81 static atomic_t cmci_storm_on_cpus;
82 
83 static int cmci_supported(int *banks)
84 {
85 	u64 cap;
86 
87 	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
88 		return 0;
89 
90 	/*
91 	 * Vendor check is not strictly needed, but the initial
92 	 * initialization is vendor keyed and this
93 	 * makes sure none of the backdoors are entered otherwise.
94 	 */
95 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
96 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
97 		return 0;
98 
99 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
100 		return 0;
101 	rdmsrl(MSR_IA32_MCG_CAP, cap);
102 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
103 	return !!(cap & MCG_CMCI_P);
104 }
105 
106 static bool lmce_supported(void)
107 {
108 	u64 tmp;
109 
110 	if (mca_cfg.lmce_disabled)
111 		return false;
112 
113 	rdmsrl(MSR_IA32_MCG_CAP, tmp);
114 
115 	/*
116 	 * LMCE depends on recovery support in the processor. Hence both
117 	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
118 	 */
119 	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
120 		   (MCG_SER_P | MCG_LMCE_P))
121 		return false;
122 
123 	/*
124 	 * BIOS should indicate support for LMCE by setting bit 20 in
125 	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
126 	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
127 	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
128 	 * locks the MSR in the event that it wasn't already locked by BIOS.
129 	 */
130 	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
131 	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
132 		return false;
133 
134 	return tmp & FEAT_CTL_LMCE_ENABLED;
135 }
136 
137 bool mce_intel_cmci_poll(void)
138 {
139 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
140 		return false;
141 
142 	/*
143 	 * Reset the counter if we've logged an error in the last poll
144 	 * during the storm.
145 	 */
146 	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
147 		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
148 	else
149 		this_cpu_dec(cmci_backoff_cnt);
150 
151 	return true;
152 }
153 
154 void mce_intel_hcpu_update(unsigned long cpu)
155 {
156 	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
157 		atomic_dec(&cmci_storm_on_cpus);
158 
159 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
160 }
161 
162 static void cmci_toggle_interrupt_mode(bool on)
163 {
164 	unsigned long flags, *owned;
165 	int bank;
166 	u64 val;
167 
168 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
169 	owned = this_cpu_ptr(mce_banks_owned);
170 	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
171 		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
172 
173 		if (on)
174 			val |= MCI_CTL2_CMCI_EN;
175 		else
176 			val &= ~MCI_CTL2_CMCI_EN;
177 
178 		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
179 	}
180 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
181 }
182 
183 unsigned long cmci_intel_adjust_timer(unsigned long interval)
184 {
185 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
186 	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
187 		mce_notify_irq();
188 		return CMCI_STORM_INTERVAL;
189 	}
190 
191 	switch (__this_cpu_read(cmci_storm_state)) {
192 	case CMCI_STORM_ACTIVE:
193 
194 		/*
195 		 * We switch back to interrupt mode once the poll timer has
196 		 * silenced itself. That means no events recorded and the timer
197 		 * interval is back to our poll interval.
198 		 */
199 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
200 		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
201 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
202 
203 		fallthrough;
204 
205 	case CMCI_STORM_SUBSIDED:
206 		/*
207 		 * We wait for all CPUs to go back to SUBSIDED state. When that
208 		 * happens we switch back to interrupt mode.
209 		 */
210 		if (!atomic_read(&cmci_storm_on_cpus)) {
211 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
212 			cmci_toggle_interrupt_mode(true);
213 			cmci_recheck();
214 		}
215 		return CMCI_POLL_INTERVAL;
216 	default:
217 
218 		/* We have shiny weather. Let the poll do whatever it thinks. */
219 		return interval;
220 	}
221 }
222 
223 static bool cmci_storm_detect(void)
224 {
225 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
226 	unsigned long ts = __this_cpu_read(cmci_time_stamp);
227 	unsigned long now = jiffies;
228 	int r;
229 
230 	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
231 		return true;
232 
233 	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
234 		cnt++;
235 	} else {
236 		cnt = 1;
237 		__this_cpu_write(cmci_time_stamp, now);
238 	}
239 	__this_cpu_write(cmci_storm_cnt, cnt);
240 
241 	if (cnt <= CMCI_STORM_THRESHOLD)
242 		return false;
243 
244 	cmci_toggle_interrupt_mode(false);
245 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
246 	r = atomic_add_return(1, &cmci_storm_on_cpus);
247 	mce_timer_kick(CMCI_STORM_INTERVAL);
248 	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
249 
250 	if (r == 1)
251 		pr_notice("CMCI storm detected: switching to poll mode\n");
252 	return true;
253 }
254 
255 /*
256  * The interrupt handler. This is called on every event.
257  * Just call the poller directly to log any events.
258  * This could in theory increase the threshold under high load,
259  * but doesn't for now.
260  */
261 static void intel_threshold_interrupt(void)
262 {
263 	if (cmci_storm_detect())
264 		return;
265 
266 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
267 }
268 
269 /*
270  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
271  * on this CPU. Use the algorithm recommended in the SDM to discover shared
272  * banks.
273  */
274 static void cmci_discover(int banks)
275 {
276 	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
277 	unsigned long flags;
278 	int i;
279 	int bios_wrong_thresh = 0;
280 
281 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
282 	for (i = 0; i < banks; i++) {
283 		u64 val;
284 		int bios_zero_thresh = 0;
285 
286 		if (test_bit(i, owned))
287 			continue;
288 
289 		/* Skip banks in firmware first mode */
290 		if (test_bit(i, mce_banks_ce_disabled))
291 			continue;
292 
293 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
294 
295 		/* Already owned by someone else? */
296 		if (val & MCI_CTL2_CMCI_EN) {
297 			clear_bit(i, owned);
298 			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
299 			continue;
300 		}
301 
302 		if (!mca_cfg.bios_cmci_threshold) {
303 			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
304 			val |= CMCI_THRESHOLD;
305 		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
306 			/*
307 			 * If bios_cmci_threshold boot option was specified
308 			 * but the threshold is zero, we'll try to initialize
309 			 * it to 1.
310 			 */
311 			bios_zero_thresh = 1;
312 			val |= CMCI_THRESHOLD;
313 		}
314 
315 		val |= MCI_CTL2_CMCI_EN;
316 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
317 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
318 
319 		/* Did the enable bit stick? -- the bank supports CMCI */
320 		if (val & MCI_CTL2_CMCI_EN) {
321 			set_bit(i, owned);
322 			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
323 			/*
324 			 * We are able to set thresholds for some banks that
325 			 * had a threshold of 0. This means the BIOS has not
326 			 * set the thresholds properly or does not work with
327 			 * this boot option. Note down now and report later.
328 			 */
329 			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
330 					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
331 				bios_wrong_thresh = 1;
332 		} else {
333 			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
334 		}
335 	}
336 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
337 	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
338 		pr_info_once(
339 			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
340 		pr_info_once(
341 			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
342 	}
343 }
344 
345 /*
346  * Just in case we missed an event during initialization check
347  * all the CMCI owned banks.
348  */
349 void cmci_recheck(void)
350 {
351 	unsigned long flags;
352 	int banks;
353 
354 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
355 		return;
356 
357 	local_irq_save(flags);
358 	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
359 	local_irq_restore(flags);
360 }
361 
362 /* Caller must hold the lock on cmci_discover_lock */
363 static void __cmci_disable_bank(int bank)
364 {
365 	u64 val;
366 
367 	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
368 		return;
369 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
370 	val &= ~MCI_CTL2_CMCI_EN;
371 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
372 	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
373 }
374 
375 /*
376  * Disable CMCI on this CPU for all banks it owns when it goes down.
377  * This allows other CPUs to claim the banks on rediscovery.
378  */
379 void cmci_clear(void)
380 {
381 	unsigned long flags;
382 	int i;
383 	int banks;
384 
385 	if (!cmci_supported(&banks))
386 		return;
387 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
388 	for (i = 0; i < banks; i++)
389 		__cmci_disable_bank(i);
390 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
391 }
392 
/*
 * Per-CPU worker for cmci_rediscover(). @arg is unused.
 *
 * Support and bank count are re-queried on the CPU running this function
 * in case the CPUs don't all have the same banks.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
401 
402 /* After a CPU went down cycle through all the others and rediscover */
403 void cmci_rediscover(void)
404 {
405 	int banks;
406 
407 	if (!cmci_supported(&banks))
408 		return;
409 
410 	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
411 }
412 
/*
 * Re-run bank discovery on this CPU to turn CMCI back on, for the case
 * where taking the CPU down failed.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
422 
423 void cmci_disable_bank(int bank)
424 {
425 	int banks;
426 	unsigned long flags;
427 
428 	if (!cmci_supported(&banks))
429 		return;
430 
431 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
432 	__cmci_disable_bank(bank);
433 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
434 }
435 
436 /* Bank polling function when CMCI is disabled. */
437 static void cmci_mc_poll_banks(void)
438 {
439 	spin_lock(&cmci_poll_lock);
440 	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
441 	spin_unlock(&cmci_poll_lock);
442 }
443 
444 void intel_init_cmci(void)
445 {
446 	int banks;
447 
448 	if (!cmci_supported(&banks)) {
449 		mc_poll_banks = cmci_mc_poll_banks;
450 		return;
451 	}
452 
453 	mce_threshold_vector = intel_threshold_interrupt;
454 	cmci_discover(banks);
455 	/*
456 	 * For CPU #0 this runs with still disabled APIC, but that's
457 	 * ok because only the vector is set up. We still do another
458 	 * check for the banks later for CPU #0 just to make sure
459 	 * to not miss any events.
460 	 */
461 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
462 	cmci_recheck();
463 }
464 
465 void intel_init_lmce(void)
466 {
467 	u64 val;
468 
469 	if (!lmce_supported())
470 		return;
471 
472 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
473 
474 	if (!(val & MCG_EXT_CTL_LMCE_EN))
475 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
476 }
477 
478 void intel_clear_lmce(void)
479 {
480 	u64 val;
481 
482 	if (!lmce_supported())
483 		return;
484 
485 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
486 	val &= ~MCG_EXT_CTL_LMCE_EN;
487 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
488 }
489 
490 /*
491  * Enable additional error logs from the integrated
492  * memory controller on processors that support this.
493  */
494 static void intel_imc_init(struct cpuinfo_x86 *c)
495 {
496 	u64 error_control;
497 
498 	switch (c->x86_model) {
499 	case INTEL_FAM6_SANDYBRIDGE_X:
500 	case INTEL_FAM6_IVYBRIDGE_X:
501 	case INTEL_FAM6_HASWELL_X:
502 		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
503 			return;
504 		error_control |= 2;
505 		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
506 		break;
507 	}
508 }
509 
/*
 * Initialize the Intel specific MCE features for this CPU, in order: CMCI,
 * LMCE, then the model-dependent memory controller logging, which is the
 * only step that uses @c.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();

	intel_init_lmce();

	intel_imc_init(c);
}
516 
/* Undo the Intel specific MCE setup; only LMCE is turned off. @c is unused. */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
521 
522 bool intel_filter_mce(struct mce *m)
523 {
524 	struct cpuinfo_x86 *c = &boot_cpu_data;
525 
526 	/* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
527 	if ((c->x86 == 6) &&
528 	    ((c->x86_model == INTEL_FAM6_HASWELL) ||
529 	     (c->x86_model == INTEL_FAM6_HASWELL_L) ||
530 	     (c->x86_model == INTEL_FAM6_BROADWELL) ||
531 	     (c->x86_model == INTEL_FAM6_HASWELL_G) ||
532 	     (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
533 	    (m->bank == 0) &&
534 	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
535 		return true;
536 
537 	return false;
538 }
539 
540 /*
541  * Check if the address reported by the CPU is in a format we can parse.
542  * It would be possible to add code for most other cases, but all would
543  * be somewhat complicated (e.g. segment offset would require an instruction
544  * parser). So only support physical addresses up to page granularity for now.
545  */
546 bool intel_mce_usable_address(struct mce *m)
547 {
548 	if (!(m->status & MCI_STATUS_MISCV))
549 		return false;
550 
551 	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
552 		return false;
553 
554 	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
555 		return false;
556 
557 	return true;
558 }
559