xref: /linux/arch/x86/kernel/cpu/mce/intel.c (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Intel specific MCE features.
4  * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
5  * Copyright (C) 2008, 2009 Intel Corporation
6  * Author: Andi Kleen
7  */
8 
9 #include <linux/gfp.h>
10 #include <linux/interrupt.h>
11 #include <linux/percpu.h>
12 #include <linux/sched.h>
13 #include <linux/cpumask.h>
14 #include <asm/apic.h>
15 #include <asm/cpufeature.h>
16 #include <asm/intel-family.h>
17 #include <asm/processor.h>
18 #include <asm/msr.h>
19 #include <asm/mce.h>
20 
21 #include "internal.h"
22 
23 /*
24  * Support for Intel Corrected Machine Check Interrupts. This allows
25  * the CPU to raise an interrupt when a corrected machine check happened.
26  * Normally we pick those up using a regular polling timer.
27  * Also supports reliable discovery of shared banks.
28  */
29 
30 /*
31  * CMCI can be delivered to multiple cpus that share a machine check bank
32  * so we need to designate a single cpu to process errors logged in each bank
33  * in the interrupt handler (otherwise we would have many races and potential
34  * double reporting of the same error).
35  * Note that this can change when a cpu is offlined or brought online since
36  * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
37  * disables CMCI on all banks owned by the cpu and clears this bitfield. At
38  * this point, cmci_rediscover() kicks in and a different cpu may end up
39  * taking ownership of some of the shared MCA banks that were previously
40  * owned by the offlined cpu.
41  */
42 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);	/* bit N set: this cpu owns CMCI for bank N */
43 
44 /*
45  * CMCI storm detection backoff counter
46  *
47  * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL whenever a
48  * poll has logged an error. If not, we decrement it by one. We signal the end
49  * of the CMCI storm when it reaches 0.
50  */
51 static DEFINE_PER_CPU(int, cmci_backoff_cnt);
52 
53 /*
54  * cmci_discover_lock serializes bank discovery and the toggling/disabling of
55  * CMCI on owned banks, so parallel attempts cannot race against each other.
56  */
57 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
58 
59 #define CMCI_THRESHOLD		1	/* threshold value cmci_discover() writes into MCi_CTL2 */
60 #define CMCI_POLL_INTERVAL	(30 * HZ)	/* timer interval returned once a storm is no longer active */
61 #define CMCI_STORM_INTERVAL	(HZ)	/* storm counting window, and timer interval while a storm is active */
62 #define CMCI_STORM_THRESHOLD	15	/* more CMCIs than this within one window is treated as a storm */
63 
64 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);	/* jiffies at the start of the current counting window */
65 static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);	/* CMCIs counted in the current window */
66 static DEFINE_PER_CPU(unsigned int, cmci_storm_state);	/* one of the CMCI_STORM_* values */
67 
68 enum {
69 	CMCI_STORM_NONE,	/* no storm: CMCI interrupt mode */
70 	CMCI_STORM_ACTIVE,	/* storm on this CPU: CMCI switched off, owned banks are polled */
71 	CMCI_STORM_SUBSIDED,	/* this CPU went quiet; waiting for cmci_storm_on_cpus to reach 0 */
72 };
73 
74 static atomic_t cmci_storm_on_cpus;	/* number of CPUs currently in CMCI_STORM_ACTIVE */
75 
76 static int cmci_supported(int *banks)
77 {
78 	u64 cap;
79 
80 	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
81 		return 0;
82 
83 	/*
84 	 * Vendor check is not strictly needed, but the initial
85 	 * initialization is vendor keyed and this
86 	 * makes sure none of the backdoors are entered otherwise.
87 	 */
88 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
89 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
90 		return 0;
91 
92 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
93 		return 0;
94 	rdmsrl(MSR_IA32_MCG_CAP, cap);
95 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
96 	return !!(cap & MCG_CMCI_P);
97 }
98 
99 static bool lmce_supported(void)
100 {
101 	u64 tmp;
102 
103 	if (mca_cfg.lmce_disabled)
104 		return false;
105 
106 	rdmsrl(MSR_IA32_MCG_CAP, tmp);
107 
108 	/*
109 	 * LMCE depends on recovery support in the processor. Hence both
110 	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
111 	 */
112 	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
113 		   (MCG_SER_P | MCG_LMCE_P))
114 		return false;
115 
116 	/*
117 	 * BIOS should indicate support for LMCE by setting bit 20 in
118 	 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
119 	 * generate a #GP fault.
120 	 */
121 	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
122 	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
123 		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
124 		return true;
125 
126 	return false;
127 }
128 
129 bool mce_intel_cmci_poll(void)
130 {
131 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
132 		return false;
133 
134 	/*
135 	 * Reset the counter if we've logged an error in the last poll
136 	 * during the storm.
137 	 */
138 	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
139 		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
140 	else
141 		this_cpu_dec(cmci_backoff_cnt);
142 
143 	return true;
144 }
145 
146 void mce_intel_hcpu_update(unsigned long cpu)
147 {
148 	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
149 		atomic_dec(&cmci_storm_on_cpus);
150 
151 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
152 }
153 
154 static void cmci_toggle_interrupt_mode(bool on)
155 {
156 	unsigned long flags, *owned;
157 	int bank;
158 	u64 val;
159 
160 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
161 	owned = this_cpu_ptr(mce_banks_owned);
162 	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
163 		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
164 
165 		if (on)
166 			val |= MCI_CTL2_CMCI_EN;
167 		else
168 			val &= ~MCI_CTL2_CMCI_EN;
169 
170 		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
171 	}
172 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
173 }
174 
175 unsigned long cmci_intel_adjust_timer(unsigned long interval)
176 {
177 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
178 	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
179 		mce_notify_irq();
180 		return CMCI_STORM_INTERVAL;
181 	}
182 
183 	switch (__this_cpu_read(cmci_storm_state)) {
184 	case CMCI_STORM_ACTIVE:
185 
186 		/*
187 		 * We switch back to interrupt mode once the poll timer has
188 		 * silenced itself. That means no events recorded and the timer
189 		 * interval is back to our poll interval.
190 		 */
191 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
192 		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
193 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
194 
195 		/* FALLTHROUGH */
196 
197 	case CMCI_STORM_SUBSIDED:
198 		/*
199 		 * We wait for all CPUs to go back to SUBSIDED state. When that
200 		 * happens we switch back to interrupt mode.
201 		 */
202 		if (!atomic_read(&cmci_storm_on_cpus)) {
203 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
204 			cmci_toggle_interrupt_mode(true);
205 			cmci_recheck();
206 		}
207 		return CMCI_POLL_INTERVAL;
208 	default:
209 
210 		/* We have shiny weather. Let the poll do whatever it thinks. */
211 		return interval;
212 	}
213 }
214 
215 static bool cmci_storm_detect(void)
216 {
217 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
218 	unsigned long ts = __this_cpu_read(cmci_time_stamp);
219 	unsigned long now = jiffies;
220 	int r;
221 
222 	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
223 		return true;
224 
225 	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
226 		cnt++;
227 	} else {
228 		cnt = 1;
229 		__this_cpu_write(cmci_time_stamp, now);
230 	}
231 	__this_cpu_write(cmci_storm_cnt, cnt);
232 
233 	if (cnt <= CMCI_STORM_THRESHOLD)
234 		return false;
235 
236 	cmci_toggle_interrupt_mode(false);
237 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
238 	r = atomic_add_return(1, &cmci_storm_on_cpus);
239 	mce_timer_kick(CMCI_STORM_INTERVAL);
240 	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
241 
242 	if (r == 1)
243 		pr_notice("CMCI storm detected: switching to poll mode\n");
244 	return true;
245 }
246 
247 /*
248  * The interrupt handler. This is called on every event.
249  * Just call the poller directly to log any events.
250  * This could in theory increase the threshold under high load,
251  * but doesn't for now.
252  */
253 static void intel_threshold_interrupt(void)
254 {
255 	if (cmci_storm_detect())
256 		return;
257 
258 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
259 }
260 
261 /*
262  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
263  * on this CPU. Use the algorithm recommended in the SDM to discover shared
264  * banks.
265  */
266 static void cmci_discover(int banks)
267 {
268 	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
269 	unsigned long flags;
270 	int i;
271 	int bios_wrong_thresh = 0;
272 
273 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
274 	for (i = 0; i < banks; i++) {
275 		u64 val;
276 		int bios_zero_thresh = 0;
277 
278 		if (test_bit(i, owned))
279 			continue;
280 
281 		/* Skip banks in firmware first mode */
282 		if (test_bit(i, mce_banks_ce_disabled))
283 			continue;
284 
285 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
286 
287 		/* Already owned by someone else? */
288 		if (val & MCI_CTL2_CMCI_EN) {
289 			clear_bit(i, owned);
290 			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
291 			continue;
292 		}
293 
294 		if (!mca_cfg.bios_cmci_threshold) {
295 			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
296 			val |= CMCI_THRESHOLD;
297 		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
298 			/*
299 			 * If bios_cmci_threshold boot option was specified
300 			 * but the threshold is zero, we'll try to initialize
301 			 * it to 1.
302 			 */
303 			bios_zero_thresh = 1;
304 			val |= CMCI_THRESHOLD;
305 		}
306 
307 		val |= MCI_CTL2_CMCI_EN;
308 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
309 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
310 
311 		/* Did the enable bit stick? -- the bank supports CMCI */
312 		if (val & MCI_CTL2_CMCI_EN) {
313 			set_bit(i, owned);
314 			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
315 			/*
316 			 * We are able to set thresholds for some banks that
317 			 * had a threshold of 0. This means the BIOS has not
318 			 * set the thresholds properly or does not work with
319 			 * this boot option. Note down now and report later.
320 			 */
321 			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
322 					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
323 				bios_wrong_thresh = 1;
324 		} else {
325 			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
326 		}
327 	}
328 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
329 	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
330 		pr_info_once(
331 			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
332 		pr_info_once(
333 			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
334 	}
335 }
336 
337 /*
338  * Just in case we missed an event during initialization check
339  * all the CMCI owned banks.
340  */
341 void cmci_recheck(void)
342 {
343 	unsigned long flags;
344 	int banks;
345 
346 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
347 		return;
348 
349 	local_irq_save(flags);
350 	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
351 	local_irq_restore(flags);
352 }
353 
354 /* Caller must hold the lock on cmci_discover_lock */
355 static void __cmci_disable_bank(int bank)
356 {
357 	u64 val;
358 
359 	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
360 		return;
361 	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
362 	val &= ~MCI_CTL2_CMCI_EN;
363 	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
364 	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
365 }
366 
367 /*
368  * Disable CMCI on this CPU for all banks it owns when it goes down.
369  * This allows other CPUs to claim the banks on rediscovery.
370  */
371 void cmci_clear(void)
372 {
373 	unsigned long flags;
374 	int i;
375 	int banks;
376 
377 	if (!cmci_supported(&banks))
378 		return;
379 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
380 	for (i = 0; i < banks; i++)
381 		__cmci_disable_bank(i);
382 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
383 }
384 
/*
 * Per-CPU worker for cmci_rediscover(): run bank discovery on the CPU this
 * executes on. The bank count is re-read here in case CPUs don't all have
 * the same. @arg is unused.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
393 
394 /* After a CPU went down cycle through all the others and rediscover */
395 void cmci_rediscover(void)
396 {
397 	int banks;
398 
399 	if (!cmci_supported(&banks))
400 		return;
401 
402 	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
403 }
404 
/*
 * Reenable CMCI on this CPU in case a CPU down failed, by running bank
 * discovery again. No-op when CMCI is not supported.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
414 
415 void cmci_disable_bank(int bank)
416 {
417 	int banks;
418 	unsigned long flags;
419 
420 	if (!cmci_supported(&banks))
421 		return;
422 
423 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
424 	__cmci_disable_bank(bank);
425 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
426 }
427 
428 void intel_init_cmci(void)
429 {
430 	int banks;
431 
432 	if (!cmci_supported(&banks))
433 		return;
434 
435 	mce_threshold_vector = intel_threshold_interrupt;
436 	cmci_discover(banks);
437 	/*
438 	 * For CPU #0 this runs with still disabled APIC, but that's
439 	 * ok because only the vector is set up. We still do another
440 	 * check for the banks later for CPU #0 just to make sure
441 	 * to not miss any events.
442 	 */
443 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
444 	cmci_recheck();
445 }
446 
447 void intel_init_lmce(void)
448 {
449 	u64 val;
450 
451 	if (!lmce_supported())
452 		return;
453 
454 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
455 
456 	if (!(val & MCG_EXT_CTL_LMCE_EN))
457 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
458 }
459 
460 void intel_clear_lmce(void)
461 {
462 	u64 val;
463 
464 	if (!lmce_supported())
465 		return;
466 
467 	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
468 	val &= ~MCG_EXT_CTL_LMCE_EN;
469 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
470 }
471 
472 static void intel_ppin_init(struct cpuinfo_x86 *c)
473 {
474 	unsigned long long val;
475 
476 	/*
477 	 * Even if testing the presence of the MSR would be enough, we don't
478 	 * want to risk the situation where other models reuse this MSR for
479 	 * other purposes.
480 	 */
481 	switch (c->x86_model) {
482 	case INTEL_FAM6_IVYBRIDGE_X:
483 	case INTEL_FAM6_HASWELL_X:
484 	case INTEL_FAM6_BROADWELL_D:
485 	case INTEL_FAM6_BROADWELL_X:
486 	case INTEL_FAM6_SKYLAKE_X:
487 	case INTEL_FAM6_ICELAKE_X:
488 	case INTEL_FAM6_XEON_PHI_KNL:
489 	case INTEL_FAM6_XEON_PHI_KNM:
490 
491 		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
492 			return;
493 
494 		if ((val & 3UL) == 1UL) {
495 			/* PPIN available but disabled: */
496 			return;
497 		}
498 
499 		/* If PPIN is disabled, but not locked, try to enable: */
500 		if (!(val & 3UL)) {
501 			wrmsrl_safe(MSR_PPIN_CTL,  val | 2UL);
502 			rdmsrl_safe(MSR_PPIN_CTL, &val);
503 		}
504 
505 		if ((val & 3UL) == 2UL)
506 			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
507 	}
508 }
509 
/*
 * Initialize the Intel-specific MCE features for CPU @c, in this order:
 * thermal monitoring, CMCI, LMCE and PPIN detection.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);

	intel_init_cmci();
	intel_init_lmce();

	intel_ppin_init(c);
}
517 
/*
 * Undo the Intel-specific MCE feature setup that needs undoing: currently
 * only LMCE is switched off. @c is unused.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
522