xref: /linux/arch/powerpc/kernel/mce.c (revision c05772018491e5294f55d63b239ab0d532e96616)
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24 
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31 
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34 
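/*
 * Per-CPU MCE event storage. Machine checks can nest (another MCE can be
 * taken before an earlier event has been consumed), so each CPU keeps an
 * array of up to MAX_MC_EVT events, indexed via mce_nest_count.
 */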
35 static DEFINE_PER_CPU(int, mce_nest_count);
36 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37 
38 /* Queue for delayed MCE events. */
39 static DEFINE_PER_CPU(int, mce_queue_count);
40 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41 
42 /* Queue for delayed MCE UE events. */
43 static DEFINE_PER_CPU(int, mce_ue_count);
44 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 					mce_ue_event_queue);
46 
47 static void machine_check_process_queued_event(struct irq_work *work);
48 void machine_check_ue_event(struct machine_check_event *evt);
49 static void machine_process_ue_event(struct work_struct *work);
50 
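/*
 * Two deferral mechanisms move MCE processing out of the real-mode machine
 * check context: an irq_work that prints/processes queued events, and a
 * regular work item that hands UE events to memory_failure().
 */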
51 static struct irq_work mce_event_process_work = {
52         .func = machine_check_process_queued_event,
53 };
54 
55 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56 
57 static void mce_set_error_info(struct machine_check_event *mce,
58 			       struct mce_error_info *mce_err)
59 {
60 	mce->error_type = mce_err->error_type;
61 	switch (mce_err->error_type) {
62 	case MCE_ERROR_TYPE_UE:
63 		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 		break;
65 	case MCE_ERROR_TYPE_SLB:
66 		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 		break;
68 	case MCE_ERROR_TYPE_ERAT:
69 		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 		break;
71 	case MCE_ERROR_TYPE_TLB:
72 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 		break;
74 	case MCE_ERROR_TYPE_USER:
75 		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 		break;
77 	case MCE_ERROR_TYPE_RA:
78 		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 		break;
80 	case MCE_ERROR_TYPE_LINK:
81 		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 		break;
83 	case MCE_ERROR_TYPE_UNKNOWN:
84 	default:
85 		break;
86 	}
87 }
88 
89 /*
90  * Decode and save high-level MCE information into the per-CPU buffer, which
91  * is an array of machine_check_event structures.
92  */
93 void save_mce_event(struct pt_regs *regs, long handled,
94 		    struct mce_error_info *mce_err,
95 		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
96 {
97 	int index = __this_cpu_inc_return(mce_nest_count) - 1;
98 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
99 
100 	/*
101 	 * Return if we don't have enough space to log mce event.
102 	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
103 	 * the check below will stop buffer overrun.
104 	 */
105 	if (index >= MAX_MC_EVT)
106 		return;
107 
108 	/* Populate generic machine check info */
109 	mce->version = MCE_V1;
110 	mce->srr0 = nip;
111 	mce->srr1 = regs->msr;
112 	mce->gpr3 = regs->gpr[3];
113 	mce->in_use = 1;
114 
115 	/* Mark it recovered if we have handled it and MSR(RI=1). */
116 	if (handled && (regs->msr & MSR_RI))
117 		mce->disposition = MCE_DISPOSITION_RECOVERED;
118 	else
119 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
120 
121 	mce->initiator = mce_err->initiator;
122 	mce->severity = mce_err->severity;
123 
124 	/*
125 	 * Populate the mce error_type and type-specific error_type.
126 	 */
127 	mce_set_error_info(mce, mce_err);
128 
129 	if (!addr)
130 		return;
131 
132 	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 		mce->u.tlb_error.effective_address_provided = true;
134 		mce->u.tlb_error.effective_address = addr;
135 	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 		mce->u.slb_error.effective_address_provided = true;
137 		mce->u.slb_error.effective_address = addr;
138 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 		mce->u.erat_error.effective_address_provided = true;
140 		mce->u.erat_error.effective_address = addr;
141 	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 		mce->u.user_error.effective_address_provided = true;
143 		mce->u.user_error.effective_address = addr;
144 	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 		mce->u.ra_error.effective_address_provided = true;
146 		mce->u.ra_error.effective_address = addr;
147 	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 		mce->u.link_error.effective_address_provided = true;
149 		mce->u.link_error.effective_address = addr;
150 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 		mce->u.ue_error.effective_address_provided = true;
152 		mce->u.ue_error.effective_address = addr;
153 		if (phys_addr != ULONG_MAX) {
154 			mce->u.ue_error.physical_address_provided = true;
155 			mce->u.ue_error.physical_address = phys_addr;
156 			machine_check_ue_event(mce);
157 		}
158 	}
159 	return;
160 }
161 
162 /*
163  * get_mce_event:
164  *	mce	Pointer to machine_check_event structure to be filled.
165  *	release Flag to indicate whether to free the event slot or not.
166  *		0 <= do not release the mce event. Caller will invoke
167  *		     release_mce_event() once event has been consumed.
168  *		1 <= release the slot.
169  *
170  *	return	1 = success
171  *		0 = failure
172  *
173  * get_mce_event() will be called by platform-specific machine check
174  * handler routines and by KVM.
175  * When we call get_mce_event(), we are still in interrupt context and
176  * preemption will not be scheduled until the ret_from_except() routine
177  * is called.
178  */
179 int get_mce_event(struct machine_check_event *mce, bool release)
180 {
181 	int index = __this_cpu_read(mce_nest_count) - 1;
182 	struct machine_check_event *mc_evt;
183 	int ret = 0;
184 
185 	/* Sanity check */
186 	if (index < 0)
187 		return ret;
188 
189 	/* Check if we have MCE info to process. */
190 	if (index < MAX_MC_EVT) {
191 		mc_evt = this_cpu_ptr(&mce_event[index]);
192 		/* Copy the event structure and release the original */
193 		if (mce)
194 			*mce = *mc_evt;
195 		if (release)
196 			mc_evt->in_use = 0;
197 		ret = 1;
198 	}
199 	/* Decrement the count to free the slot. */
200 	if (release)
201 		__this_cpu_dec(mce_nest_count);
202 
203 	return ret;
204 }
205 
206 void release_mce_event(void)
207 {
208 	get_mce_event(NULL, true);
209 }
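/*
 * Illustrative usage sketch (not taken from elsewhere in this file): a
 * platform machine check handler can copy out and free the most recent
 * event in a single call:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE)) {
 *		// use the copy in evt; the per-cpu slot has been released
 *	}
 */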
210 
211 
212 /*
213  * Queue up the MCE UE event which can then be handled later.
214  */
215 void machine_check_ue_event(struct machine_check_event *evt)
216 {
217 	int index;
218 
219 	index = __this_cpu_inc_return(mce_ue_count) - 1;
220 	/* If queue is full, just return for now. */
221 	if (index >= MAX_MC_EVT) {
222 		__this_cpu_dec(mce_ue_count);
223 		return;
224 	}
225 	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226 
227 	/* Queue work to process this event later. */
228 	schedule_work(&mce_ue_event_work);
229 }
230 
231 /*
232  * Queue up the MCE event which can then be handled later.
233  */
234 void machine_check_queue_event(void)
235 {
236 	int index;
237 	struct machine_check_event evt;
238 
239 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 		return;
241 
242 	index = __this_cpu_inc_return(mce_queue_count) - 1;
243 	/* If queue is full, just return for now. */
244 	if (index >= MAX_MC_EVT) {
245 		__this_cpu_dec(mce_queue_count);
246 		return;
247 	}
248 	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
249 
250 	/* Queue irq work to process this event later. */
251 	irq_work_queue(&mce_event_process_work);
252 }
253 /*
254  * Process pending MCE UE events from the MCE UE event queue. This function
255  * runs from the workqueue scheduled by machine_check_ue_event().
256  */
257 static void machine_process_ue_event(struct work_struct *work)
258 {
259 	int index;
260 	struct machine_check_event *evt;
261 
262 	while (__this_cpu_read(mce_ue_count) > 0) {
263 		index = __this_cpu_read(mce_ue_count) - 1;
264 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265 #ifdef CONFIG_MEMORY_FAILURE
266 		/*
267 		 * This should probably be queued elsewhere,
268 		 * but oh well.
269 		 */
270 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 			if (evt->u.ue_error.physical_address_provided) {
272 				unsigned long pfn;
273 
274 				pfn = evt->u.ue_error.physical_address >>
275 					PAGE_SHIFT;
276 				memory_failure(pfn, 0);
277 			} else
278 				pr_warn("Failed to identify bad address from "
279 					"where the uncorrectable error (UE) "
280 					"was generated\n");
281 		}
282 #endif
283 		__this_cpu_dec(mce_ue_count);
284 	}
285 }
286 /*
287  * Process all pending MCE events from the MCE event queue. This is the
288  * irq_work handler queued by machine_check_queue_event().
289  */
290 static void machine_check_process_queued_event(struct irq_work *work)
291 {
292 	int index;
293 	struct machine_check_event *evt;
294 
295 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
296 
297 	/*
298 	 * For now just print it to console.
299 	 * TODO: log this error event to FSP or nvram.
300 	 */
301 	while (__this_cpu_read(mce_queue_count) > 0) {
302 		index = __this_cpu_read(mce_queue_count) - 1;
303 		evt = this_cpu_ptr(&mce_event_queue[index]);
304 		machine_check_print_event_info(evt, false, false);
305 		__this_cpu_dec(mce_queue_count);
306 	}
307 }
308 
309 void machine_check_print_event_info(struct machine_check_event *evt,
310 				    bool user_mode, bool in_guest)
311 {
312 	const char *level, *sevstr, *subtype;
313 	static const char *mc_ue_types[] = {
314 		"Indeterminate",
315 		"Instruction fetch",
316 		"Page table walk ifetch",
317 		"Load/Store",
318 		"Page table walk Load/Store",
319 	};
320 	static const char *mc_slb_types[] = {
321 		"Indeterminate",
322 		"Parity",
323 		"Multihit",
324 	};
325 	static const char *mc_erat_types[] = {
326 		"Indeterminate",
327 		"Parity",
328 		"Multihit",
329 	};
330 	static const char *mc_tlb_types[] = {
331 		"Indeterminate",
332 		"Parity",
333 		"Multihit",
334 	};
335 	static const char *mc_user_types[] = {
336 		"Indeterminate",
337 		"tlbie(l) invalid",
338 	};
339 	static const char *mc_ra_types[] = {
340 		"Indeterminate",
341 		"Instruction fetch (bad)",
342 		"Instruction fetch (foreign)",
343 		"Page table walk ifetch (bad)",
344 		"Page table walk ifetch (foreign)",
345 		"Load (bad)",
346 		"Store (bad)",
347 		"Page table walk Load/Store (bad)",
348 		"Page table walk Load/Store (foreign)",
349 		"Load/Store (foreign)",
350 	};
351 	static const char *mc_link_types[] = {
352 		"Indeterminate",
353 		"Instruction fetch (timeout)",
354 		"Page table walk ifetch (timeout)",
355 		"Load (timeout)",
356 		"Store (timeout)",
357 		"Page table walk Load/Store (timeout)",
358 	};
359 
360 	/* Print things out */
361 	if (evt->version != MCE_V1) {
362 		pr_err("Machine Check Exception, Unknown event version %d !\n",
363 		       evt->version);
364 		return;
365 	}
366 	switch (evt->severity) {
367 	case MCE_SEV_NO_ERROR:
368 		level = KERN_INFO;
369 		sevstr = "Harmless";
370 		break;
371 	case MCE_SEV_WARNING:
372 		level = KERN_WARNING;
373 		sevstr = "";
374 		break;
375 	case MCE_SEV_ERROR_SYNC:
376 		level = KERN_ERR;
377 		sevstr = "Severe";
378 		break;
379 	case MCE_SEV_FATAL:
380 	default:
381 		level = KERN_ERR;
382 		sevstr = "Fatal";
383 		break;
384 	}
385 
386 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
387 	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
388 	       "Recovered" : "Not recovered");
389 
390 	if (in_guest) {
391 		printk("%s  Guest NIP: %016llx\n", level, evt->srr0);
392 	} else if (user_mode) {
393 		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
394 			evt->srr0, current->pid, current->comm);
395 	} else {
396 		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
397 		       (void *)evt->srr0);
398 	}
399 
400 	printk("%s  Initiator: %s\n", level,
401 	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
402 	switch (evt->error_type) {
403 	case MCE_ERROR_TYPE_UE:
404 		subtype = evt->u.ue_error.ue_error_type <
405 			ARRAY_SIZE(mc_ue_types) ?
406 			mc_ue_types[evt->u.ue_error.ue_error_type]
407 			: "Unknown";
408 		printk("%s  Error type: UE [%s]\n", level, subtype);
409 		if (evt->u.ue_error.effective_address_provided)
410 			printk("%s    Effective address: %016llx\n",
411 			       level, evt->u.ue_error.effective_address);
412 		if (evt->u.ue_error.physical_address_provided)
413 			printk("%s    Physical address:  %016llx\n",
414 			       level, evt->u.ue_error.physical_address);
415 		break;
416 	case MCE_ERROR_TYPE_SLB:
417 		subtype = evt->u.slb_error.slb_error_type <
418 			ARRAY_SIZE(mc_slb_types) ?
419 			mc_slb_types[evt->u.slb_error.slb_error_type]
420 			: "Unknown";
421 		printk("%s  Error type: SLB [%s]\n", level, subtype);
422 		if (evt->u.slb_error.effective_address_provided)
423 			printk("%s    Effective address: %016llx\n",
424 			       level, evt->u.slb_error.effective_address);
425 		break;
426 	case MCE_ERROR_TYPE_ERAT:
427 		subtype = evt->u.erat_error.erat_error_type <
428 			ARRAY_SIZE(mc_erat_types) ?
429 			mc_erat_types[evt->u.erat_error.erat_error_type]
430 			: "Unknown";
431 		printk("%s  Error type: ERAT [%s]\n", level, subtype);
432 		if (evt->u.erat_error.effective_address_provided)
433 			printk("%s    Effective address: %016llx\n",
434 			       level, evt->u.erat_error.effective_address);
435 		break;
436 	case MCE_ERROR_TYPE_TLB:
437 		subtype = evt->u.tlb_error.tlb_error_type <
438 			ARRAY_SIZE(mc_tlb_types) ?
439 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
440 			: "Unknown";
441 		printk("%s  Error type: TLB [%s]\n", level, subtype);
442 		if (evt->u.tlb_error.effective_address_provided)
443 			printk("%s    Effective address: %016llx\n",
444 			       level, evt->u.tlb_error.effective_address);
445 		break;
446 	case MCE_ERROR_TYPE_USER:
447 		subtype = evt->u.user_error.user_error_type <
448 			ARRAY_SIZE(mc_user_types) ?
449 			mc_user_types[evt->u.user_error.user_error_type]
450 			: "Unknown";
451 		printk("%s  Error type: User [%s]\n", level, subtype);
452 		if (evt->u.user_error.effective_address_provided)
453 			printk("%s    Effective address: %016llx\n",
454 			       level, evt->u.user_error.effective_address);
455 		break;
456 	case MCE_ERROR_TYPE_RA:
457 		subtype = evt->u.ra_error.ra_error_type <
458 			ARRAY_SIZE(mc_ra_types) ?
459 			mc_ra_types[evt->u.ra_error.ra_error_type]
460 			: "Unknown";
461 		printk("%s  Error type: Real address [%s]\n", level, subtype);
462 		if (evt->u.ra_error.effective_address_provided)
463 			printk("%s    Effective address: %016llx\n",
464 			       level, evt->u.ra_error.effective_address);
465 		break;
466 	case MCE_ERROR_TYPE_LINK:
467 		subtype = evt->u.link_error.link_error_type <
468 			ARRAY_SIZE(mc_link_types) ?
469 			mc_link_types[evt->u.link_error.link_error_type]
470 			: "Unknown";
471 		printk("%s  Error type: Link [%s]\n", level, subtype);
472 		if (evt->u.link_error.effective_address_provided)
473 			printk("%s    Effective address: %016llx\n",
474 			       level, evt->u.link_error.effective_address);
475 		break;
476 	default:
477 	case MCE_ERROR_TYPE_UNKNOWN:
478 		printk("%s  Error type: Unknown\n", level);
479 		break;
480 	}
481 }
482 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
483 
484 /*
485  * This function is called in real mode. Strictly no printk's please.
486  *
487  * regs->nip and regs->msr contain SRR0 and SRR1.
488  */
489 long machine_check_early(struct pt_regs *regs)
490 {
491 	long handled = 0;
492 
493 	/*
494 	 * See if platform is capable of handling machine check.
495 	 */
496 	if (ppc_md.machine_check_early)
497 		handled = ppc_md.machine_check_early(regs);
498 	return handled;
499 }
500 
501 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
502 static enum {
503 	DTRIG_UNKNOWN,
504 	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
505 	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
506 } hmer_debug_trig_function;
507 
508 static int init_debug_trig_function(void)
509 {
510 	int pvr;
511 	struct device_node *cpun;
512 	struct property *prop = NULL;
513 	const char *str;
514 
515 	/* First look in the device tree */
516 	preempt_disable();
517 	cpun = of_get_cpu_node(smp_processor_id(), NULL);
518 	if (cpun) {
519 		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
520 					    prop, str) {
521 			if (strcmp(str, "bit17-vector-ci-load") == 0)
522 				hmer_debug_trig_function = DTRIG_VECTOR_CI;
523 			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
524 				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
525 		}
526 		of_node_put(cpun);
527 	}
528 	preempt_enable();
529 
530 	/* If we found the property, don't look at PVR */
531 	if (prop)
532 		goto out;
533 
534 	pvr = mfspr(SPRN_PVR);
535 	/* Check for POWER9 Nimbus (scale-out) */
536 	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
537 		/* DD2.2 and later */
538 		if ((pvr & 0xfff) >= 0x202)
539 			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
540 		/* DD2.0 and DD2.1 - used for vector CI load emulation */
541 		else if ((pvr & 0xfff) >= 0x200)
542 			hmer_debug_trig_function = DTRIG_VECTOR_CI;
543 	}
544 
545  out:
546 	switch (hmer_debug_trig_function) {
547 	case DTRIG_VECTOR_CI:
548 		pr_debug("HMI debug trigger used for vector CI load\n");
549 		break;
550 	case DTRIG_SUSPEND_ESCAPE:
551 		pr_debug("HMI debug trigger used for TM suspend escape\n");
552 		break;
553 	default:
554 		break;
555 	}
556 	return 0;
557 }
558 __initcall(init_debug_trig_function);
559 
560 /*
561  * Handle HMIs that occur as a result of a debug trigger.
562  * Return values:
563  * -1 means this is not a HMI cause that we know about
564  *  0 means no further handling is required
565  *  1 means further handling is required
566  */
567 long hmi_handle_debugtrig(struct pt_regs *regs)
568 {
569 	unsigned long hmer = mfspr(SPRN_HMER);
570 	long ret = 0;
571 
572 	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
573 	if (!((hmer & HMER_DEBUG_TRIG)
574 	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
575 		return -1;
576 
577 	hmer &= ~HMER_DEBUG_TRIG;
578 	/* HMER is a write-AND register */
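	/* (writing ~HMER_DEBUG_TRIG therefore clears only that bit) */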
579 	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
580 
581 	switch (hmer_debug_trig_function) {
582 	case DTRIG_VECTOR_CI:
583 		/*
584 		 * Now, to avoid problems with soft-disable, we
585 		 * only do the emulation if we are coming from
586 		 * host user space.
587 		 */
588 		if (regs && user_mode(regs))
589 			ret = local_paca->hmi_p9_special_emu = 1;
590 
591 		break;
592 
593 	default:
594 		break;
595 	}
596 
597 	/*
598 	 * See if any other HMI causes remain to be handled
599 	 */
600 	if (hmer & mfspr(SPRN_HMEER))
601 		return -1;
602 
603 	return ret;
604 }
605 
606 /*
607  * Return values: 0 = no further handling is required, 1 = further handling is required.
608  */
609 long hmi_exception_realmode(struct pt_regs *regs)
610 {
611 	int ret;
612 
613 	__this_cpu_inc(irq_stat.hmi_exceptions);
614 
615 	ret = hmi_handle_debugtrig(regs);
616 	if (ret >= 0)
617 		return ret;
618 
619 	wait_for_subcore_guest_exit();
620 
621 	if (ppc_md.hmi_exception_early)
622 		ppc_md.hmi_exception_early(regs);
623 
624 	wait_for_tb_resync();
625 
626 	return 1;
627 }
628