xref: /linux/arch/powerpc/kernel/mce.c (revision cda6618d060b5e8afc93e691d4bcd987f3dd4393)
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24 
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31 
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34 #include <asm/nmi.h>
35 
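/*
 * Events decoded by the real-mode handler are saved into the per-CPU
 * mce_event[] stack below (mce_nest_count tracks the nesting depth),
 * and are later copied into one of the delayed queues so they can be
 * reported from a safer context: irq_work for printing/logging, and a
 * workqueue for uncorrectable errors that may need memory_failure().
 */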
36 static DEFINE_PER_CPU(int, mce_nest_count);
37 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
38 
39 /* Queue for delayed MCE events. */
40 static DEFINE_PER_CPU(int, mce_queue_count);
41 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
42 
43 /* Queue for delayed MCE UE events. */
44 static DEFINE_PER_CPU(int, mce_ue_count);
45 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
46 					mce_ue_event_queue);
47 
48 static void machine_check_process_queued_event(struct irq_work *work);
49 void machine_check_ue_event(struct machine_check_event *evt);
50 static void machine_process_ue_event(struct work_struct *work);
51 
52 static struct irq_work mce_event_process_work = {
53 	.func = machine_check_process_queued_event,
54 };
55 
56 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
57 
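/*
 * Copy the type-specific error information from the decoded
 * mce_error_info into the machine_check_event that gets logged.
 */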
58 static void mce_set_error_info(struct machine_check_event *mce,
59 			       struct mce_error_info *mce_err)
60 {
61 	mce->error_type = mce_err->error_type;
62 	switch (mce_err->error_type) {
63 	case MCE_ERROR_TYPE_UE:
64 		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
65 		break;
66 	case MCE_ERROR_TYPE_SLB:
67 		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
68 		break;
69 	case MCE_ERROR_TYPE_ERAT:
70 		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
71 		break;
72 	case MCE_ERROR_TYPE_TLB:
73 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
74 		break;
75 	case MCE_ERROR_TYPE_USER:
76 		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
77 		break;
78 	case MCE_ERROR_TYPE_RA:
79 		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
80 		break;
81 	case MCE_ERROR_TYPE_LINK:
82 		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
83 		break;
84 	case MCE_ERROR_TYPE_UNKNOWN:
85 	default:
86 		break;
87 	}
88 }
89 
90 /*
91  * Decode and save high level MCE information into the per-CPU buffer,
92  * which is an array of machine_check_event structures.
93  */
94 void save_mce_event(struct pt_regs *regs, long handled,
95 		    struct mce_error_info *mce_err,
96 		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
97 {
98 	int index = __this_cpu_inc_return(mce_nest_count) - 1;
99 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
100 
101 	/*
102 	 * Return if we don't have enough space to log the MCE event.
103 	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok;
104 	 * the check below prevents a buffer overrun.
105 	 */
106 	if (index >= MAX_MC_EVT)
107 		return;
108 
109 	/* Populate generic machine check info */
110 	mce->version = MCE_V1;
111 	mce->srr0 = nip;
112 	mce->srr1 = regs->msr;
113 	mce->gpr3 = regs->gpr[3];
114 	mce->in_use = 1;
115 	mce->cpu = get_paca()->paca_index;
116 
117 	/* Mark it recovered if we have handled it and MSR(RI=1). */
118 	if (handled && (regs->msr & MSR_RI))
119 		mce->disposition = MCE_DISPOSITION_RECOVERED;
120 	else
121 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
122 
123 	mce->initiator = mce_err->initiator;
124 	mce->severity = mce_err->severity;
125 	mce->sync_error = mce_err->sync_error;
126 
127 	/*
128 	 * Populate the mce error_type and type-specific error_type.
129 	 */
130 	mce_set_error_info(mce, mce_err);
131 
132 	if (!addr)
133 		return;
134 
135 	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
136 		mce->u.tlb_error.effective_address_provided = true;
137 		mce->u.tlb_error.effective_address = addr;
138 	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
139 		mce->u.slb_error.effective_address_provided = true;
140 		mce->u.slb_error.effective_address = addr;
141 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
142 		mce->u.erat_error.effective_address_provided = true;
143 		mce->u.erat_error.effective_address = addr;
144 	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
145 		mce->u.user_error.effective_address_provided = true;
146 		mce->u.user_error.effective_address = addr;
147 	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
148 		mce->u.ra_error.effective_address_provided = true;
149 		mce->u.ra_error.effective_address = addr;
150 	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
151 		mce->u.link_error.effective_address_provided = true;
152 		mce->u.link_error.effective_address = addr;
153 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
154 		mce->u.ue_error.effective_address_provided = true;
155 		mce->u.ue_error.effective_address = addr;
156 		if (phys_addr != ULONG_MAX) {
157 			mce->u.ue_error.physical_address_provided = true;
158 			mce->u.ue_error.physical_address = phys_addr;
159 			machine_check_ue_event(mce);
160 		}
161 	}
162 	return;
163 }
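
/*
 * A minimal sketch of the expected caller - the CPU/platform specific
 * early machine check handler decodes the error and then logs it here;
 * "addr" and "phys_addr" below are placeholders for whatever addresses
 * the decoder managed to recover:
 *
 *	struct mce_error_info mce_err = { .error_type = MCE_ERROR_TYPE_UE };
 *	long handled = ...;	(attempt recovery, decode SRR1/DSISR)
 *
 *	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 */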
164 
165 /*
166  * get_mce_event:
167  *	mce	Pointer to machine_check_event structure to be filled.
168  *	release Flag to indicate whether to free the event slot or not.
169  *		0 = do not release the mce event. Caller will invoke
170  *		    release_mce_event() once the event has been consumed.
171  *		1 = release the slot.
172  *
173  *	return	1 = success
174  *		0 = failure
175  *
176  * get_mce_event() will be called by the platform-specific machine check
177  * handler routine and by KVM.
178  * When we call get_mce_event(), we are still in interrupt context and
179  * preemption will not be scheduled until the ret_from_except() routine
180  * is called.
181  */
182 int get_mce_event(struct machine_check_event *mce, bool release)
183 {
184 	int index = __this_cpu_read(mce_nest_count) - 1;
185 	struct machine_check_event *mc_evt;
186 	int ret = 0;
187 
188 	/* Sanity check */
189 	if (index < 0)
190 		return ret;
191 
192 	/* Check if we have MCE info to process. */
193 	if (index < MAX_MC_EVT) {
194 		mc_evt = this_cpu_ptr(&mce_event[index]);
195 		/* Copy the event structure and release the original */
196 		if (mce)
197 			*mce = *mc_evt;
198 		if (release)
199 			mc_evt->in_use = 0;
200 		ret = 1;
201 	}
202 	/* Decrement the count to free the slot. */
203 	if (release)
204 		__this_cpu_dec(mce_nest_count);
205 
206 	return ret;
207 }
208 
209 void release_mce_event(void)
210 {
211 	get_mce_event(NULL, true);
212 }
213 
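/*
 * Sketch of the intended calling pattern for a consumer that only wants
 * to peek at the event (flag names assumed from asm/mce.h):
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... inspect evt ...
 *		release_mce_event();
 *	}
 */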
214 
215 /*
216  * Queue up the MCE UE event so that it can be handled later from the workqueue.
217  */
218 void machine_check_ue_event(struct machine_check_event *evt)
219 {
220 	int index;
221 
222 	index = __this_cpu_inc_return(mce_ue_count) - 1;
223 	/* If queue is full, just return for now. */
224 	if (index >= MAX_MC_EVT) {
225 		__this_cpu_dec(mce_ue_count);
226 		return;
227 	}
228 	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
229 
230 	/* Queue work to process this event later. */
231 	schedule_work(&mce_ue_event_work);
232 }
233 
234 /*
235  * Queue up the MCE event so that it can be handled later via irq_work.
236  */
237 void machine_check_queue_event(void)
238 {
239 	int index;
240 	struct machine_check_event evt;
241 
242 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
243 		return;
244 
245 	index = __this_cpu_inc_return(mce_queue_count) - 1;
246 	/* If queue is full, just return for now. */
247 	if (index >= MAX_MC_EVT) {
248 		__this_cpu_dec(mce_queue_count);
249 		return;
250 	}
251 	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
252 
253 	/* Queue irq work to process this event later. */
254 	irq_work_queue(&mce_event_process_work);
255 }
256 /*
257  * Process pending MCE UE events from the UE event queue. This function
258  * runs from the system workqueue (see mce_ue_event_work).
259  */
260 static void machine_process_ue_event(struct work_struct *work)
261 {
262 	int index;
263 	struct machine_check_event *evt;
264 
265 	while (__this_cpu_read(mce_ue_count) > 0) {
266 		index = __this_cpu_read(mce_ue_count) - 1;
267 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
268 #ifdef CONFIG_MEMORY_FAILURE
269 		/*
270 		 * This should probably be queued elsewhere, but
271 		 * oh well.
272 		 */
273 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
274 			if (evt->u.ue_error.physical_address_provided) {
275 				unsigned long pfn;
276 
277 				pfn = evt->u.ue_error.physical_address >>
278 					PAGE_SHIFT;
279 				memory_failure(pfn, 0);
280 			} else
281 				pr_warn("Failed to identify bad address from "
282 					"where the uncorrectable error (UE) "
283 					"was generated\n");
284 		}
285 #endif
286 		__this_cpu_dec(mce_ue_count);
287 	}
288 }
289 /*
290  * Process pending MCE events from the MCE event queue. This function
291  * runs from irq_work context once it is safe to do so.
292  */
293 static void machine_check_process_queued_event(struct irq_work *work)
294 {
295 	int index;
296 	struct machine_check_event *evt;
297 
298 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
299 
300 	/*
301 	 * For now just print it to console.
302 	 * TODO: log this error event to FSP or nvram.
303 	 */
304 	while (__this_cpu_read(mce_queue_count) > 0) {
305 		index = __this_cpu_read(mce_queue_count) - 1;
306 		evt = this_cpu_ptr(&mce_event_queue[index]);
307 		machine_check_print_event_info(evt, false, false);
308 		__this_cpu_dec(mce_queue_count);
309 	}
310 }
311 
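/*
 * Render a machine_check_event in human readable form.  The printk level
 * is derived from the event severity; in_guest selects Guest vs. Host
 * reporting and, together with user_mode, decides whether the NIP is
 * printed with PID/comm or resolved to a kernel symbol.
 */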
312 void machine_check_print_event_info(struct machine_check_event *evt,
313 				    bool user_mode, bool in_guest)
314 {
315 	const char *level, *sevstr, *subtype, *err_type;
316 	uint64_t ea = 0, pa = 0;
317 	int n = 0;
318 	char dar_str[50];
319 	char pa_str[50];
320 	static const char *mc_ue_types[] = {
321 		"Indeterminate",
322 		"Instruction fetch",
323 		"Page table walk ifetch",
324 		"Load/Store",
325 		"Page table walk Load/Store",
326 	};
327 	static const char *mc_slb_types[] = {
328 		"Indeterminate",
329 		"Parity",
330 		"Multihit",
331 	};
332 	static const char *mc_erat_types[] = {
333 		"Indeterminate",
334 		"Parity",
335 		"Multihit",
336 	};
337 	static const char *mc_tlb_types[] = {
338 		"Indeterminate",
339 		"Parity",
340 		"Multihit",
341 	};
342 	static const char *mc_user_types[] = {
343 		"Indeterminate",
344 		"tlbie(l) invalid",
345 	};
346 	static const char *mc_ra_types[] = {
347 		"Indeterminate",
348 		"Instruction fetch (bad)",
349 		"Instruction fetch (foreign)",
350 		"Page table walk ifetch (bad)",
351 		"Page table walk ifetch (foreign)",
352 		"Load (bad)",
353 		"Store (bad)",
354 		"Page table walk Load/Store (bad)",
355 		"Page table walk Load/Store (foreign)",
356 		"Load/Store (foreign)",
357 	};
358 	static const char *mc_link_types[] = {
359 		"Indeterminate",
360 		"Instruction fetch (timeout)",
361 		"Page table walk ifetch (timeout)",
362 		"Load (timeout)",
363 		"Store (timeout)",
364 		"Page table walk Load/Store (timeout)",
365 	};
366 
367 	/* Print things out */
368 	if (evt->version != MCE_V1) {
369 		pr_err("Machine Check Exception, Unknown event version %d !\n",
370 		       evt->version);
371 		return;
372 	}
373 	switch (evt->severity) {
374 	case MCE_SEV_NO_ERROR:
375 		level = KERN_INFO;
376 		sevstr = "Harmless";
377 		break;
378 	case MCE_SEV_WARNING:
379 		level = KERN_WARNING;
380 		sevstr = "Warning";
381 		break;
382 	case MCE_SEV_SEVERE:
383 		level = KERN_ERR;
384 		sevstr = "Severe";
385 		break;
386 	case MCE_SEV_FATAL:
387 	default:
388 		level = KERN_ERR;
389 		sevstr = "Fatal";
390 		break;
391 	}
392 
393 	switch (evt->error_type) {
394 	case MCE_ERROR_TYPE_UE:
395 		err_type = "UE";
396 		subtype = evt->u.ue_error.ue_error_type <
397 			ARRAY_SIZE(mc_ue_types) ?
398 			mc_ue_types[evt->u.ue_error.ue_error_type]
399 			: "Unknown";
400 		if (evt->u.ue_error.effective_address_provided)
401 			ea = evt->u.ue_error.effective_address;
402 		if (evt->u.ue_error.physical_address_provided)
403 			pa = evt->u.ue_error.physical_address;
404 		break;
405 	case MCE_ERROR_TYPE_SLB:
406 		err_type = "SLB";
407 		subtype = evt->u.slb_error.slb_error_type <
408 			ARRAY_SIZE(mc_slb_types) ?
409 			mc_slb_types[evt->u.slb_error.slb_error_type]
410 			: "Unknown";
411 		if (evt->u.slb_error.effective_address_provided)
412 			ea = evt->u.slb_error.effective_address;
413 		break;
414 	case MCE_ERROR_TYPE_ERAT:
415 		err_type = "ERAT";
416 		subtype = evt->u.erat_error.erat_error_type <
417 			ARRAY_SIZE(mc_erat_types) ?
418 			mc_erat_types[evt->u.erat_error.erat_error_type]
419 			: "Unknown";
420 		if (evt->u.erat_error.effective_address_provided)
421 			ea = evt->u.erat_error.effective_address;
422 		break;
423 	case MCE_ERROR_TYPE_TLB:
424 		err_type = "TLB";
425 		subtype = evt->u.tlb_error.tlb_error_type <
426 			ARRAY_SIZE(mc_tlb_types) ?
427 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
428 			: "Unknown";
429 		if (evt->u.tlb_error.effective_address_provided)
430 			ea = evt->u.tlb_error.effective_address;
431 		break;
432 	case MCE_ERROR_TYPE_USER:
433 		err_type = "User";
434 		subtype = evt->u.user_error.user_error_type <
435 			ARRAY_SIZE(mc_user_types) ?
436 			mc_user_types[evt->u.user_error.user_error_type]
437 			: "Unknown";
438 		if (evt->u.user_error.effective_address_provided)
439 			ea = evt->u.user_error.effective_address;
440 		break;
441 	case MCE_ERROR_TYPE_RA:
442 		err_type = "Real address";
443 		subtype = evt->u.ra_error.ra_error_type <
444 			ARRAY_SIZE(mc_ra_types) ?
445 			mc_ra_types[evt->u.ra_error.ra_error_type]
446 			: "Unknown";
447 		if (evt->u.ra_error.effective_address_provided)
448 			ea = evt->u.ra_error.effective_address;
449 		break;
450 	case MCE_ERROR_TYPE_LINK:
451 		err_type = "Link";
452 		subtype = evt->u.link_error.link_error_type <
453 			ARRAY_SIZE(mc_link_types) ?
454 			mc_link_types[evt->u.link_error.link_error_type]
455 			: "Unknown";
456 		if (evt->u.link_error.effective_address_provided)
457 			ea = evt->u.link_error.effective_address;
458 		break;
459 	default:
460 	case MCE_ERROR_TYPE_UNKNOWN:
461 		err_type = "Unknown";
462 		subtype = "";
463 		break;
464 	}
465 
466 	dar_str[0] = pa_str[0] = '\0';
467 	if (ea && evt->srr0 != ea) {
468 		/* Load/Store address */
469 		n = sprintf(dar_str, "DAR: %016llx ", ea);
470 		if (pa)
471 			sprintf(dar_str + n, "paddr: %016llx ", pa);
472 	} else if (pa) {
473 		sprintf(pa_str, " paddr: %016llx", pa);
474 	}
475 
476 	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
477 		level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
478 		err_type, subtype, dar_str,
479 		evt->disposition == MCE_DISPOSITION_RECOVERED ?
480 		"Recovered" : "Not recovered");
481 
482 	if (in_guest || user_mode) {
483 		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
484 			level, evt->cpu, current->pid, current->comm,
485 			in_guest ? "Guest " : "", evt->srr0, pa_str);
486 	} else {
487 		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
488 			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
489 	}
490 }
491 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
492 
493 /*
494  * This function is called in real mode. Strictly no printk's please.
495  *
496  * regs->nip and regs->msr contain SRR0 and SRR1.
497  */
498 long machine_check_early(struct pt_regs *regs)
499 {
500 	long handled = 0;
501 
502 	hv_nmi_check_nonrecoverable(regs);
503 
504 	/*
505 	 * See if platform is capable of handling machine check.
506 	 */
507 	if (ppc_md.machine_check_early)
508 		handled = ppc_md.machine_check_early(regs);
509 	return handled;
510 }
511 
512 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
513 static enum {
514 	DTRIG_UNKNOWN,
515 	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
516 	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
517 } hmer_debug_trig_function;
518 
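/*
 * Work out what the HMER_DEBUG_TRIG bit is used for on this CPU: first
 * from the "ibm,hmi-special-triggers" device tree property, and failing
 * that from the POWER9 Nimbus PVR revision.
 */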
519 static int init_debug_trig_function(void)
520 {
521 	int pvr;
522 	struct device_node *cpun;
523 	struct property *prop = NULL;
524 	const char *str;
525 
526 	/* First look in the device tree */
527 	preempt_disable();
528 	cpun = of_get_cpu_node(smp_processor_id(), NULL);
529 	if (cpun) {
530 		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
531 					    prop, str) {
532 			if (strcmp(str, "bit17-vector-ci-load") == 0)
533 				hmer_debug_trig_function = DTRIG_VECTOR_CI;
534 			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
535 				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
536 		}
537 		of_node_put(cpun);
538 	}
539 	preempt_enable();
540 
541 	/* If we found the property, don't look at PVR */
542 	if (prop)
543 		goto out;
544 
545 	pvr = mfspr(SPRN_PVR);
546 	/* Check for POWER9 Nimbus (scale-out) */
547 	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
548 		/* DD2.2 and later */
549 		if ((pvr & 0xfff) >= 0x202)
550 			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
551 		/* DD2.0 and DD2.1 - used for vector CI load emulation */
552 		else if ((pvr & 0xfff) >= 0x200)
553 			hmer_debug_trig_function = DTRIG_VECTOR_CI;
554 	}
555 
556  out:
557 	switch (hmer_debug_trig_function) {
558 	case DTRIG_VECTOR_CI:
559 		pr_debug("HMI debug trigger used for vector CI load\n");
560 		break;
561 	case DTRIG_SUSPEND_ESCAPE:
562 		pr_debug("HMI debug trigger used for TM suspend escape\n");
563 		break;
564 	default:
565 		break;
566 	}
567 	return 0;
568 }
569 __initcall(init_debug_trig_function);
570 
571 /*
572  * Handle HMIs that occur as a result of a debug trigger.
573  * Return values:
574  * -1 means this is not an HMI cause that we know about
575  *  0 means no further handling is required
576  *  1 means further handling is required
577  */
578 long hmi_handle_debugtrig(struct pt_regs *regs)
579 {
580 	unsigned long hmer = mfspr(SPRN_HMER);
581 	long ret = 0;
582 
583 	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
584 	if (!((hmer & HMER_DEBUG_TRIG)
585 	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
586 		return -1;
587 
588 	hmer &= ~HMER_DEBUG_TRIG;
589 	/* HMER is a write-AND register: bits written as 0 are cleared, the rest are preserved */
590 	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
591 
592 	switch (hmer_debug_trig_function) {
593 	case DTRIG_VECTOR_CI:
594 		/*
595 		 * To avoid problems with soft-disable, we only do
596 		 * the emulation if we are coming from host user
597 		 * space.
598 		 */
599 		if (regs && user_mode(regs))
600 			ret = local_paca->hmi_p9_special_emu = 1;
601 
602 		break;
603 
604 	default:
605 		break;
606 	}
607 
608 	/*
609 	 * See if any other HMI causes remain to be handled
610 	 */
611 	if (hmer & mfspr(SPRN_HMEER))
612 		return -1;
613 
614 	return ret;
615 }
616 
617 /*
618  * Return values: 0 = no further handling required, 1 = further handling required
619  */
620 long hmi_exception_realmode(struct pt_regs *regs)
621 {
622 	int ret;
623 
624 	__this_cpu_inc(irq_stat.hmi_exceptions);
625 
626 	ret = hmi_handle_debugtrig(regs);
627 	if (ret >= 0)
628 		return ret;
629 
630 	wait_for_subcore_guest_exit();
631 
632 	if (ppc_md.hmi_exception_early)
633 		ppc_md.hmi_exception_early(regs);
634 
635 	wait_for_tb_resync();
636 
637 	return 1;
638 }
639