xref: /linux/arch/powerpc/kernel/mce.c (revision dd5b2498d845f925904cb2afabb6ba11bfc317c5)
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24 
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31 
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34 #include <asm/nmi.h>
35 
36 static DEFINE_PER_CPU(int, mce_nest_count);
37 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
38 
39 /* Queue for delayed MCE events. */
40 static DEFINE_PER_CPU(int, mce_queue_count);
41 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
42 
43 /* Queue for delayed MCE UE events. */
44 static DEFINE_PER_CPU(int, mce_ue_count);
45 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
46 					mce_ue_event_queue);
47 
48 static void machine_check_process_queued_event(struct irq_work *work);
49 void machine_check_ue_event(struct machine_check_event *evt);
50 static void machine_process_ue_event(struct work_struct *work);
51 
52 static struct irq_work mce_event_process_work = {
53         .func = machine_check_process_queued_event,
54 };
55 
56 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
57 
58 static void mce_set_error_info(struct machine_check_event *mce,
59 			       struct mce_error_info *mce_err)
60 {
61 	mce->error_type = mce_err->error_type;
62 	switch (mce_err->error_type) {
63 	case MCE_ERROR_TYPE_UE:
64 		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
65 		break;
66 	case MCE_ERROR_TYPE_SLB:
67 		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
68 		break;
69 	case MCE_ERROR_TYPE_ERAT:
70 		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
71 		break;
72 	case MCE_ERROR_TYPE_TLB:
73 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
74 		break;
75 	case MCE_ERROR_TYPE_USER:
76 		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
77 		break;
78 	case MCE_ERROR_TYPE_RA:
79 		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
80 		break;
81 	case MCE_ERROR_TYPE_LINK:
82 		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
83 		break;
84 	case MCE_ERROR_TYPE_UNKNOWN:
85 	default:
86 		break;
87 	}
88 }
89 
90 /*
91  * Decode and save high level MCE information into per cpu buffer which
92  * is an array of machine_check_event structure.
93  */
94 void save_mce_event(struct pt_regs *regs, long handled,
95 		    struct mce_error_info *mce_err,
96 		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
97 {
98 	int index = __this_cpu_inc_return(mce_nest_count) - 1;
99 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
100 
101 	/*
102 	 * Return if we don't have enough space to log mce event.
103 	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
104 	 * the check below will stop buffer overrun.
105 	 */
106 	if (index >= MAX_MC_EVT)
107 		return;
108 
109 	/* Populate generic machine check info */
110 	mce->version = MCE_V1;
111 	mce->srr0 = nip;
112 	mce->srr1 = regs->msr;
113 	mce->gpr3 = regs->gpr[3];
114 	mce->in_use = 1;
115 
116 	/* Mark it recovered if we have handled it and MSR(RI=1). */
117 	if (handled && (regs->msr & MSR_RI))
118 		mce->disposition = MCE_DISPOSITION_RECOVERED;
119 	else
120 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
121 
122 	mce->initiator = mce_err->initiator;
123 	mce->severity = mce_err->severity;
124 
125 	/*
126 	 * Populate the mce error_type and type-specific error_type.
127 	 */
128 	mce_set_error_info(mce, mce_err);
129 
130 	if (!addr)
131 		return;
132 
133 	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
134 		mce->u.tlb_error.effective_address_provided = true;
135 		mce->u.tlb_error.effective_address = addr;
136 	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
137 		mce->u.slb_error.effective_address_provided = true;
138 		mce->u.slb_error.effective_address = addr;
139 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
140 		mce->u.erat_error.effective_address_provided = true;
141 		mce->u.erat_error.effective_address = addr;
142 	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
143 		mce->u.user_error.effective_address_provided = true;
144 		mce->u.user_error.effective_address = addr;
145 	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
146 		mce->u.ra_error.effective_address_provided = true;
147 		mce->u.ra_error.effective_address = addr;
148 	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
149 		mce->u.link_error.effective_address_provided = true;
150 		mce->u.link_error.effective_address = addr;
151 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
152 		mce->u.ue_error.effective_address_provided = true;
153 		mce->u.ue_error.effective_address = addr;
154 		if (phys_addr != ULONG_MAX) {
155 			mce->u.ue_error.physical_address_provided = true;
156 			mce->u.ue_error.physical_address = phys_addr;
157 			machine_check_ue_event(mce);
158 		}
159 	}
160 	return;
161 }
162 
163 /*
164  * get_mce_event:
165  *	mce	Pointer to machine_check_event structure to be filled.
166  *	release Flag to indicate whether to free the event slot or not.
167  *		0 <= do not release the mce event. Caller will invoke
168  *		     release_mce_event() once event has been consumed.
169  *		1 <= release the slot.
170  *
171  *	return	1 = success
172  *		0 = failure
173  *
174  * get_mce_event() will be called by platform specific machine check
175  * handle routine and in KVM.
176  * When we call get_mce_event(), we are still in interrupt context and
177  * preemption will not be scheduled until ret_from_expect() routine
178  * is called.
179  */
180 int get_mce_event(struct machine_check_event *mce, bool release)
181 {
182 	int index = __this_cpu_read(mce_nest_count) - 1;
183 	struct machine_check_event *mc_evt;
184 	int ret = 0;
185 
186 	/* Sanity check */
187 	if (index < 0)
188 		return ret;
189 
190 	/* Check if we have MCE info to process. */
191 	if (index < MAX_MC_EVT) {
192 		mc_evt = this_cpu_ptr(&mce_event[index]);
193 		/* Copy the event structure and release the original */
194 		if (mce)
195 			*mce = *mc_evt;
196 		if (release)
197 			mc_evt->in_use = 0;
198 		ret = 1;
199 	}
200 	/* Decrement the count to free the slot. */
201 	if (release)
202 		__this_cpu_dec(mce_nest_count);
203 
204 	return ret;
205 }
206 
207 void release_mce_event(void)
208 {
209 	get_mce_event(NULL, true);
210 }
211 
212 
213 /*
214  * Queue up the MCE event which then can be handled later.
215  */
216 void machine_check_ue_event(struct machine_check_event *evt)
217 {
218 	int index;
219 
220 	index = __this_cpu_inc_return(mce_ue_count) - 1;
221 	/* If queue is full, just return for now. */
222 	if (index >= MAX_MC_EVT) {
223 		__this_cpu_dec(mce_ue_count);
224 		return;
225 	}
226 	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
227 
228 	/* Queue work to process this event later. */
229 	schedule_work(&mce_ue_event_work);
230 }
231 
232 /*
233  * Queue up the MCE event which then can be handled later.
234  */
235 void machine_check_queue_event(void)
236 {
237 	int index;
238 	struct machine_check_event evt;
239 
240 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
241 		return;
242 
243 	index = __this_cpu_inc_return(mce_queue_count) - 1;
244 	/* If queue is full, just return for now. */
245 	if (index >= MAX_MC_EVT) {
246 		__this_cpu_dec(mce_queue_count);
247 		return;
248 	}
249 	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
250 
251 	/* Queue irq work to process this event later. */
252 	irq_work_queue(&mce_event_process_work);
253 }
254 /*
255  * process pending MCE event from the mce event queue. This function will be
256  * called during syscall exit.
257  */
258 static void machine_process_ue_event(struct work_struct *work)
259 {
260 	int index;
261 	struct machine_check_event *evt;
262 
263 	while (__this_cpu_read(mce_ue_count) > 0) {
264 		index = __this_cpu_read(mce_ue_count) - 1;
265 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
266 #ifdef CONFIG_MEMORY_FAILURE
267 		/*
268 		 * This should probably queued elsewhere, but
269 		 * oh! well
270 		 */
271 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
272 			if (evt->u.ue_error.physical_address_provided) {
273 				unsigned long pfn;
274 
275 				pfn = evt->u.ue_error.physical_address >>
276 					PAGE_SHIFT;
277 				memory_failure(pfn, 0);
278 			} else
279 				pr_warn("Failed to identify bad address from "
280 					"where the uncorrectable error (UE) "
281 					"was generated\n");
282 		}
283 #endif
284 		__this_cpu_dec(mce_ue_count);
285 	}
286 }
287 /*
288  * process pending MCE event from the mce event queue. This function will be
289  * called during syscall exit.
290  */
291 static void machine_check_process_queued_event(struct irq_work *work)
292 {
293 	int index;
294 	struct machine_check_event *evt;
295 
296 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
297 
298 	/*
299 	 * For now just print it to console.
300 	 * TODO: log this error event to FSP or nvram.
301 	 */
302 	while (__this_cpu_read(mce_queue_count) > 0) {
303 		index = __this_cpu_read(mce_queue_count) - 1;
304 		evt = this_cpu_ptr(&mce_event_queue[index]);
305 		machine_check_print_event_info(evt, false, false);
306 		__this_cpu_dec(mce_queue_count);
307 	}
308 }
309 
310 void machine_check_print_event_info(struct machine_check_event *evt,
311 				    bool user_mode, bool in_guest)
312 {
313 	const char *level, *sevstr, *subtype;
314 	static const char *mc_ue_types[] = {
315 		"Indeterminate",
316 		"Instruction fetch",
317 		"Page table walk ifetch",
318 		"Load/Store",
319 		"Page table walk Load/Store",
320 	};
321 	static const char *mc_slb_types[] = {
322 		"Indeterminate",
323 		"Parity",
324 		"Multihit",
325 	};
326 	static const char *mc_erat_types[] = {
327 		"Indeterminate",
328 		"Parity",
329 		"Multihit",
330 	};
331 	static const char *mc_tlb_types[] = {
332 		"Indeterminate",
333 		"Parity",
334 		"Multihit",
335 	};
336 	static const char *mc_user_types[] = {
337 		"Indeterminate",
338 		"tlbie(l) invalid",
339 	};
340 	static const char *mc_ra_types[] = {
341 		"Indeterminate",
342 		"Instruction fetch (bad)",
343 		"Instruction fetch (foreign)",
344 		"Page table walk ifetch (bad)",
345 		"Page table walk ifetch (foreign)",
346 		"Load (bad)",
347 		"Store (bad)",
348 		"Page table walk Load/Store (bad)",
349 		"Page table walk Load/Store (foreign)",
350 		"Load/Store (foreign)",
351 	};
352 	static const char *mc_link_types[] = {
353 		"Indeterminate",
354 		"Instruction fetch (timeout)",
355 		"Page table walk ifetch (timeout)",
356 		"Load (timeout)",
357 		"Store (timeout)",
358 		"Page table walk Load/Store (timeout)",
359 	};
360 
361 	/* Print things out */
362 	if (evt->version != MCE_V1) {
363 		pr_err("Machine Check Exception, Unknown event version %d !\n",
364 		       evt->version);
365 		return;
366 	}
367 	switch (evt->severity) {
368 	case MCE_SEV_NO_ERROR:
369 		level = KERN_INFO;
370 		sevstr = "Harmless";
371 		break;
372 	case MCE_SEV_WARNING:
373 		level = KERN_WARNING;
374 		sevstr = "";
375 		break;
376 	case MCE_SEV_ERROR_SYNC:
377 		level = KERN_ERR;
378 		sevstr = "Severe";
379 		break;
380 	case MCE_SEV_FATAL:
381 	default:
382 		level = KERN_ERR;
383 		sevstr = "Fatal";
384 		break;
385 	}
386 
387 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
388 	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
389 	       "Recovered" : "Not recovered");
390 
391 	if (in_guest) {
392 		printk("%s  Guest NIP: %016llx\n", level, evt->srr0);
393 	} else if (user_mode) {
394 		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
395 			evt->srr0, current->pid, current->comm);
396 	} else {
397 		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
398 		       (void *)evt->srr0);
399 	}
400 
401 	printk("%s  Initiator: %s\n", level,
402 	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
403 	switch (evt->error_type) {
404 	case MCE_ERROR_TYPE_UE:
405 		subtype = evt->u.ue_error.ue_error_type <
406 			ARRAY_SIZE(mc_ue_types) ?
407 			mc_ue_types[evt->u.ue_error.ue_error_type]
408 			: "Unknown";
409 		printk("%s  Error type: UE [%s]\n", level, subtype);
410 		if (evt->u.ue_error.effective_address_provided)
411 			printk("%s    Effective address: %016llx\n",
412 			       level, evt->u.ue_error.effective_address);
413 		if (evt->u.ue_error.physical_address_provided)
414 			printk("%s    Physical address:  %016llx\n",
415 			       level, evt->u.ue_error.physical_address);
416 		break;
417 	case MCE_ERROR_TYPE_SLB:
418 		subtype = evt->u.slb_error.slb_error_type <
419 			ARRAY_SIZE(mc_slb_types) ?
420 			mc_slb_types[evt->u.slb_error.slb_error_type]
421 			: "Unknown";
422 		printk("%s  Error type: SLB [%s]\n", level, subtype);
423 		if (evt->u.slb_error.effective_address_provided)
424 			printk("%s    Effective address: %016llx\n",
425 			       level, evt->u.slb_error.effective_address);
426 		break;
427 	case MCE_ERROR_TYPE_ERAT:
428 		subtype = evt->u.erat_error.erat_error_type <
429 			ARRAY_SIZE(mc_erat_types) ?
430 			mc_erat_types[evt->u.erat_error.erat_error_type]
431 			: "Unknown";
432 		printk("%s  Error type: ERAT [%s]\n", level, subtype);
433 		if (evt->u.erat_error.effective_address_provided)
434 			printk("%s    Effective address: %016llx\n",
435 			       level, evt->u.erat_error.effective_address);
436 		break;
437 	case MCE_ERROR_TYPE_TLB:
438 		subtype = evt->u.tlb_error.tlb_error_type <
439 			ARRAY_SIZE(mc_tlb_types) ?
440 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
441 			: "Unknown";
442 		printk("%s  Error type: TLB [%s]\n", level, subtype);
443 		if (evt->u.tlb_error.effective_address_provided)
444 			printk("%s    Effective address: %016llx\n",
445 			       level, evt->u.tlb_error.effective_address);
446 		break;
447 	case MCE_ERROR_TYPE_USER:
448 		subtype = evt->u.user_error.user_error_type <
449 			ARRAY_SIZE(mc_user_types) ?
450 			mc_user_types[evt->u.user_error.user_error_type]
451 			: "Unknown";
452 		printk("%s  Error type: User [%s]\n", level, subtype);
453 		if (evt->u.user_error.effective_address_provided)
454 			printk("%s    Effective address: %016llx\n",
455 			       level, evt->u.user_error.effective_address);
456 		break;
457 	case MCE_ERROR_TYPE_RA:
458 		subtype = evt->u.ra_error.ra_error_type <
459 			ARRAY_SIZE(mc_ra_types) ?
460 			mc_ra_types[evt->u.ra_error.ra_error_type]
461 			: "Unknown";
462 		printk("%s  Error type: Real address [%s]\n", level, subtype);
463 		if (evt->u.ra_error.effective_address_provided)
464 			printk("%s    Effective address: %016llx\n",
465 			       level, evt->u.ra_error.effective_address);
466 		break;
467 	case MCE_ERROR_TYPE_LINK:
468 		subtype = evt->u.link_error.link_error_type <
469 			ARRAY_SIZE(mc_link_types) ?
470 			mc_link_types[evt->u.link_error.link_error_type]
471 			: "Unknown";
472 		printk("%s  Error type: Link [%s]\n", level, subtype);
473 		if (evt->u.link_error.effective_address_provided)
474 			printk("%s    Effective address: %016llx\n",
475 			       level, evt->u.link_error.effective_address);
476 		break;
477 	default:
478 	case MCE_ERROR_TYPE_UNKNOWN:
479 		printk("%s  Error type: Unknown\n", level);
480 		break;
481 	}
482 }
483 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
484 
485 /*
486  * This function is called in real mode. Strictly no printk's please.
487  *
488  * regs->nip and regs->msr contains srr0 and ssr1.
489  */
490 long machine_check_early(struct pt_regs *regs)
491 {
492 	long handled = 0;
493 
494 	hv_nmi_check_nonrecoverable(regs);
495 
496 	/*
497 	 * See if platform is capable of handling machine check.
498 	 */
499 	if (ppc_md.machine_check_early)
500 		handled = ppc_md.machine_check_early(regs);
501 	return handled;
502 }
503 
504 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
505 static enum {
506 	DTRIG_UNKNOWN,
507 	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
508 	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
509 } hmer_debug_trig_function;
510 
511 static int init_debug_trig_function(void)
512 {
513 	int pvr;
514 	struct device_node *cpun;
515 	struct property *prop = NULL;
516 	const char *str;
517 
518 	/* First look in the device tree */
519 	preempt_disable();
520 	cpun = of_get_cpu_node(smp_processor_id(), NULL);
521 	if (cpun) {
522 		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
523 					    prop, str) {
524 			if (strcmp(str, "bit17-vector-ci-load") == 0)
525 				hmer_debug_trig_function = DTRIG_VECTOR_CI;
526 			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
527 				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
528 		}
529 		of_node_put(cpun);
530 	}
531 	preempt_enable();
532 
533 	/* If we found the property, don't look at PVR */
534 	if (prop)
535 		goto out;
536 
537 	pvr = mfspr(SPRN_PVR);
538 	/* Check for POWER9 Nimbus (scale-out) */
539 	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
540 		/* DD2.2 and later */
541 		if ((pvr & 0xfff) >= 0x202)
542 			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
543 		/* DD2.0 and DD2.1 - used for vector CI load emulation */
544 		else if ((pvr & 0xfff) >= 0x200)
545 			hmer_debug_trig_function = DTRIG_VECTOR_CI;
546 	}
547 
548  out:
549 	switch (hmer_debug_trig_function) {
550 	case DTRIG_VECTOR_CI:
551 		pr_debug("HMI debug trigger used for vector CI load\n");
552 		break;
553 	case DTRIG_SUSPEND_ESCAPE:
554 		pr_debug("HMI debug trigger used for TM suspend escape\n");
555 		break;
556 	default:
557 		break;
558 	}
559 	return 0;
560 }
561 __initcall(init_debug_trig_function);
562 
563 /*
564  * Handle HMIs that occur as a result of a debug trigger.
565  * Return values:
566  * -1 means this is not a HMI cause that we know about
567  *  0 means no further handling is required
568  *  1 means further handling is required
569  */
570 long hmi_handle_debugtrig(struct pt_regs *regs)
571 {
572 	unsigned long hmer = mfspr(SPRN_HMER);
573 	long ret = 0;
574 
575 	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
576 	if (!((hmer & HMER_DEBUG_TRIG)
577 	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
578 		return -1;
579 
580 	hmer &= ~HMER_DEBUG_TRIG;
581 	/* HMER is a write-AND register */
582 	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
583 
584 	switch (hmer_debug_trig_function) {
585 	case DTRIG_VECTOR_CI:
586 		/*
587 		 * Now to avoid problems with soft-disable we
588 		 * only do the emulation if we are coming from
589 		 * host user space
590 		 */
591 		if (regs && user_mode(regs))
592 			ret = local_paca->hmi_p9_special_emu = 1;
593 
594 		break;
595 
596 	default:
597 		break;
598 	}
599 
600 	/*
601 	 * See if any other HMI causes remain to be handled
602 	 */
603 	if (hmer & mfspr(SPRN_HMEER))
604 		return -1;
605 
606 	return ret;
607 }
608 
609 /*
610  * Return values:
611  */
612 long hmi_exception_realmode(struct pt_regs *regs)
613 {
614 	int ret;
615 
616 	__this_cpu_inc(irq_stat.hmi_exceptions);
617 
618 	ret = hmi_handle_debugtrig(regs);
619 	if (ret >= 0)
620 		return ret;
621 
622 	wait_for_subcore_guest_exit();
623 
624 	if (ppc_md.hmi_exception_early)
625 		ppc_md.hmi_exception_early(regs);
626 
627 	wait_for_tb_resync();
628 
629 	return 1;
630 }
631