xref: /linux/arch/powerpc/kernel/mce.c (revision 923b3cf00b3ffc896543bac99affc0fa8553e41a)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/asm-prototypes.h>

#include "setup.h"

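/*
 * MCE events are captured in real mode into per-CPU buffers (see
 * save_mce_event()) and replayed later: ordinary events are printed
 * from irq_work, while UE events are bounced through a work queue so
 * that memory_failure() can run in a context that may sleep.
 */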
static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

static struct irq_work mce_ue_event_irq_work = {
	.func = machine_check_ue_irq_work,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

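/*
 * Register/unregister a notifier that is called, in process context,
 * for each queued UE event before any memory_failure() handling.
 */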
int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);

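/* Copy the type-specific error details into the saved event. */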
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	mce = &local_paca->mce_info->mce_event[index];
	/*
	 * Return if we don't have enough space to log the mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok, the
	 * check below stops buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			mce->u.ue_error.ignore_event = mce_err->ignore_event;
			machine_check_ue_event(mce);
		}
	}
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 = do not release the mce event. Caller will invoke
 *		    release_mce_event() once the event has been consumed.
 *		1 = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	return ret;
}

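/* Free the most recently saved event slot without copying it out. */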
void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

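/* irq_work callback: punt UE event processing to process context. */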
static void machine_check_ue_irq_work(struct irq_work *work)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the UE event, which can then be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));

	/* Queue work to process this event later. */
	irq_work_queue(&mce_ue_event_irq_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

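/*
 * If the faulting instruction has an exception table fixup, apply it
 * and mark the event to be ignored; the fixup handler takes care of
 * the error handling and reporting.
 */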
void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs->nip = extable_fixup(entry);
	}
}

/*
 * Process pending UE events from the MCE UE event queue. This function
 * runs in work queue context, so it may sleep (e.g. in memory_failure()).
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but oh well!
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event, it has a fixup handler which
		 * will do the appropriate error handling and reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else {
				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
			}
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function
 * runs later, in irq_work context.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (local_paca->mce_info->mce_queue_count > 0) {
		index = local_paca->mce_info->mce_queue_count - 1;
		evt = &local_paca->mce_info->mce_event_queue[index];

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			local_paca->mce_info->mce_queue_count--;
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		local_paca->mce_info->mce_queue_count--;
	}
}

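/*
 * Decode a saved event and print a human-readable summary to the
 * console, at a log level matching the event severity.
 */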
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
		"scv invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
		level, evt->cpu, sevstr, in_guest ? "Guest" : "",
		err_type, subtype, dar_str,
		evt->disposition == MCE_DISPOSITION_RECOVERED ?
		"Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
			level, evt->cpu, current->pid, current->comm,
			in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_BOOK3S_64
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long notrace machine_check_early(struct pt_regs *regs)
{
	long handled = 0;
	u8 ftrace_enabled = this_cpu_get_ftrace_enabled();

	this_cpu_set_ftrace_enabled(0);
	/* Do not use nmi_enter/exit for pseries hpte guest */
	if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
		nmi_enter();

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
		nmi_exit();

	this_cpu_set_ftrace_enabled(ftrace_enabled);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

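/*
 * Work out what the HMI debug trigger is used for on this CPU, first
 * from the device tree and, failing that, from the PVR.
 */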
static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}

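/*
 * Allocate the per-CPU mce_info buffers. They are placed below the
 * smaller of the bolted SLB limit and the RMA size so that they are
 * accessible from the real mode machine check handler.
 */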
void __init mce_init(void)
{
	struct mce_info *mce_info;
	u64 limit;
	int i;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);
	for_each_possible_cpu(i) {
		mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
						  __alignof__(*mce_info),
						  MEMBLOCK_LOW_LIMIT,
						  limit, cpu_to_node(i));
		if (!mce_info)
			goto err;
		paca_ptrs[i]->mce_info = mce_info;
	}
	return;
err:
	panic("Failed to allocate memory for MCE event data\n");
}