xref: /titanic_51/usr/src/uts/sun4v/os/error.c (revision 07d06da50d310a325b457d6330165aebab1e0064)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/machsystm.h>
28 #include <sys/sysmacros.h>
29 #include <sys/cpuvar.h>
30 #include <sys/async.h>
31 #include <sys/ontrap.h>
32 #include <sys/ddifm.h>
33 #include <sys/hypervisor_api.h>
34 #include <sys/errorq.h>
35 #include <sys/promif.h>
36 #include <sys/prom_plat.h>
37 #include <sys/x_call.h>
38 #include <sys/error.h>
39 #include <sys/fm/util.h>
40 #include <sys/ivintr.h>
41 #include <sys/archsystm.h>
42 
43 #define	MAX_CE_FLTS		10
44 #define	MAX_ASYNC_FLTS		6
45 
46 errorq_t *ue_queue;			/* queue of uncorrectable errors */
47 errorq_t *ce_queue;			/* queue of correctable errors */
48 
49 /*
50  * Used by the memory test driver.
51  * ce_verbose_memory - covers CEs in DIMMs
52  * ce_verbose_other - covers "others" (ecache, IO, etc.)
53  *
54  * If the value is 0, nothing is logged.
55  * If the value is 1, the error is logged to the log file but not the console.
56  * If the value is 2, the error is logged to the log file and console.
57  */
58 int	ce_verbose_memory = 1;
59 int	ce_verbose_other = 1;
60 
61 int	ce_show_data = 0;
62 int	ce_debug = 0;
63 int	ue_debug = 0;
64 int	reset_debug = 0;
65 
66 /*
67  * Tunables for controlling the handling of asynchronous faults (AFTs). Setting
68  * these to non-default values on a non-DEBUG kernel is NOT supported.
69  */
70 int	aft_verbose = 0;	/* log AFT messages > 1 to log only */
71 int	aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
72 int	aft_testfatal = 0;	/* force all AFTs to panic immediately */
73 
74 /*
75  * Used for vbsc hostshutdown (power-off button)
76  */
77 int	err_shutdown_triggered = 0;	/* only once */
78 uint64_t err_shutdown_inum = 0;	/* used to pull the trigger */
79 
80 /*
81  * Used to print NRE/RE via system variable or kmdb
82  */
83 int		printerrh = 0;		/* see /etc/system */
84 static void	errh_er_print(errh_er_t *, const char *);
85 kmutex_t	errh_print_lock;
86 
87 /*
88  * Defined in bus_func.c but initialized in error_init()
89  */
90 extern kmutex_t bfd_lock;
91 
92 static uint32_t rq_overflow_count = 0;		/* counter for rq overflow */
93 
94 static void cpu_queue_one_event(errh_async_flt_t *);
95 static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
96 static void errh_page_retire(errh_async_flt_t *, uchar_t);
97 static int errh_error_protected(struct regs *, struct async_flt *, int *);
98 static void errh_rq_full(struct async_flt *);
99 static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
100 static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
101 static void errh_handle_attr(errh_async_flt_t *);
102 static void errh_handle_asr(errh_async_flt_t *);
103 static void errh_handle_sp(errh_async_flt_t *);
104 static void sp_ereport_post(uint8_t);
105 
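/*
 * Drain the per-cpu resumable error queue.  Each entry between
 * head_offset and tail_offset is copied into a local buffer, the
 * entry's error handle is cleared so the slot can accept a new report,
 * and the report is dispatched to the ce/ue error queues according to
 * its descriptor and attributes.
 */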
106 /*ARGSUSED*/
107 void
108 process_resumable_error(struct regs *rp, uint32_t head_offset,
109     uint32_t tail_offset)
110 {
111 	struct machcpu *mcpup;
112 	struct async_flt *aflt;
113 	errh_async_flt_t errh_flt;
114 	errh_er_t *head_va;
115 
116 	mcpup = &(CPU->cpu_m);
117 
118 	while (head_offset != tail_offset) {
119 		/* kernel buffer starts right after the resumable queue */
120 		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
121 		    CPU_RQ_SIZE);
122 		/* Copy the error report to local buffer */
123 		bzero(&errh_flt, sizeof (errh_async_flt_t));
124 		bcopy((char *)head_va, &(errh_flt.errh_er),
125 		    sizeof (errh_er_t));
126 
127 		mcpup->cpu_rq_lastre = head_va;
128 		if (printerrh)
129 			errh_er_print(&errh_flt.errh_er, "RQ");
130 
131 		/* Increment the queue head */
132 		head_offset += Q_ENTRY_SIZE;
133 		/* Wrap around */
134 		head_offset &= (CPU_RQ_SIZE - 1);
135 
136 		/* Set the error handle to zero so the entry can hold a new report */
137 		head_va->ehdl = 0;
138 
139 		switch (errh_flt.errh_er.desc) {
140 		case ERRH_DESC_UCOR_RE:
141 			/*
142 			 * Check the error attributes and handle the
143 			 * individual error if needed.
144 			 */
145 			errh_handle_attr(&errh_flt);
146 			break;
147 
148 		case ERRH_DESC_WARN_RE:
149 			/*
150 			 * Power-off requested, but handle it one time only.
151 			 */
152 			if (!err_shutdown_triggered) {
153 				setsoftint(err_shutdown_inum);
154 				++err_shutdown_triggered;
155 			}
156 			continue;
157 
158 		case ERRH_DESC_SP:
159 			/*
160 			 * The state of the SP has changed.
161 			 */
162 			errh_handle_sp(&errh_flt);
163 			continue;
164 
165 		default:
166 			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
167 			    "invalid in resumable error handler",
168 			    (long long) errh_flt.errh_er.desc);
169 			continue;
170 		}
171 
172 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
173 		aflt->flt_id = gethrtime();
174 		aflt->flt_bus_id = getprocessorid();
175 		aflt->flt_class = CPU_FAULT;
176 		aflt->flt_prot = AFLT_PROT_NONE;
177 		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
178 		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);
179 
180 		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
181 			/* If it is an error on another cpu */
182 			aflt->flt_panic = 1;
183 		else
184 			aflt->flt_panic = 0;
185 
186 		/*
187 		 * Handle resumable queue full case.
188 		 */
189 		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
190 			(void) errh_rq_full(aflt);
191 		}
192 
193 		/*
194 		 * Queue the error on the ce or ue queue depending on flt_panic.
195 		 * Even if flt_panic is set, keep processing the remaining
196 		 * entries on the resumable queue until the panic starts.
197 		 */
198 		(void) cpu_queue_one_event(&errh_flt);
199 
200 		/*
201 		 * Panic here if aflt->flt_panic has been set.
202 		 * Enqueued errors will be logged as part of the panic flow.
203 		 */
204 		if (aflt->flt_panic) {
205 			fm_panic("Unrecoverable error on another CPU");
206 		}
207 	}
208 }
209 
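/*
 * Drain the per-cpu nonresumable error queue.  The flags argument
 * carries the trap level and the user spill/fill indicator for the
 * first entry on the queue.  Each report is checked for on_trap()
 * and t_lofault protection where applicable, queued for logging, and,
 * if flt_panic ends up set, the routine panics via fm_panic().
 */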
210 void
211 process_nonresumable_error(struct regs *rp, uint64_t flags,
212     uint32_t head_offset, uint32_t tail_offset)
213 {
214 	struct machcpu *mcpup;
215 	struct async_flt *aflt;
216 	errh_async_flt_t errh_flt;
217 	errh_er_t *head_va;
218 	int trampolined = 0;
219 	int expected = DDI_FM_ERR_UNEXPECTED;
220 	uint64_t exec_mode;
221 	uint8_t u_spill_fill;
222 
223 	mcpup = &(CPU->cpu_m);
224 
225 	while (head_offset != tail_offset) {
226 		/* kernel buffer starts right after the nonresumable queue */
227 		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
228 		    CPU_NRQ_SIZE);
229 
230 		/* Copy the error report to local buffer */
231 		bzero(&errh_flt, sizeof (errh_async_flt_t));
232 
233 		bcopy((char *)head_va, &(errh_flt.errh_er),
234 		    sizeof (errh_er_t));
235 
236 		mcpup->cpu_nrq_lastnre = head_va;
237 		if (printerrh)
238 			errh_er_print(&errh_flt.errh_er, "NRQ");
239 
240 		/* Increment the queue head */
241 		head_offset += Q_ENTRY_SIZE;
242 		/* Wrap around */
243 		head_offset &= (CPU_NRQ_SIZE - 1);
244 
245 		/* Set the error handle to zero so the entry can hold a new report */
246 		head_va->ehdl = 0;
247 
248 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
249 
250 		trampolined = 0;
251 
252 		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
253 			aflt->flt_class = BUS_FAULT;
254 		else
255 			aflt->flt_class = CPU_FAULT;
256 
257 		aflt->flt_id = gethrtime();
258 		aflt->flt_bus_id = getprocessorid();
259 		aflt->flt_pc = (caddr_t)rp->r_pc;
260 		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
261 		    >> ERRH_MODE_SHIFT;
262 		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
263 		    exec_mode == ERRH_MODE_UNKNOWN);
264 		aflt->flt_prot = AFLT_PROT_NONE;
265 		aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
266 		aflt->flt_panic = ((aflt->flt_tl != 0) ||
267 		    (aft_testfatal != 0));
268 
269 		/*
270 		 * For the first error packet on the queue, check if it
271 		 * happened in a user fill/spill trap.
272 		 */
273 		if (flags & ERRH_U_SPILL_FILL) {
274 			u_spill_fill = 1;
275 			/* clear the user fill/spill flag in flags */
276 			flags = (uint64_t)aflt->flt_tl;
277 		} else
278 			u_spill_fill = 0;
279 
280 		switch (errh_flt.errh_er.desc) {
281 		case ERRH_DESC_PR_NRE:
282 			if (u_spill_fill) {
283 				aflt->flt_panic = 0;
284 				break;
285 			}
286 			/*
287 			 * Fall through; a precise fault also needs to be
288 			 * checked to see if it was protected.
289 			 */
290 			/*FALLTHRU*/
291 
292 		case ERRH_DESC_DEF_NRE:
293 			/*
294 			 * If the trap occurred in privileged mode at TL=0,
295 			 * we need to check to see if we were executing
296 			 * in kernel under on_trap() or t_lofault
297 			 * protection. If so, and if it was a PIO or MEM
298 			 * error, then modify the saved registers so that
299 			 * we return from the trap to the appropriate
300 			 * trampoline routine.
301 			 */
302 			if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
303 			    ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
304 			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
305 				trampolined =
306 				    errh_error_protected(rp, aflt, &expected);
307 			}
308 
309 			if (!aflt->flt_priv || aflt->flt_prot ==
310 			    AFLT_PROT_COPY) {
311 				aflt->flt_panic |= aft_panic;
312 			} else if (!trampolined &&
313 			    (aflt->flt_class != BUS_FAULT)) {
314 				aflt->flt_panic = 1;
315 			}
316 
317 			/*
318 			 * Check the error attributes and handle the
319 			 * individual error if needed.
320 			 */
321 			errh_handle_attr(&errh_flt);
322 
323 			/*
324 			 * If it is a PIO error, query the bus nexus
325 			 * drivers for fatal errors.
326 			 */
327 			if (aflt->flt_class == BUS_FAULT) {
328 				aflt->flt_addr = errh_flt.errh_er.ra;
329 				errh_cpu_run_bus_error_handlers(aflt,
330 				    expected);
331 			}
332 
333 			break;
334 
335 		case ERRH_DESC_USER_DCORE:
336 			/*
337 			 * User generated panic. Call panic directly
338 			 * since there are no FMA e-reports to
339 			 * display.
340 			 */
341 
342 			panic("Panic - Generated at user request");
343 
344 			break;
345 
346 		default:
347 			cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
348 			    "invalid in non-resumable error handler",
349 			    (long long) errh_flt.errh_er.desc);
350 			aflt->flt_panic = 1;
351 			break;
352 		}
353 
354 		/*
355 		 * Queue the error report for further processing. Even if
356 		 * flt_panic is set, keep processing the remaining errors
357 		 * in the queue until the panic routine stops the
358 		 * kernel.
359 		 */
360 		(void) cpu_queue_one_event(&errh_flt);
361 
362 		/*
363 		 * Panic here if aflt->flt_panic has been set.
364 		 * Enqueued errors will be logged as part of the panic flow.
365 		 */
366 		if (aflt->flt_panic) {
367 			fm_panic("Unrecoverable hardware error");
368 		}
369 
370 		/*
371 		 * Call page_retire() to handle memory errors.
372 		 */
373 		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
374 			errh_page_retire(&errh_flt, PR_UE);
375 
376 		/*
377 		 * If we queued an error and it occurred in user mode, or was
378 		 * protected by t_lofault, or u_spill_fill is set, we set the
379 		 * AST flag so the queue will be drained before
380 		 * returning to user mode.
381 		 */
382 		if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY ||
383 		    u_spill_fill) {
384 			int pcb_flag = 0;
385 
386 			if (aflt->flt_class == CPU_FAULT)
387 				pcb_flag |= ASYNC_HWERR;
388 			else if (aflt->flt_class == BUS_FAULT)
389 				pcb_flag |= ASYNC_BERR;
390 
391 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
392 			aston(curthread);
393 		}
394 	}
395 }
396 
397 /*
398  * For PIO errors, this routine calls the nexus drivers' error
399  * callback routines. If a callback routine returns fatal, and
400  * we are in kernel or unknown mode without any error protection,
401  * we need to turn on the panic flag.
402  */
403 void
404 errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
405 {
406 	int status;
407 	ddi_fm_error_t de;
408 
409 	bzero(&de, sizeof (ddi_fm_error_t));
410 
411 	de.fme_version = DDI_FME_VERSION;
412 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
413 	de.fme_flag = expected;
414 	de.fme_bus_specific = (void *)aflt->flt_addr;
415 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
416 
417 	/*
418 	 * If the error is protected, we will jump to the proper routine
419 	 * to handle the error; if it happened at user level, we just
420 	 * kill the user process; if the driver thinks the error is
421 	 * not fatal, we can drive on. If none of the above are true,
422 	 * we panic.
423 	 */
424 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
425 	    (status == DDI_FM_FATAL))
426 		aflt->flt_panic = 1;
427 }
428 
429 /*
430  * This routine checks whether we were under any error protection when
431  * the error happened. If so, we unwind to the protection trampoline
432  * and indicate the fault.
433  */
434 static int
435 errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
436 {
437 	int trampolined = 0;
438 	ddi_acc_hdl_t *hp;
439 
440 	if (curthread->t_ontrap != NULL) {
441 		on_trap_data_t *otp = curthread->t_ontrap;
442 
443 		if (otp->ot_prot & OT_DATA_EC) {
444 			aflt->flt_prot = AFLT_PROT_EC;
445 			otp->ot_trap |= OT_DATA_EC;
446 			rp->r_pc = otp->ot_trampoline;
447 			rp->r_npc = rp->r_pc + 4;
448 			trampolined = 1;
449 		}
450 
451 		if (otp->ot_prot & OT_DATA_ACCESS) {
452 			aflt->flt_prot = AFLT_PROT_ACCESS;
453 			otp->ot_trap |= OT_DATA_ACCESS;
454 			rp->r_pc = otp->ot_trampoline;
455 			rp->r_npc = rp->r_pc + 4;
456 			trampolined = 1;
457 			/*
458 			 * For peeks and cautious gets,
459 			 * errors are expected.
460 			 */
461 			hp = (ddi_acc_hdl_t *)otp->ot_handle;
462 			if (!hp)
463 				*expected = DDI_FM_ERR_PEEK;
464 			else if (hp->ah_acc.devacc_attr_access ==
465 			    DDI_CAUTIOUS_ACC)
466 				*expected = DDI_FM_ERR_EXPECTED;
467 		}
468 	} else if (curthread->t_lofault) {
469 		aflt->flt_prot = AFLT_PROT_COPY;
470 		rp->r_g1 = EFAULT;
471 		rp->r_pc = curthread->t_lofault;
472 		rp->r_npc = rp->r_pc + 4;
473 		trampolined = 1;
474 	}
475 
476 	return (trampolined);
477 }
478 
479 /*
480  * Queue one event on the ue queue if it will panic, else on the ce queue.
481  */
482 static void
483 cpu_queue_one_event(errh_async_flt_t *errh_fltp)
484 {
485 	struct async_flt *aflt = (struct async_flt *)errh_fltp;
486 	errorq_t *eqp;
487 
488 	if (aflt->flt_panic)
489 		eqp = ue_queue;
490 	else
491 		eqp = ce_queue;
492 
493 	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
494 	    aflt->flt_panic);
495 }
496 
497 /*
498  * The cpu_async_log_err() function is called by the ce/ue_drain() function to
499  * handle logging for CPU events that are dequeued.  As such, it can be invoked
500  * from softint context, from AST processing in the trap() flow, or from the
501  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
502  */
503 void
504 cpu_async_log_err(void *flt)
505 {
506 	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
507 	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;
508 
509 	switch (errh_erp->desc) {
510 	case ERRH_DESC_UCOR_RE:
511 		if (errh_erp->attr & ERRH_ATTR_MEM) {
512 			/*
513 			 * Turn on the PR_UE flag. The page will be
514 			 * scrubbed when it is freed.
515 			 */
516 			errh_page_retire(errh_fltp, PR_UE);
517 		}
518 
519 		break;
520 
521 	case ERRH_DESC_PR_NRE:
522 	case ERRH_DESC_DEF_NRE:
523 		if (errh_erp->attr & ERRH_ATTR_MEM) {
524 			/*
525 			 * For non-resumable memory error, retire
526 			 * the page here.
527 			 */
528 			errh_page_retire(errh_fltp, PR_UE);
529 
530 			/*
531 			 * If we are going to panic, scrub the page first
532 			 */
533 			if (errh_fltp->cmn_asyncflt.flt_panic)
534 				mem_scrub(errh_fltp->errh_er.ra,
535 				    errh_fltp->errh_er.sz);
536 		}
537 		break;
538 
539 	default:
540 		break;
541 	}
542 }
543 
544 /*
545  * Called from ce_drain().
546  */
547 void
548 cpu_ce_log_err(struct async_flt *aflt)
549 {
550 	switch (aflt->flt_class) {
551 	case CPU_FAULT:
552 		cpu_async_log_err(aflt);
553 		break;
554 
555 	case BUS_FAULT:
556 		cpu_async_log_err(aflt);
557 		break;
558 
559 	default:
560 		break;
561 	}
562 }
563 
564 /*
565  * Called from ue_drain().
566  */
567 void
568 cpu_ue_log_err(struct async_flt *aflt)
569 {
570 	switch (aflt->flt_class) {
571 	case CPU_FAULT:
572 		cpu_async_log_err(aflt);
573 		break;
574 
575 	case BUS_FAULT:
576 		cpu_async_log_err(aflt);
577 		break;
578 
579 	default:
580 		break;
581 	}
582 }
583 
584 /*
585  * Turn on the given retire flag on all pages of the error memory region.
586  */
587 static void
588 errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
589 {
590 	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
591 	uint64_t flt_real_addr_end = flt_real_addr_start +
592 	    errh_fltp->errh_er.sz - 1;
593 	int64_t current_addr;
594 
595 	if (errh_fltp->errh_er.sz == 0)
596 		return;
597 
598 	for (current_addr = flt_real_addr_start;
599 	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
600 		(void) page_retire(current_addr, flag);
601 	}
602 }
603 
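/*
 * Scrub the physical memory range [paddr, paddr + len) using the
 * hv_mem_scrub hypervisor call.  The hypervisor may scrub less than
 * requested on each call, so loop until the whole range is done or
 * the call fails.
 */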
604 void
605 mem_scrub(uint64_t paddr, uint64_t len)
606 {
607 	uint64_t pa, length, scrubbed_len;
608 
609 	pa = paddr;
610 	length = len;
611 	scrubbed_len = 0;
612 
613 	while (length > 0) {
614 		if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
615 			break;
616 
617 		pa += scrubbed_len;
618 		length -= scrubbed_len;
619 	}
620 }
621 
622 /*
623  * Call the hypervisor to flush the memory region.
624  * The va and len are aligned to MMU_PAGESIZE boundaries before use.
625  * Returns the total number of bytes flushed.
626  */
627 uint64_t
628 mem_sync(caddr_t orig_va, size_t orig_len)
629 {
630 	uint64_t pa, length, flushed;
631 	uint64_t chunk_len = MMU_PAGESIZE;
632 	uint64_t total_flushed = 0;
633 	uint64_t va, len;
634 
635 	if (orig_len == 0)
636 		return (total_flushed);
637 
638 	/* align va */
639 	va = P2ALIGN_TYPED(orig_va, MMU_PAGESIZE, uint64_t);
640 	/* round up len to MMU_PAGESIZE aligned */
641 	len = P2ROUNDUP_TYPED(orig_va + orig_len, MMU_PAGESIZE, uint64_t) - va;
642 
643 	while (len > 0) {
644 		pa = va_to_pa((caddr_t)va);
645 		if (pa == (uint64_t)-1)
646 			return (total_flushed);
647 
648 		length = chunk_len;
649 		flushed = 0;
650 
651 		while (length > 0) {
652 			if (hv_mem_sync(pa, length, &flushed) != H_EOK)
653 				return (total_flushed);
654 
655 			pa += flushed;
656 			length -= flushed;
657 			total_flushed += flushed;
658 		}
659 
660 		va += chunk_len;
661 		len -= chunk_len;
662 	}
663 
664 	return (total_flushed);
665 }
666 
667 /*
668  * If the resumable queue is full, we need to check whether any cpu is
669  * in the error state. If not, we drive on; if so, we must panic. The
670  * hypervisor call hv_cpu_state() is used to check the cpu state.
671  * Also reset %tick_compr in case the tick-compare value was lost.
672  */
673 static void
674 errh_rq_full(struct async_flt *afltp)
675 {
676 	processorid_t who;
677 	uint64_t cpu_state;
678 	uint64_t retval;
679 	uint64_t current_tick;
680 
681 	current_tick = (uint64_t)gettick();
682 	tickcmpr_set(current_tick);
683 
684 	for (who = 0; who < NCPU; who++)
685 		if (CPU_IN_SET(cpu_ready_set, who)) {
686 			retval = hv_cpu_state(who, &cpu_state);
687 			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
688 				afltp->flt_panic = 1;
689 				break;
690 			}
691 		}
692 }
693 
694 /*
695  * Return the size of the processor-specific
696  * async error structure.
697  */
698 int
699 cpu_aflt_size(void)
700 {
701 	return (sizeof (errh_async_flt_t));
702 }
703 
704 #define	SZ_TO_ETRS_SHIFT	6
705 
706 /*
707  * Called when the resumable queue overflows; just count the overflows.
708  */
709 /*ARGSUSED*/
710 void
711 rq_overflow(struct regs *rp, uint64_t head_offset,
712     uint64_t tail_offset)
713 {
714 	rq_overflow_count++;
715 }
716 
717 /*
718  * Handler to process a fatal error.  This routine can be called from a
719  * softint, called from trap()'s AST handling, or called from the panic flow.
720  */
721 /*ARGSUSED*/
722 static void
723 ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
724 {
725 	cpu_ue_log_err(aflt);
726 }
727 
728 /*
729  * Handler to process a correctable error.  This routine can be called from a
730  * softint.  We just call the CPU module's logging routine.
731  */
732 /*ARGSUSED*/
733 static void
734 ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
735 {
736 	cpu_ce_log_err(aflt);
737 }
738 
739 /*
740  * Handler to process vbsc hostshutdown (power-off button).
741  */
742 static int
743 err_shutdown_softintr()
744 {
745 	cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
746 	do_shutdown();
747 
748 	/*
749 	 * just in case do_shutdown() fails
750 	 */
751 	(void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
752 	return (DDI_INTR_CLAIMED);
753 }
754 
755 /*
756  * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
757  * after ncpunode has been determined.  ncpus is set in start_other_cpus
758  * which is called after error_init() but may change dynamically.
759  */
760 void
761 error_init(void)
762 {
763 	char tmp_name[MAXSYSNAME];
764 	pnode_t node;
765 	size_t size = cpu_aflt_size();
766 
767 	/*
768 	 * Initialize the correctable and uncorrectable error queues.
769 	 */
770 	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
771 	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);
772 
773 	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
774 	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);
775 
776 	if (ue_queue == NULL || ce_queue == NULL)
777 		panic("failed to create required system error queue");
778 
779 	/*
780 	 * Setup interrupt handler for power-off button.
781 	 */
782 	err_shutdown_inum = add_softintr(PIL_9,
783 	    (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);
784 
785 	/*
786 	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
787 	 * because we will need to acquire it from cpu_async_error().
788 	 */
789 	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
790 
791 	/* Only allow one cpu at a time to dump errh errors. */
792 	mutex_init(&errh_print_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
793 
794 	node = prom_rootnode();
795 	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
796 		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
797 		return;
798 	}
799 
800 	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
801 	    (size <= MAXSYSNAME) &&
802 	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
803 		if (reset_debug) {
804 			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
805 		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
806 			cmn_err(CE_CONT,
807 			    "System booting after fatal error %s\n", tmp_name);
808 		}
809 	}
810 }
811 
812 /*
813  * The nonresumable queue is full; panic here.
814  */
815 /*ARGSUSED*/
816 void
817 nrq_overflow(struct regs *rp)
818 {
819 	fm_panic("Nonresumable queue full");
820 }
821 
822 /*
823  * Special error handling for individual errors, based on the error attributes.
824  */
825 static void
826 errh_handle_attr(errh_async_flt_t *errh_fltp)
827 {
828 	switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
829 	case ERRH_ATTR_CPU:
830 	case ERRH_ATTR_MEM:
831 	case ERRH_ATTR_PIO:
832 	case ERRH_ATTR_IRF:
833 	case ERRH_ATTR_FRF:
834 	case ERRH_ATTR_SHUT:
835 		break;
836 
837 	case ERRH_ATTR_ASR:
838 		errh_handle_asr(errh_fltp);
839 		break;
840 
841 	case ERRH_ATTR_ASI:
842 	case ERRH_ATTR_PREG:
843 	case ERRH_ATTR_RQF:
844 		break;
845 
846 	default:
847 		break;
848 	}
849 }
850 
851 /*
852  * Handle errors with the ASR bit set in the attributes
853  */
854 static void
855 errh_handle_asr(errh_async_flt_t *errh_fltp)
856 {
857 	uint64_t current_tick;
858 
859 	switch (errh_fltp->errh_er.reg) {
860 	case ASR_REG_VALID | ASR_REG_TICK:
861 		/*
862 		 * A Tick Compare Register error only happens when the
863 		 * register is being read or compared with the %tick register.
864 		 * Since the contents of the register are lost, we set
865 		 * %tick_compr to a value in the future. An interrupt will be
866 		 * taken when %tick matches the value field of %tick_compr.
867 		 */
868 		current_tick = (uint64_t)gettick();
869 		tickcmpr_set(current_tick);
870 		/* Do not panic */
871 		errh_fltp->cmn_asyncflt.flt_panic = 0;
872 		break;
873 
874 	default:
875 		break;
876 	}
877 }
878 
879 /*
880  * Handle an SP state change.
881  */
882 static void
883 errh_handle_sp(errh_async_flt_t *errh_fltp)
884 {
885 	uint8_t		sp_state;
886 
887 	sp_state = (errh_fltp->errh_er.attr & ERRH_SP_MASK) >> ERRH_SP_SHIFT;
888 
889 	/*
890 	 * Only the 'SP unavailable' state change is currently valid.
891 	 */
892 	if (sp_state == ERRH_SP_UNAVAILABLE) {
893 		sp_ereport_post(sp_state);
894 	} else {
895 		cmn_err(CE_WARN, "Invalid SP state 0x%x in SP state change "
896 		    "handler.\n", sp_state);
897 	}
898 }
899 
900 /*
901  * Dump the error packet
902  */
903 /*ARGSUSED*/
904 static void
905 errh_er_print(errh_er_t *errh_erp, const char *queue)
906 {
907 	typedef union {
908 		uint64_t w;
909 		uint16_t s[4];
910 	} errhp_t;
911 	errhp_t *p = (errhp_t *)errh_erp;
912 	int i;
913 
914 	mutex_enter(&errh_print_lock);
915 	switch (errh_erp->desc) {
916 	case ERRH_DESC_UCOR_RE:
917 		cmn_err(CE_CONT, "\nResumable Uncorrectable Error ");
918 		break;
919 	case ERRH_DESC_PR_NRE:
920 		cmn_err(CE_CONT, "\nNonresumable Precise Error ");
921 		break;
922 	case ERRH_DESC_DEF_NRE:
923 		cmn_err(CE_CONT, "\nNonresumable Deferred Error ");
924 		break;
925 	default:
926 		cmn_err(CE_CONT, "\nError packet ");
927 		break;
928 	}
929 	cmn_err(CE_CONT, "received on %s\n", queue);
930 
931 	/*
932 	 * Print Q_ENTRY_SIZE bytes of epacket with 8 bytes per line
933 	 */
934 	for (i = Q_ENTRY_SIZE; i > 0; i -= 8, ++p) {
935 		cmn_err(CE_CONT, "%016lx: %04x %04x %04x %04x\n", (uint64_t)p,
936 		    p->s[0], p->s[1], p->s[2], p->s[3]);
937 	}
938 	mutex_exit(&errh_print_lock);
939 }
940 
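/*
 * Post an ereport for an SP state change; currently only called for
 * the 'SP unavailable' state (see errh_handle_sp() above).
 */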
941 static void
942 sp_ereport_post(uint8_t sp_state)
943 {
944 	nvlist_t	*ereport, *detector;
945 
946 	/*
947 	 * Currently an ereport is only sent when the state of the SP
948 	 * changes to unavailable.
949 	 */
950 	ASSERT(sp_state == ERRH_SP_UNAVAILABLE);
951 
952 	ereport = fm_nvlist_create(NULL);
953 	detector = fm_nvlist_create(NULL);
954 
955 	/*
956 	 * Create an HC-scheme detector FMRI.
957 	 */
958 	fm_fmri_hc_set(detector, FM_HC_SCHEME_VERSION, NULL, NULL, 1,
959 	    "chassis", 0);
960 
961 	fm_ereport_set(ereport, FM_EREPORT_VERSION, "chassis.sp.unavailable",
962 	    fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
963 
964 	(void) fm_ereport_post(ereport, EVCH_TRYHARD);
965 
966 	fm_nvlist_destroy(ereport, FM_NVA_FREE);
967 	fm_nvlist_destroy(detector, FM_NVA_FREE);
968 }
969