xref: /illumos-gate/usr/src/uts/sun4v/os/error.c (revision 6a604193b70017bd933cd973200b3f13803674b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/machsystm.h>
28 #include <sys/sysmacros.h>
29 #include <sys/cpuvar.h>
30 #include <sys/async.h>
31 #include <sys/ontrap.h>
32 #include <sys/ddifm.h>
33 #include <sys/hypervisor_api.h>
34 #include <sys/errorq.h>
35 #include <sys/promif.h>
36 #include <sys/prom_plat.h>
37 #include <sys/x_call.h>
38 #include <sys/error.h>
39 #include <sys/fm/util.h>
40 #include <sys/ivintr.h>
41 #include <sys/machasi.h>
42 #include <sys/mmu.h>
43 #include <sys/archsystm.h>
44 
/*
 * Per-CPU multipliers used by error_init() to size the error queues:
 * ce_queue holds MAX_CE_FLTS * (max_ncpus + 1) elements and ue_queue holds
 * MAX_ASYNC_FLTS * (max_ncpus + 1) elements.
 */
#define	MAX_CE_FLTS		10
#define	MAX_ASYNC_FLTS		6

/*
 * Error queues created in error_init().  cpu_queue_one_event() dispatches
 * a fault to ue_queue when flt_panic is set and to ce_queue otherwise.
 */
errorq_t *ue_queue;			/* queue of uncorrectable errors */
errorq_t *ce_queue;			/* queue of correctable errors */

/*
 * Being used by memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not console.
 * If the value is 2, the error is logged to the log file and console.
 */
int	ce_verbose_memory = 1;
int	ce_verbose_other = 1;

/*
 * Debug knobs.  Only reset_debug is read in this file (error_init() prints
 * the reset reason unconditionally when it is non-zero).
 * NOTE(review): ce_show_data, ce_debug and ue_debug are not referenced in
 * this file; presumably they are consumed elsewhere -- confirm.
 */
int	ce_show_data = 0;
int	ce_debug = 0;
int	ue_debug = 0;
int	reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs). Setting
 * these to non-default values on a non-DEBUG kernel is NOT supported.
 */
int	aft_verbose = 0;	/* log AFT messages > 1 to log only */
int	aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
int	aft_testfatal = 0;	/* force all AFTs to panic immediately */

/*
 * Used for vbsc hostshutdown (power-off button).
 * err_shutdown_inum is assigned by add_softintr() in error_init() and is
 * posted with setsoftint() from process_resumable_error().
 */
int	err_shutdown_triggered = 0;	/* only once */
uint64_t err_shutdown_inum = 0;	/* used to pull the trigger */

/*
 * Used to print NRE/RE via system variable or kmdb.
 * When printerrh is non-zero, every error packet taken off the resumable
 * or non-resumable queue is dumped by errh_er_print(); errh_print_lock
 * serialises that output.
 */
int		printerrh = 0;		/* see /etc/system */
static void	errh_er_print(errh_er_t *, const char *);
kmutex_t	errh_print_lock;

/*
 * Defined in bus_func.c but initialised in error_init
 */
extern kmutex_t bfd_lock;

/* Incremented by rq_overflow() each time it is called. */
static uint32_t rq_overflow_count = 0;		/* counter for rq overflow */

/*
 * Forward declarations for file-local helpers.
 * NOTE(review): count_entries_on_queue() is declared here but no definition
 * is visible in this file -- confirm whether the prototype is stale.
 */
static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_retire(errh_async_flt_t *, uchar_t);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
static void errh_handle_attr(errh_async_flt_t *);
static void errh_handle_asr(errh_async_flt_t *);
105 
106 /*ARGSUSED*/
107 void
108 process_resumable_error(struct regs *rp, uint32_t head_offset,
109     uint32_t tail_offset)
110 {
111 	struct machcpu *mcpup;
112 	struct async_flt *aflt;
113 	errh_async_flt_t errh_flt;
114 	errh_er_t *head_va;
115 
116 	mcpup = &(CPU->cpu_m);
117 
118 	while (head_offset != tail_offset) {
119 		/* kernel buffer starts right after the resumable queue */
120 		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
121 		    CPU_RQ_SIZE);
122 		/* Copy the error report to local buffer */
123 		bzero(&errh_flt, sizeof (errh_async_flt_t));
124 		bcopy((char *)head_va, &(errh_flt.errh_er),
125 		    sizeof (errh_er_t));
126 
127 		mcpup->cpu_rq_lastre = head_va;
128 		if (printerrh)
129 			errh_er_print(&errh_flt.errh_er, "RQ");
130 
131 		/* Increment the queue head */
132 		head_offset += Q_ENTRY_SIZE;
133 		/* Wrap around */
134 		head_offset &= (CPU_RQ_SIZE - 1);
135 
136 		/* set error handle to zero so it can hold new error report */
137 		head_va->ehdl = 0;
138 
139 		switch (errh_flt.errh_er.desc) {
140 		case ERRH_DESC_UCOR_RE:
141 			/*
142 			 * Check error attribute, handle individual error
143 			 * if it is needed.
144 			 */
145 			errh_handle_attr(&errh_flt);
146 			break;
147 
148 		case ERRH_DESC_WARN_RE:
149 			/*
150 			 * Power-off requested, but handle it one time only.
151 			 */
152 			if (!err_shutdown_triggered) {
153 				setsoftint(err_shutdown_inum);
154 				++err_shutdown_triggered;
155 			}
156 			continue;
157 
158 		default:
159 			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
160 			    " invalid in resumable error handler",
161 			    (long long) errh_flt.errh_er.desc);
162 			continue;
163 		}
164 
165 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
166 		aflt->flt_id = gethrtime();
167 		aflt->flt_bus_id = getprocessorid();
168 		aflt->flt_class = CPU_FAULT;
169 		aflt->flt_prot = AFLT_PROT_NONE;
170 		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
171 		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);
172 
173 		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
174 			/* If it is an error on other cpu */
175 			aflt->flt_panic = 1;
176 		else
177 			aflt->flt_panic = 0;
178 
179 		/*
180 		 * Handle resumable queue full case.
181 		 */
182 		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
183 			(void) errh_rq_full(aflt);
184 		}
185 
186 		/*
187 		 * Queue the error on ce or ue queue depend on flt_panic.
188 		 * Even if flt_panic is set, the code still keep processing
189 		 * the rest element on rq until the panic starts.
190 		 */
191 		(void) cpu_queue_one_event(&errh_flt);
192 
193 		/*
194 		 * Panic here if aflt->flt_panic has been set.
195 		 * Enqueued errors will be logged as part of the panic flow.
196 		 */
197 		if (aflt->flt_panic) {
198 			fm_panic("Unrecoverable error on another CPU");
199 		}
200 	}
201 }
202 
203 void
204 process_nonresumable_error(struct regs *rp, uint64_t flags,
205     uint32_t head_offset, uint32_t tail_offset)
206 {
207 	struct machcpu *mcpup;
208 	struct async_flt *aflt;
209 	errh_async_flt_t errh_flt;
210 	errh_er_t *head_va;
211 	int trampolined = 0;
212 	int expected = DDI_FM_ERR_UNEXPECTED;
213 	uint64_t exec_mode;
214 	uint8_t u_spill_fill;
215 	int u_kill = 1;
216 
217 	mcpup = &(CPU->cpu_m);
218 
219 	while (head_offset != tail_offset) {
220 		/* kernel buffer starts right after the nonresumable queue */
221 		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
222 		    CPU_NRQ_SIZE);
223 
224 		/* Copy the error report to local buffer */
225 		bzero(&errh_flt, sizeof (errh_async_flt_t));
226 
227 		bcopy((char *)head_va, &(errh_flt.errh_er),
228 		    sizeof (errh_er_t));
229 
230 		mcpup->cpu_nrq_lastnre = head_va;
231 		if (printerrh)
232 			errh_er_print(&errh_flt.errh_er, "NRQ");
233 
234 		/* Increment the queue head */
235 		head_offset += Q_ENTRY_SIZE;
236 		/* Wrap around */
237 		head_offset &= (CPU_NRQ_SIZE - 1);
238 
239 		/* set error handle to zero so it can hold new error report */
240 		head_va->ehdl = 0;
241 
242 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
243 
244 		trampolined = 0;
245 
246 		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
247 			aflt->flt_class = BUS_FAULT;
248 		else
249 			aflt->flt_class = CPU_FAULT;
250 
251 		aflt->flt_id = gethrtime();
252 		aflt->flt_bus_id = getprocessorid();
253 		aflt->flt_pc = (caddr_t)rp->r_pc;
254 		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
255 		    >> ERRH_MODE_SHIFT;
256 		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
257 		    exec_mode == ERRH_MODE_UNKNOWN);
258 		aflt->flt_prot = AFLT_PROT_NONE;
259 		aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
260 		aflt->flt_panic = ((aflt->flt_tl != 0) ||
261 		    (aft_testfatal != 0));
262 
263 		/*
264 		 * For the first error packet on the queue, check if it
265 		 * happened in user fill/spill trap.
266 		 */
267 		if (flags & ERRH_U_SPILL_FILL) {
268 			u_spill_fill = 1;
269 			/* clear the user fill/spill flag in flags */
270 			flags = (uint64_t)aflt->flt_tl;
271 		} else
272 			u_spill_fill = 0;
273 
274 		switch (errh_flt.errh_er.desc) {
275 		case ERRH_DESC_PR_NRE:
276 			if (u_spill_fill) {
277 				aflt->flt_panic = 0;
278 				break;
279 			}
280 			/*
281 			 * Context Register Parity - for reload of secondary
282 			 * context register, see nonresumable_error.  Note
283 			 * that 'size' for CRP denotes a sense of version,
284 			 * so if it's out of range, then just let it fall
285 			 * through and be processed later.
286 			 */
287 			if ((errh_flt.errh_er.attr & ERRH_ATTR_ASI) &&
288 			    (errh_flt.errh_er.asi == ASI_MMU_CTX) &&
289 			    (errh_flt.errh_er.addr >= MMU_PCONTEXT0) &&
290 			    (errh_flt.errh_er.addr + errh_flt.errh_er.sz <=
291 			    MMU_SCONTEXT1 + sizeof (uint64_t))) {
292 
293 				if (aflt->flt_tl)	/* TL>0, so panic */
294 					break;
295 
296 				u_kill = 0;		/* do not terminate */
297 				break;
298 			}
299 			/*
300 			 * All other PR_NRE fall through in order to
301 			 * check for protection.  The list can include
302 			 * ERRH_ATTR_FRF, ERRH_ATTR_IRF, ERRH_ATTR_MEM,
303 			 * and ERRH_ATTR_PIO.
304 			 */
305 			/*FALLTHRU*/
306 
307 		case ERRH_DESC_DEF_NRE:
308 			/*
309 			 * If the trap occurred in privileged mode at TL=0,
310 			 * we need to check to see if we were executing
311 			 * in kernel under on_trap() or t_lofault
312 			 * protection. If so, and if it was a PIO or MEM
313 			 * error, then modify the saved registers so that
314 			 * we return from the trap to the appropriate
315 			 * trampoline routine.
316 			 */
317 			if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
318 			    ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
319 			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
320 				trampolined =
321 				    errh_error_protected(rp, aflt, &expected);
322 			}
323 
324 			if (!aflt->flt_priv || aflt->flt_prot ==
325 			    AFLT_PROT_COPY) {
326 				aflt->flt_panic |= aft_panic;
327 			} else if (!trampolined &&
328 			    (aflt->flt_class != BUS_FAULT)) {
329 				aflt->flt_panic = 1;
330 			}
331 
332 			/*
333 			 * Check error attribute, handle individual error
334 			 * if it is needed.
335 			 */
336 			errh_handle_attr(&errh_flt);
337 
338 			/*
339 			 * If PIO error, we need to query the bus nexus
340 			 * for fatal errors.
341 			 */
342 			if (aflt->flt_class == BUS_FAULT) {
343 				aflt->flt_addr = errh_flt.errh_er.addr;
344 				errh_cpu_run_bus_error_handlers(aflt,
345 				    expected);
346 			}
347 
348 			break;
349 
350 		case ERRH_DESC_USER_DCORE:
351 			/*
352 			 * User generated panic. Call panic directly
353 			 * since there are no FMA e-reports to
354 			 * display.
355 			 */
356 
357 			panic("Panic - Generated at user request");
358 
359 			break;
360 
361 		default:
362 			cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
363 			    " invalid in non-resumable error handler",
364 			    (long long) errh_flt.errh_er.desc);
365 			aflt->flt_panic = 1;
366 			break;
367 		}
368 
369 		/*
370 		 * Queue the error report for further processing. If
371 		 * flt_panic is set, code still process other errors
372 		 * in the queue until the panic routine stops the
373 		 * kernel.
374 		 */
375 		(void) cpu_queue_one_event(&errh_flt);
376 
377 		/*
378 		 * Panic here if aflt->flt_panic has been set.
379 		 * Enqueued errors will be logged as part of the panic flow.
380 		 */
381 		if (aflt->flt_panic) {
382 			fm_panic("Unrecoverable hardware error");
383 		}
384 
385 		/*
386 		 * Call page_retire() to handle memory errors.
387 		 */
388 		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
389 			errh_page_retire(&errh_flt, PR_UE);
390 
391 		/*
392 		 * If we queued an error for a thread that should terminate
393 		 * and it was in user mode or protected by t_lofault, set AST
394 		 * flag so the queue will be drained before returning to user
395 		 * mode.  Note that user threads can be killed via pcb_flags.
396 		 */
397 		if (u_kill && (!aflt->flt_priv ||
398 		    aflt->flt_prot == AFLT_PROT_COPY || u_spill_fill)) {
399 			int pcb_flag = 0;
400 
401 			if (aflt->flt_class == CPU_FAULT)
402 				pcb_flag |= ASYNC_HWERR;
403 			else if (aflt->flt_class == BUS_FAULT)
404 				pcb_flag |= ASYNC_BERR;
405 
406 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
407 			aston(curthread);
408 		}
409 	}
410 }
411 
412 /*
413  * For PIO errors, this routine calls nexus driver's error
414  * callback routines. If the callback routine returns fatal, and
415  * we are in kernel or unknow mode without any error protection,
416  * we need to turn on the panic flag.
417  */
418 void
419 errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
420 {
421 	int status;
422 	ddi_fm_error_t de;
423 
424 	bzero(&de, sizeof (ddi_fm_error_t));
425 
426 	de.fme_version = DDI_FME_VERSION;
427 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
428 	de.fme_flag = expected;
429 	de.fme_bus_specific = (void *)aflt->flt_addr;
430 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
431 
432 	/*
433 	 * If error is protected, it will jump to proper routine
434 	 * to handle the handle; if it is in user level, we just
435 	 * kill the user process; if the driver thinks the error is
436 	 * not fatal, we can drive on. If none of above are true,
437 	 * we panic
438 	 */
439 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
440 	    (status == DDI_FM_FATAL))
441 		aflt->flt_panic = 1;
442 }
443 
444 /*
445  * This routine checks to see if we are under any error protection when
446  * the error happens. If we are under error protection, we unwind to
447  * the protection and indicate fault.
448  */
449 static int
450 errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
451 {
452 	int trampolined = 0;
453 	ddi_acc_hdl_t *hp;
454 
455 	if (curthread->t_ontrap != NULL) {
456 		on_trap_data_t *otp = curthread->t_ontrap;
457 
458 		if (otp->ot_prot & OT_DATA_EC) {
459 			aflt->flt_prot = AFLT_PROT_EC;
460 			otp->ot_trap |= OT_DATA_EC;
461 			rp->r_pc = otp->ot_trampoline;
462 			rp->r_npc = rp->r_pc +4;
463 			trampolined = 1;
464 		}
465 
466 		if (otp->ot_prot & OT_DATA_ACCESS) {
467 			aflt->flt_prot = AFLT_PROT_ACCESS;
468 			otp->ot_trap |= OT_DATA_ACCESS;
469 			rp->r_pc = otp->ot_trampoline;
470 			rp->r_npc = rp->r_pc + 4;
471 			trampolined = 1;
472 			/*
473 			 * for peek and caut_gets
474 			 * errors are expected
475 			 */
476 			hp = (ddi_acc_hdl_t *)otp->ot_handle;
477 			if (!hp)
478 				*expected = DDI_FM_ERR_PEEK;
479 			else if (hp->ah_acc.devacc_attr_access ==
480 			    DDI_CAUTIOUS_ACC)
481 				*expected = DDI_FM_ERR_EXPECTED;
482 		}
483 	} else if (curthread->t_lofault) {
484 		aflt->flt_prot = AFLT_PROT_COPY;
485 		rp->r_g1 = EFAULT;
486 		rp->r_pc = curthread->t_lofault;
487 		rp->r_npc = rp->r_pc + 4;
488 		trampolined = 1;
489 	}
490 
491 	return (trampolined);
492 }
493 
494 /*
495  * Queue one event.
496  */
497 static void
498 cpu_queue_one_event(errh_async_flt_t *errh_fltp)
499 {
500 	struct async_flt *aflt = (struct async_flt *)errh_fltp;
501 	errorq_t *eqp;
502 
503 	if (aflt->flt_panic)
504 		eqp = ue_queue;
505 	else
506 		eqp = ce_queue;
507 
508 	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
509 	    aflt->flt_panic);
510 }
511 
512 /*
513  * The cpu_async_log_err() function is called by the ce/ue_drain() function to
514  * handle logging for CPU events that are dequeued.  As such, it can be invoked
515  * from softint context, from AST processing in the trap() flow, or from the
516  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
517  */
518 void
519 cpu_async_log_err(void *flt)
520 {
521 	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
522 	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;
523 
524 	switch (errh_erp->desc) {
525 	case ERRH_DESC_UCOR_RE:
526 		if (errh_erp->attr & ERRH_ATTR_MEM) {
527 			/*
528 			 * Turn on the PR_UE flag. The page will be
529 			 * scrubbed when it is freed.
530 			 */
531 			errh_page_retire(errh_fltp, PR_UE);
532 		}
533 
534 		break;
535 
536 	case ERRH_DESC_PR_NRE:
537 	case ERRH_DESC_DEF_NRE:
538 		if (errh_erp->attr & ERRH_ATTR_MEM) {
539 			/*
540 			 * For non-resumable memory error, retire
541 			 * the page here.
542 			 */
543 			errh_page_retire(errh_fltp, PR_UE);
544 
545 			/*
546 			 * If we are going to panic, scrub the page first
547 			 */
548 			if (errh_fltp->cmn_asyncflt.flt_panic)
549 				mem_scrub(errh_fltp->errh_er.addr,
550 				    errh_fltp->errh_er.sz);
551 		}
552 		break;
553 
554 	default:
555 		break;
556 	}
557 }
558 
559 /*
560  * Called from ce_drain().
561  */
562 void
563 cpu_ce_log_err(struct async_flt *aflt)
564 {
565 	switch (aflt->flt_class) {
566 	case CPU_FAULT:
567 		cpu_async_log_err(aflt);
568 		break;
569 
570 	case BUS_FAULT:
571 		cpu_async_log_err(aflt);
572 		break;
573 
574 	default:
575 		break;
576 	}
577 }
578 
579 /*
580  * Called from ue_drain().
581  */
582 void
583 cpu_ue_log_err(struct async_flt *aflt)
584 {
585 	switch (aflt->flt_class) {
586 	case CPU_FAULT:
587 		cpu_async_log_err(aflt);
588 		break;
589 
590 	case BUS_FAULT:
591 		cpu_async_log_err(aflt);
592 		break;
593 
594 	default:
595 		break;
596 	}
597 }
598 
599 /*
600  * Turn on flag on the error memory region.
601  */
602 static void
603 errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
604 {
605 	uint64_t flt_real_addr_start = errh_fltp->errh_er.addr;
606 	uint64_t flt_real_addr_end = flt_real_addr_start +
607 	    errh_fltp->errh_er.sz - 1;
608 	int64_t current_addr;
609 
610 	if (errh_fltp->errh_er.sz == 0)
611 		return;
612 
613 	for (current_addr = flt_real_addr_start;
614 	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
615 		(void) page_retire(current_addr, flag);
616 	}
617 }
618 
619 void
620 mem_scrub(uint64_t paddr, uint64_t len)
621 {
622 	uint64_t pa, length, scrubbed_len;
623 
624 	pa = paddr;
625 	length = len;
626 	scrubbed_len = 0;
627 
628 	while (length > 0) {
629 		if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
630 			break;
631 
632 		pa += scrubbed_len;
633 		length -= scrubbed_len;
634 	}
635 }
636 
637 /*
638  * Call hypervisor to flush the memory region.
639  * Both va and len must be MMU_PAGESIZE aligned.
640  * Returns the total number of bytes flushed.
641  */
642 uint64_t
643 mem_sync(caddr_t orig_va, size_t orig_len)
644 {
645 	uint64_t pa, length, flushed;
646 	uint64_t chunk_len = MMU_PAGESIZE;
647 	uint64_t total_flushed = 0;
648 	uint64_t va, len;
649 
650 	if (orig_len == 0)
651 		return (total_flushed);
652 
653 	/* align va */
654 	va = P2ALIGN_TYPED(orig_va, MMU_PAGESIZE, uint64_t);
655 	/* round up len to MMU_PAGESIZE aligned */
656 	len = P2ROUNDUP_TYPED(orig_va + orig_len, MMU_PAGESIZE, uint64_t) - va;
657 
658 	while (len > 0) {
659 		pa = va_to_pa((caddr_t)va);
660 		if (pa == (uint64_t)-1)
661 			return (total_flushed);
662 
663 		length = chunk_len;
664 		flushed = 0;
665 
666 		while (length > 0) {
667 			if (hv_mem_sync(pa, length, &flushed) != H_EOK)
668 				return (total_flushed);
669 
670 			pa += flushed;
671 			length -= flushed;
672 			total_flushed += flushed;
673 		}
674 
675 		va += chunk_len;
676 		len -= chunk_len;
677 	}
678 
679 	return (total_flushed);
680 }
681 
682 /*
683  * If resumable queue is full, we need to check if any cpu is in
684  * error state. If not, we drive on. If yes, we need to panic. The
685  * hypervisor call hv_cpu_state() is being used for checking the
686  * cpu state.  And reset %tick_compr in case tick-compare was lost.
687  */
688 static void
689 errh_rq_full(struct async_flt *afltp)
690 {
691 	processorid_t who;
692 	uint64_t cpu_state;
693 	uint64_t retval;
694 	uint64_t current_tick;
695 
696 	current_tick = (uint64_t)gettick();
697 	tickcmpr_set(current_tick);
698 
699 	for (who = 0; who < NCPU; who++)
700 		if (CPU_IN_SET(cpu_ready_set, who)) {
701 			retval = hv_cpu_state(who, &cpu_state);
702 			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
703 				afltp->flt_panic = 1;
704 				break;
705 			}
706 		}
707 }
708 
709 /*
710  * Return processor specific async error structure
711  * size used.
712  */
713 int
714 cpu_aflt_size(void)
715 {
716 	return (sizeof (errh_async_flt_t));
717 }
718 
/*
 * NOTE(review): not referenced anywhere in the visible part of this file.
 * Presumably a shift that converts a queue size in bytes to a number of
 * 64-byte entries -- confirm against other users before relying on it.
 */
#define	SZ_TO_ETRS_SHIFT	6
720 
721 /*
722  * Message print out when resumable queue is overflown
723  */
724 /*ARGSUSED*/
725 void
726 rq_overflow(struct regs *rp, uint64_t head_offset,
727     uint64_t tail_offset)
728 {
729 	rq_overflow_count++;
730 }
731 
732 /*
733  * Handler to process a fatal error.  This routine can be called from a
734  * softint, called from trap()'s AST handling, or called from the panic flow.
735  */
736 /*ARGSUSED*/
737 static void
738 ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
739 {
740 	cpu_ue_log_err(aflt);
741 }
742 
743 /*
744  * Handler to process a correctable error.  This routine can be called from a
745  * softint.  We just call the CPU module's logging routine.
746  */
747 /*ARGSUSED*/
748 static void
749 ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
750 {
751 	cpu_ce_log_err(aflt);
752 }
753 
754 /*
755  * Handler to process vbsc hostshutdown (power-off button).
756  */
757 static int
758 err_shutdown_softintr()
759 {
760 	cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
761 	do_shutdown();
762 
763 	/*
764 	 * just in case do_shutdown() fails
765 	 */
766 	(void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
767 	return (DDI_INTR_CLAIMED);
768 }
769 
770 /*
771  * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
772  * after ncpunode has been determined.  ncpus is set in start_other_cpus
773  * which is called after error_init() but may change dynamically.
774  */
775 void
776 error_init(void)
777 {
778 	char tmp_name[MAXSYSNAME];
779 	pnode_t node;
780 	size_t size = cpu_aflt_size();
781 
782 	/*
783 	 * Initialize the correctable and uncorrectable error queues.
784 	 */
785 	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
786 	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);
787 
788 	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
789 	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);
790 
791 	if (ue_queue == NULL || ce_queue == NULL)
792 		panic("failed to create required system error queue");
793 
794 	/*
795 	 * Setup interrupt handler for power-off button.
796 	 */
797 	err_shutdown_inum = add_softintr(PIL_9,
798 	    (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);
799 
800 	/*
801 	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
802 	 * because we will need to acquire it from cpu_async_error().
803 	 */
804 	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
805 
806 	/* Only allow one cpu at a time to dump errh errors. */
807 	mutex_init(&errh_print_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
808 
809 	node = prom_rootnode();
810 	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
811 		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
812 		return;
813 	}
814 
815 	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
816 	    (size <= MAXSYSNAME) &&
817 	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
818 		if (reset_debug) {
819 			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
820 		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
821 			cmn_err(CE_CONT,
822 			    "System booting after fatal error %s\n", tmp_name);
823 		}
824 	}
825 }
826 
/*
 * Called when the nonresumable queue is full.  There is no recovery from
 * this; the system panics.  rp is unused.
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
	fm_panic("Nonresumable queue full");
}
836 
837 /*
838  * This is the place for special error handling for individual errors.
839  */
840 static void
841 errh_handle_attr(errh_async_flt_t *errh_fltp)
842 {
843 	switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
844 	case ERRH_ATTR_CPU:
845 	case ERRH_ATTR_MEM:
846 	case ERRH_ATTR_PIO:
847 	case ERRH_ATTR_IRF:
848 	case ERRH_ATTR_FRF:
849 	case ERRH_ATTR_SHUT:
850 		break;
851 
852 	case ERRH_ATTR_ASR:
853 		errh_handle_asr(errh_fltp);
854 		break;
855 
856 	case ERRH_ATTR_ASI:
857 	case ERRH_ATTR_PREG:
858 	case ERRH_ATTR_RQF:
859 		break;
860 
861 	default:
862 		break;
863 	}
864 }
865 
866 /*
867  * Handle ASR bit set in ATTR
868  */
869 static void
870 errh_handle_asr(errh_async_flt_t *errh_fltp)
871 {
872 	uint64_t current_tick;
873 
874 	switch (errh_fltp->errh_er.reg) {
875 	case ASR_REG_VALID | ASR_REG_TICK:
876 		/*
877 		 * For Tick Compare Register error, it only happens when
878 		 * the register is being read or compared with the %tick
879 		 * register. Since we lost the contents of the register,
880 		 * we set the %tick_compr in the future. An interrupt will
881 		 * happen when %tick matches the value field of %tick_compr.
882 		 */
883 		current_tick = (uint64_t)gettick();
884 		tickcmpr_set(current_tick);
885 		/* Do not panic */
886 		errh_fltp->cmn_asyncflt.flt_panic = 0;
887 		break;
888 
889 	default:
890 		break;
891 	}
892 }
893 
894 /*
895  * Dump the error packet
896  */
897 /*ARGSUSED*/
898 static void
899 errh_er_print(errh_er_t *errh_erp, const char *queue)
900 {
901 	typedef union {
902 		uint64_t w;
903 		uint16_t s[4];
904 	} errhp_t;
905 	errhp_t *p = (errhp_t *)errh_erp;
906 	int i;
907 
908 	mutex_enter(&errh_print_lock);
909 	switch (errh_erp->desc) {
910 	case ERRH_DESC_UCOR_RE:
911 		cmn_err(CE_CONT, "\nResumable Uncorrectable Error ");
912 		break;
913 	case ERRH_DESC_PR_NRE:
914 		cmn_err(CE_CONT, "\nNonresumable Precise Error ");
915 		break;
916 	case ERRH_DESC_DEF_NRE:
917 		cmn_err(CE_CONT, "\nNonresumable Deferred Error ");
918 		break;
919 	default:
920 		cmn_err(CE_CONT, "\nError packet ");
921 		break;
922 	}
923 	cmn_err(CE_CONT, "received on %s\n", queue);
924 
925 	/*
926 	 * Print Q_ENTRY_SIZE bytes of epacket with 8 bytes per line
927 	 */
928 	for (i = Q_ENTRY_SIZE; i > 0; i -= 8, ++p) {
929 		cmn_err(CE_CONT, "%016lx: %04x %04x %04x %04x\n", (uint64_t)p,
930 		    p->s[0], p->s[1], p->s[2], p->s[3]);
931 	}
932 	mutex_exit(&errh_print_lock);
933 }
934