xref: /titanic_51/usr/src/uts/sun4v/os/error.c (revision 50981ffc7e4c5048d14890df805afee6ec113991)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/machsystm.h>
28 #include <sys/sysmacros.h>
29 #include <sys/cpuvar.h>
30 #include <sys/async.h>
31 #include <sys/ontrap.h>
32 #include <sys/ddifm.h>
33 #include <sys/hypervisor_api.h>
34 #include <sys/errorq.h>
35 #include <sys/promif.h>
36 #include <sys/prom_plat.h>
37 #include <sys/x_call.h>
38 #include <sys/error.h>
39 #include <sys/fm/util.h>
40 #include <sys/ivintr.h>
41 #include <sys/machasi.h>
42 #include <sys/mmu.h>
43 #include <sys/archsystm.h>
44 
/*
 * Per-CPU multipliers used by error_init() to size the error queues:
 * ce_queue holds MAX_CE_FLTS * (max_ncpus + 1) elements and ue_queue holds
 * MAX_ASYNC_FLTS * (max_ncpus + 1) elements.
 */
#define	MAX_CE_FLTS		10
#define	MAX_ASYNC_FLTS		6

/*
 * Both queues are created in error_init().  cpu_queue_one_event() places a
 * fault on ue_queue when its flt_panic flag is set and on ce_queue otherwise.
 */
errorq_t *ue_queue;			/* queue of uncorrectable errors */
errorq_t *ce_queue;			/* queue of correctable errors */

/*
 * Being used by memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not console.
 * If the value is 2, the error is logged to the log file and console.
 */
int	ce_verbose_memory = 1;
int	ce_verbose_other = 1;

/*
 * Debug knobs.  Within this file only reset_debug is consulted: when it is
 * non-zero, error_init() always reports the PROM "reset-reason" property
 * instead of only reporting values that begin with "FATAL".
 */
int	ce_show_data = 0;
int	ce_debug = 0;
int	ue_debug = 0;
int	reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs). Setting
 * these to non-default values on a non-DEBUG kernel is NOT supported.
 */
int	aft_verbose = 0;	/* log AFT messages > 1 to log only */
int	aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
int	aft_testfatal = 0;	/* force all AFTs to panic immediately */

/*
 * Used for vbsc hostshutdown (power-off button).
 * process_resumable_error() posts the soft interrupt identified by
 * err_shutdown_inum the first time it sees an ERRH_DESC_WARN_RE report and
 * then bumps err_shutdown_triggered so that later reports are ignored.
 * err_shutdown_inum is assigned by error_init().
 */
int	err_shutdown_triggered = 0;	/* only once */
uint64_t err_shutdown_inum = 0;	/* used to pull the trigger */

/*
 * Used to print NRE/RE via system variable or kmdb.
 * When printerrh is non-zero, every report pulled off the resumable or
 * nonresumable queue is dumped by errh_er_print(), which holds
 * errh_print_lock for the duration of the dump.
 */
int		printerrh = 0;		/* see /etc/system */
static void	errh_er_print(errh_er_t *, const char *);
kmutex_t	errh_print_lock;

/*
 * Defined in bus_func.c but initialised in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;		/* counter for rq overflow */

/*
 * Forward declarations for the file-local helpers.
 * NOTE(review): count_entries_on_queue() is declared here but no definition
 * or caller is visible in this file -- confirm whether the prototype is stale.
 */
static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_retire(errh_async_flt_t *, uchar_t);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
static void errh_handle_attr(errh_async_flt_t *);
static void errh_handle_asr(errh_async_flt_t *);
105 
106 /*ARGSUSED*/
107 void
108 process_resumable_error(struct regs *rp, uint32_t head_offset,
109     uint32_t tail_offset)
110 {
111 	struct machcpu *mcpup;
112 	struct async_flt *aflt;
113 	errh_async_flt_t errh_flt;
114 	errh_er_t *head_va;
115 
116 	mcpup = &(CPU->cpu_m);
117 
118 	while (head_offset != tail_offset) {
119 		/* kernel buffer starts right after the resumable queue */
120 		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
121 		    CPU_RQ_SIZE);
122 		/* Copy the error report to local buffer */
123 		bzero(&errh_flt, sizeof (errh_async_flt_t));
124 		bcopy((char *)head_va, &(errh_flt.errh_er),
125 		    sizeof (errh_er_t));
126 
127 		mcpup->cpu_rq_lastre = head_va;
128 		if (printerrh)
129 			errh_er_print(&errh_flt.errh_er, "RQ");
130 
131 		/* Increment the queue head */
132 		head_offset += Q_ENTRY_SIZE;
133 		/* Wrap around */
134 		head_offset &= (CPU_RQ_SIZE - 1);
135 
136 		/* set error handle to zero so it can hold new error report */
137 		head_va->ehdl = 0;
138 
139 		switch (errh_flt.errh_er.desc) {
140 		case ERRH_DESC_UCOR_RE:
141 			/*
142 			 * Check error attribute, handle individual error
143 			 * if it is needed.
144 			 */
145 			errh_handle_attr(&errh_flt);
146 			break;
147 
148 		case ERRH_DESC_WARN_RE:
149 			/*
150 			 * Power-off requested, but handle it one time only.
151 			 */
152 			if (!err_shutdown_triggered) {
153 				setsoftint(err_shutdown_inum);
154 				++err_shutdown_triggered;
155 			}
156 			continue;
157 
158 		default:
159 			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
160 			    " invalid in resumable error handler",
161 			    (long long) errh_flt.errh_er.desc);
162 			continue;
163 		}
164 
165 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
166 		aflt->flt_id = gethrtime();
167 		aflt->flt_bus_id = getprocessorid();
168 		aflt->flt_class = CPU_FAULT;
169 		aflt->flt_prot = AFLT_PROT_NONE;
170 		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
171 		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);
172 
173 		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
174 			/* If it is an error on other cpu */
175 			aflt->flt_panic = 1;
176 		else
177 			aflt->flt_panic = 0;
178 
179 		/*
180 		 * Handle resumable queue full case.
181 		 */
182 		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
183 			(void) errh_rq_full(aflt);
184 		}
185 
186 		/*
187 		 * Queue the error on ce or ue queue depend on flt_panic.
188 		 * Even if flt_panic is set, the code still keep processing
189 		 * the rest element on rq until the panic starts.
190 		 */
191 		(void) cpu_queue_one_event(&errh_flt);
192 
193 		/*
194 		 * Panic here if aflt->flt_panic has been set.
195 		 * Enqueued errors will be logged as part of the panic flow.
196 		 */
197 		if (aflt->flt_panic) {
198 			fm_panic("Unrecoverable error on another CPU");
199 		}
200 	}
201 }
202 
203 void
204 process_nonresumable_error(struct regs *rp, uint64_t flags,
205     uint32_t head_offset, uint32_t tail_offset)
206 {
207 	struct machcpu *mcpup;
208 	struct async_flt *aflt;
209 	errh_async_flt_t errh_flt;
210 	errh_er_t *head_va;
211 	int trampolined = 0;
212 	int expected = DDI_FM_ERR_UNEXPECTED;
213 	uint64_t exec_mode;
214 	uint8_t u_spill_fill;
215 	int u_kill = 1;
216 
217 	mcpup = &(CPU->cpu_m);
218 
219 	while (head_offset != tail_offset) {
220 		/* kernel buffer starts right after the nonresumable queue */
221 		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
222 		    CPU_NRQ_SIZE);
223 
224 		/* Copy the error report to local buffer */
225 		bzero(&errh_flt, sizeof (errh_async_flt_t));
226 
227 		bcopy((char *)head_va, &(errh_flt.errh_er),
228 		    sizeof (errh_er_t));
229 
230 		mcpup->cpu_nrq_lastnre = head_va;
231 		if (printerrh)
232 			errh_er_print(&errh_flt.errh_er, "NRQ");
233 
234 		/* Increment the queue head */
235 		head_offset += Q_ENTRY_SIZE;
236 		/* Wrap around */
237 		head_offset &= (CPU_NRQ_SIZE - 1);
238 
239 		/* set error handle to zero so it can hold new error report */
240 		head_va->ehdl = 0;
241 
242 		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
243 
244 		trampolined = 0;
245 
246 		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
247 			aflt->flt_class = BUS_FAULT;
248 		else
249 			aflt->flt_class = CPU_FAULT;
250 
251 		aflt->flt_id = gethrtime();
252 		aflt->flt_bus_id = getprocessorid();
253 		aflt->flt_pc = (caddr_t)rp->r_pc;
254 		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
255 		    >> ERRH_MODE_SHIFT;
256 		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
257 		    exec_mode == ERRH_MODE_UNKNOWN);
258 		aflt->flt_prot = AFLT_PROT_NONE;
259 		aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
260 		aflt->flt_panic = ((aflt->flt_tl != 0) ||
261 		    (aft_testfatal != 0));
262 
263 		/*
264 		 * For the first error packet on the queue, check if it
265 		 * happened in user fill/spill trap.
266 		 */
267 		if (flags & ERRH_U_SPILL_FILL) {
268 			u_spill_fill = 1;
269 			/* clear the user fill/spill flag in flags */
270 			flags = (uint64_t)aflt->flt_tl;
271 		} else
272 			u_spill_fill = 0;
273 
274 		switch (errh_flt.errh_er.desc) {
275 		case ERRH_DESC_PR_NRE:
276 			if (u_spill_fill) {
277 				aflt->flt_panic = 0;
278 				break;
279 			}
280 			/*
281 			 * Context Register Parity - for reload of secondary
282 			 * context register, see nonresumable_error.
283 			 */
284 			if ((errh_flt.errh_er.attr & ERRH_ATTR_ASI) &&
285 			    (errh_flt.errh_er.asi == ASI_MMU_CTX)) {
286 
287 				if (aflt->flt_tl)	/* TL>0, so panic */
288 					break;
289 
290 				/* Panic on unknown context registers */
291 				if (errh_flt.errh_er.addr < MMU_PCONTEXT0 ||
292 				    errh_flt.errh_er.addr + errh_flt.errh_er.sz
293 				    > MMU_SCONTEXT1 + sizeof (uint64_t)) {
294 					cmn_err(CE_WARN, "Parity error on "
295 					    "unknown context register\n");
296 					aflt->flt_panic = 1;
297 					break;
298 				}
299 
300 				u_kill = 0;		/* do not terminate */
301 				break;
302 			}
303 			/*
304 			 * All other PR_NRE fall through in order to
305 			 * check for protection.  The list can include
306 			 * ERRH_ATTR_FRF, ERRH_ATTR_IRF, ERRH_ATTR_MEM,
307 			 * and ERRH_ATTR_PIO.
308 			 */
309 			/*FALLTHRU*/
310 
311 		case ERRH_DESC_DEF_NRE:
312 			/*
313 			 * If the trap occurred in privileged mode at TL=0,
314 			 * we need to check to see if we were executing
315 			 * in kernel under on_trap() or t_lofault
316 			 * protection. If so, and if it was a PIO or MEM
317 			 * error, then modify the saved registers so that
318 			 * we return from the trap to the appropriate
319 			 * trampoline routine.
320 			 */
321 			if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
322 			    ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
323 			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
324 				trampolined =
325 				    errh_error_protected(rp, aflt, &expected);
326 			}
327 
328 			if (!aflt->flt_priv || aflt->flt_prot ==
329 			    AFLT_PROT_COPY) {
330 				aflt->flt_panic |= aft_panic;
331 			} else if (!trampolined &&
332 			    (aflt->flt_class != BUS_FAULT)) {
333 				aflt->flt_panic = 1;
334 			}
335 
336 			/*
337 			 * Check error attribute, handle individual error
338 			 * if it is needed.
339 			 */
340 			errh_handle_attr(&errh_flt);
341 
342 			/*
343 			 * If PIO error, we need to query the bus nexus
344 			 * for fatal errors.
345 			 */
346 			if (aflt->flt_class == BUS_FAULT) {
347 				aflt->flt_addr = errh_flt.errh_er.addr;
348 				errh_cpu_run_bus_error_handlers(aflt,
349 				    expected);
350 			}
351 
352 			break;
353 
354 		case ERRH_DESC_USER_DCORE:
355 			/*
356 			 * User generated panic. Call panic directly
357 			 * since there are no FMA e-reports to
358 			 * display.
359 			 */
360 
361 			panic("Panic - Generated at user request");
362 
363 			break;
364 
365 		default:
366 			cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
367 			    " invalid in non-resumable error handler",
368 			    (long long) errh_flt.errh_er.desc);
369 			aflt->flt_panic = 1;
370 			break;
371 		}
372 
373 		/*
374 		 * Queue the error report for further processing. If
375 		 * flt_panic is set, code still process other errors
376 		 * in the queue until the panic routine stops the
377 		 * kernel.
378 		 */
379 		(void) cpu_queue_one_event(&errh_flt);
380 
381 		/*
382 		 * Panic here if aflt->flt_panic has been set.
383 		 * Enqueued errors will be logged as part of the panic flow.
384 		 */
385 		if (aflt->flt_panic) {
386 			fm_panic("Unrecoverable hardware error");
387 		}
388 
389 		/*
390 		 * Call page_retire() to handle memory errors.
391 		 */
392 		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
393 			errh_page_retire(&errh_flt, PR_UE);
394 
395 		/*
396 		 * If we queued an error for a thread that should terminate
397 		 * and it was in user mode or protected by t_lofault, set AST
398 		 * flag so the queue will be drained before returning to user
399 		 * mode.  Note that user threads can be killed via pcb_flags.
400 		 */
401 		if (u_kill && (!aflt->flt_priv ||
402 		    aflt->flt_prot == AFLT_PROT_COPY || u_spill_fill)) {
403 			int pcb_flag = 0;
404 
405 			if (aflt->flt_class == CPU_FAULT)
406 				pcb_flag |= ASYNC_HWERR;
407 			else if (aflt->flt_class == BUS_FAULT)
408 				pcb_flag |= ASYNC_BERR;
409 
410 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
411 			aston(curthread);
412 		}
413 	}
414 }
415 
416 /*
417  * For PIO errors, this routine calls nexus driver's error
418  * callback routines. If the callback routine returns fatal, and
419  * we are in kernel or unknow mode without any error protection,
420  * we need to turn on the panic flag.
421  */
422 void
423 errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
424 {
425 	int status;
426 	ddi_fm_error_t de;
427 
428 	bzero(&de, sizeof (ddi_fm_error_t));
429 
430 	de.fme_version = DDI_FME_VERSION;
431 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
432 	de.fme_flag = expected;
433 	de.fme_bus_specific = (void *)aflt->flt_addr;
434 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
435 
436 	/*
437 	 * If error is protected, it will jump to proper routine
438 	 * to handle the handle; if it is in user level, we just
439 	 * kill the user process; if the driver thinks the error is
440 	 * not fatal, we can drive on. If none of above are true,
441 	 * we panic
442 	 */
443 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
444 	    (status == DDI_FM_FATAL))
445 		aflt->flt_panic = 1;
446 }
447 
448 /*
449  * This routine checks to see if we are under any error protection when
450  * the error happens. If we are under error protection, we unwind to
451  * the protection and indicate fault.
452  */
453 static int
454 errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
455 {
456 	int trampolined = 0;
457 	ddi_acc_hdl_t *hp;
458 
459 	if (curthread->t_ontrap != NULL) {
460 		on_trap_data_t *otp = curthread->t_ontrap;
461 
462 		if (otp->ot_prot & OT_DATA_EC) {
463 			aflt->flt_prot = AFLT_PROT_EC;
464 			otp->ot_trap |= OT_DATA_EC;
465 			rp->r_pc = otp->ot_trampoline;
466 			rp->r_npc = rp->r_pc +4;
467 			trampolined = 1;
468 		}
469 
470 		if (otp->ot_prot & OT_DATA_ACCESS) {
471 			aflt->flt_prot = AFLT_PROT_ACCESS;
472 			otp->ot_trap |= OT_DATA_ACCESS;
473 			rp->r_pc = otp->ot_trampoline;
474 			rp->r_npc = rp->r_pc + 4;
475 			trampolined = 1;
476 			/*
477 			 * for peek and caut_gets
478 			 * errors are expected
479 			 */
480 			hp = (ddi_acc_hdl_t *)otp->ot_handle;
481 			if (!hp)
482 				*expected = DDI_FM_ERR_PEEK;
483 			else if (hp->ah_acc.devacc_attr_access ==
484 			    DDI_CAUTIOUS_ACC)
485 				*expected = DDI_FM_ERR_EXPECTED;
486 		}
487 	} else if (curthread->t_lofault) {
488 		aflt->flt_prot = AFLT_PROT_COPY;
489 		rp->r_g1 = EFAULT;
490 		rp->r_pc = curthread->t_lofault;
491 		rp->r_npc = rp->r_pc + 4;
492 		trampolined = 1;
493 	}
494 
495 	return (trampolined);
496 }
497 
498 /*
499  * Queue one event.
500  */
501 static void
502 cpu_queue_one_event(errh_async_flt_t *errh_fltp)
503 {
504 	struct async_flt *aflt = (struct async_flt *)errh_fltp;
505 	errorq_t *eqp;
506 
507 	if (aflt->flt_panic)
508 		eqp = ue_queue;
509 	else
510 		eqp = ce_queue;
511 
512 	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
513 	    aflt->flt_panic);
514 }
515 
516 /*
517  * The cpu_async_log_err() function is called by the ce/ue_drain() function to
518  * handle logging for CPU events that are dequeued.  As such, it can be invoked
519  * from softint context, from AST processing in the trap() flow, or from the
520  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
521  */
522 void
523 cpu_async_log_err(void *flt)
524 {
525 	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
526 	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;
527 
528 	switch (errh_erp->desc) {
529 	case ERRH_DESC_UCOR_RE:
530 		if (errh_erp->attr & ERRH_ATTR_MEM) {
531 			/*
532 			 * Turn on the PR_UE flag. The page will be
533 			 * scrubbed when it is freed.
534 			 */
535 			errh_page_retire(errh_fltp, PR_UE);
536 		}
537 
538 		break;
539 
540 	case ERRH_DESC_PR_NRE:
541 	case ERRH_DESC_DEF_NRE:
542 		if (errh_erp->attr & ERRH_ATTR_MEM) {
543 			/*
544 			 * For non-resumable memory error, retire
545 			 * the page here.
546 			 */
547 			errh_page_retire(errh_fltp, PR_UE);
548 
549 			/*
550 			 * If we are going to panic, scrub the page first
551 			 */
552 			if (errh_fltp->cmn_asyncflt.flt_panic)
553 				mem_scrub(errh_fltp->errh_er.addr,
554 				    errh_fltp->errh_er.sz);
555 		}
556 		break;
557 
558 	default:
559 		break;
560 	}
561 }
562 
563 /*
564  * Called from ce_drain().
565  */
566 void
567 cpu_ce_log_err(struct async_flt *aflt)
568 {
569 	switch (aflt->flt_class) {
570 	case CPU_FAULT:
571 		cpu_async_log_err(aflt);
572 		break;
573 
574 	case BUS_FAULT:
575 		cpu_async_log_err(aflt);
576 		break;
577 
578 	default:
579 		break;
580 	}
581 }
582 
583 /*
584  * Called from ue_drain().
585  */
586 void
587 cpu_ue_log_err(struct async_flt *aflt)
588 {
589 	switch (aflt->flt_class) {
590 	case CPU_FAULT:
591 		cpu_async_log_err(aflt);
592 		break;
593 
594 	case BUS_FAULT:
595 		cpu_async_log_err(aflt);
596 		break;
597 
598 	default:
599 		break;
600 	}
601 }
602 
603 /*
604  * Turn on flag on the error memory region.
605  */
606 static void
607 errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
608 {
609 	uint64_t flt_real_addr_start = errh_fltp->errh_er.addr;
610 	uint64_t flt_real_addr_end = flt_real_addr_start +
611 	    errh_fltp->errh_er.sz - 1;
612 	int64_t current_addr;
613 
614 	if (errh_fltp->errh_er.sz == 0)
615 		return;
616 
617 	for (current_addr = flt_real_addr_start;
618 	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
619 		(void) page_retire(current_addr, flag);
620 	}
621 }
622 
623 void
624 mem_scrub(uint64_t paddr, uint64_t len)
625 {
626 	uint64_t pa, length, scrubbed_len;
627 
628 	pa = paddr;
629 	length = len;
630 	scrubbed_len = 0;
631 
632 	while (length > 0) {
633 		if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
634 			break;
635 
636 		pa += scrubbed_len;
637 		length -= scrubbed_len;
638 	}
639 }
640 
641 /*
642  * Call hypervisor to flush the memory region.
643  * Both va and len must be MMU_PAGESIZE aligned.
644  * Returns the total number of bytes flushed.
645  */
646 uint64_t
647 mem_sync(caddr_t orig_va, size_t orig_len)
648 {
649 	uint64_t pa, length, flushed;
650 	uint64_t chunk_len = MMU_PAGESIZE;
651 	uint64_t total_flushed = 0;
652 	uint64_t va, len;
653 
654 	if (orig_len == 0)
655 		return (total_flushed);
656 
657 	/* align va */
658 	va = P2ALIGN_TYPED(orig_va, MMU_PAGESIZE, uint64_t);
659 	/* round up len to MMU_PAGESIZE aligned */
660 	len = P2ROUNDUP_TYPED(orig_va + orig_len, MMU_PAGESIZE, uint64_t) - va;
661 
662 	while (len > 0) {
663 		pa = va_to_pa((caddr_t)va);
664 		if (pa == (uint64_t)-1)
665 			return (total_flushed);
666 
667 		length = chunk_len;
668 		flushed = 0;
669 
670 		while (length > 0) {
671 			if (hv_mem_sync(pa, length, &flushed) != H_EOK)
672 				return (total_flushed);
673 
674 			pa += flushed;
675 			length -= flushed;
676 			total_flushed += flushed;
677 		}
678 
679 		va += chunk_len;
680 		len -= chunk_len;
681 	}
682 
683 	return (total_flushed);
684 }
685 
686 /*
687  * If resumable queue is full, we need to check if any cpu is in
688  * error state. If not, we drive on. If yes, we need to panic. The
689  * hypervisor call hv_cpu_state() is being used for checking the
690  * cpu state.  And reset %tick_compr in case tick-compare was lost.
691  */
692 static void
693 errh_rq_full(struct async_flt *afltp)
694 {
695 	processorid_t who;
696 	uint64_t cpu_state;
697 	uint64_t retval;
698 	uint64_t current_tick;
699 
700 	current_tick = (uint64_t)gettick();
701 	tickcmpr_set(current_tick);
702 
703 	for (who = 0; who < NCPU; who++)
704 		if (CPU_IN_SET(cpu_ready_set, who)) {
705 			retval = hv_cpu_state(who, &cpu_state);
706 			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
707 				afltp->flt_panic = 1;
708 				break;
709 			}
710 		}
711 }
712 
713 /*
714  * Return processor specific async error structure
715  * size used.
716  */
717 int
718 cpu_aflt_size(void)
719 {
720 	return (sizeof (errh_async_flt_t));
721 }
722 
/*
 * NOTE(review): nothing in the visible part of this file references
 * SZ_TO_ETRS_SHIFT.  Presumably it converts a queue size in bytes into a
 * number of 64-byte (1 << 6) entries -- confirm against other users before
 * relying on or removing it.
 */
#define	SZ_TO_ETRS_SHIFT	6
724 
725 /*
726  * Message print out when resumable queue is overflown
727  */
728 /*ARGSUSED*/
729 void
730 rq_overflow(struct regs *rp, uint64_t head_offset,
731     uint64_t tail_offset)
732 {
733 	rq_overflow_count++;
734 }
735 
736 /*
737  * Handler to process a fatal error.  This routine can be called from a
738  * softint, called from trap()'s AST handling, or called from the panic flow.
739  */
740 /*ARGSUSED*/
741 static void
742 ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
743 {
744 	cpu_ue_log_err(aflt);
745 }
746 
747 /*
748  * Handler to process a correctable error.  This routine can be called from a
749  * softint.  We just call the CPU module's logging routine.
750  */
751 /*ARGSUSED*/
752 static void
753 ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
754 {
755 	cpu_ce_log_err(aflt);
756 }
757 
758 /*
759  * Handler to process vbsc hostshutdown (power-off button).
760  */
761 static int
762 err_shutdown_softintr()
763 {
764 	cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
765 	do_shutdown();
766 
767 	/*
768 	 * just in case do_shutdown() fails
769 	 */
770 	(void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
771 	return (DDI_INTR_CLAIMED);
772 }
773 
774 /*
775  * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
776  * after ncpunode has been determined.  ncpus is set in start_other_cpus
777  * which is called after error_init() but may change dynamically.
778  */
779 void
780 error_init(void)
781 {
782 	char tmp_name[MAXSYSNAME];
783 	pnode_t node;
784 	size_t size = cpu_aflt_size();
785 
786 	/*
787 	 * Initialize the correctable and uncorrectable error queues.
788 	 */
789 	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
790 	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);
791 
792 	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
793 	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);
794 
795 	if (ue_queue == NULL || ce_queue == NULL)
796 		panic("failed to create required system error queue");
797 
798 	/*
799 	 * Setup interrupt handler for power-off button.
800 	 */
801 	err_shutdown_inum = add_softintr(PIL_9,
802 	    (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);
803 
804 	/*
805 	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
806 	 * because we will need to acquire it from cpu_async_error().
807 	 */
808 	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
809 
810 	/* Only allow one cpu at a time to dump errh errors. */
811 	mutex_init(&errh_print_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
812 
813 	node = prom_rootnode();
814 	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
815 		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
816 		return;
817 	}
818 
819 	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
820 	    (size <= MAXSYSNAME) &&
821 	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
822 		if (reset_debug) {
823 			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
824 		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
825 			cmn_err(CE_CONT,
826 			    "System booting after fatal error %s\n", tmp_name);
827 		}
828 	}
829 }
830 
/*
 * Invoked when the nonresumable queue is full.  There is no recovery from
 * this condition, so the system panics through fm_panic().  rp is not used.
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
	fm_panic("Nonresumable queue full");
}
840 
841 /*
842  * This is the place for special error handling for individual errors.
843  */
844 static void
845 errh_handle_attr(errh_async_flt_t *errh_fltp)
846 {
847 	switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
848 	case ERRH_ATTR_CPU:
849 	case ERRH_ATTR_MEM:
850 	case ERRH_ATTR_PIO:
851 	case ERRH_ATTR_IRF:
852 	case ERRH_ATTR_FRF:
853 	case ERRH_ATTR_SHUT:
854 		break;
855 
856 	case ERRH_ATTR_ASR:
857 		errh_handle_asr(errh_fltp);
858 		break;
859 
860 	case ERRH_ATTR_ASI:
861 	case ERRH_ATTR_PREG:
862 	case ERRH_ATTR_RQF:
863 		break;
864 
865 	default:
866 		break;
867 	}
868 }
869 
870 /*
871  * Handle ASR bit set in ATTR
872  */
873 static void
874 errh_handle_asr(errh_async_flt_t *errh_fltp)
875 {
876 	uint64_t current_tick;
877 
878 	switch (errh_fltp->errh_er.reg) {
879 	case ASR_REG_VALID | ASR_REG_TICK:
880 		/*
881 		 * For Tick Compare Register error, it only happens when
882 		 * the register is being read or compared with the %tick
883 		 * register. Since we lost the contents of the register,
884 		 * we set the %tick_compr in the future. An interrupt will
885 		 * happen when %tick matches the value field of %tick_compr.
886 		 */
887 		current_tick = (uint64_t)gettick();
888 		tickcmpr_set(current_tick);
889 		/* Do not panic */
890 		errh_fltp->cmn_asyncflt.flt_panic = 0;
891 		break;
892 
893 	default:
894 		break;
895 	}
896 }
897 
898 /*
899  * Dump the error packet
900  */
901 /*ARGSUSED*/
902 static void
903 errh_er_print(errh_er_t *errh_erp, const char *queue)
904 {
905 	typedef union {
906 		uint64_t w;
907 		uint16_t s[4];
908 	} errhp_t;
909 	errhp_t *p = (errhp_t *)errh_erp;
910 	int i;
911 
912 	mutex_enter(&errh_print_lock);
913 	switch (errh_erp->desc) {
914 	case ERRH_DESC_UCOR_RE:
915 		cmn_err(CE_CONT, "\nResumable Uncorrectable Error ");
916 		break;
917 	case ERRH_DESC_PR_NRE:
918 		cmn_err(CE_CONT, "\nNonresumable Precise Error ");
919 		break;
920 	case ERRH_DESC_DEF_NRE:
921 		cmn_err(CE_CONT, "\nNonresumable Deferred Error ");
922 		break;
923 	default:
924 		cmn_err(CE_CONT, "\nError packet ");
925 		break;
926 	}
927 	cmn_err(CE_CONT, "received on %s\n", queue);
928 
929 	/*
930 	 * Print Q_ENTRY_SIZE bytes of epacket with 8 bytes per line
931 	 */
932 	for (i = Q_ENTRY_SIZE; i > 0; i -= 8, ++p) {
933 		cmn_err(CE_CONT, "%016lx: %04x %04x %04x %04x\n", (uint64_t)p,
934 		    p->s[0], p->s[1], p->s[2], p->s[3]);
935 	}
936 	mutex_exit(&errh_print_lock);
937 }
938