xref: /titanic_52/usr/src/uts/sun4v/io/px/px_err.c (revision 2dea4eed7ad1c66ae4770263aa2911815a8b86eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * sun4v Fire Error Handling
28  */
29 
30 #include <sys/types.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/sunndi.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/fm/util.h>
36 #include <sys/membar.h>
37 #include "px_obj.h"
38 #include "px_err.h"
39 
40 static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p,
41     px_rc_err_t *epkt);
42 static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
43 static int  px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr,
44     px_rc_err_t *epkt, pf_data_t *pfd_p);
45 
46 static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt,
47     boolean_t is_block_pci, char *msg);
48 static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
49     boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
50     boolean_t is_valid_epkt);
51 static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
52     px_rc_err_t *epkt);
53 static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
54     px_rc_err_t *epkt);
55 static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
56     px_rc_err_t *epkt);
57 static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
58     px_rc_err_t *epkt, pf_data_t *pfd_p);
59 static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
60     px_rc_err_t *epkt);
61 static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
62     px_rc_err_t *epkt);
63 static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
64     px_rc_err_t *epkt, pf_data_t *pfd_p);
65 static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr,
66     px_rc_err_t *epkt);
67 static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr,
68     px_rc_err_t *epkt);
69 
70 /* Include the code generated sun4v epkt checking code */
71 #include "px_err_gen.c"
72 
73 /*
74  * This variable indicates if we have a hypervisor that could potentially send
75  * incorrect epkts. We always set this to TRUE for now until we find a way to
76  * tell if this HV bug has been fixed.
77  */
78 boolean_t px_legacy_epkt = B_TRUE;
79 
80 /*
81  * px_err_cb_intr:
82  * Interrupt handler for the Host Bus Block.
83  */
84 uint_t
85 px_err_cb_intr(caddr_t arg)
86 {
87 	px_fault_t	*fault_p = (px_fault_t *)arg;
88 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
89 
90 	if (epkt != NULL) {
91 		return (px_err_intr(fault_p, epkt));
92 	}
93 
94 	return (DDI_INTR_UNCLAIMED);
95 }
96 
97 /*
98  * px_err_dmc_pec_intr:
99  * Interrupt handler for the DMC/PEC block.
100  */
101 uint_t
102 px_err_dmc_pec_intr(caddr_t arg)
103 {
104 	px_fault_t	*fault_p = (px_fault_t *)arg;
105 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
106 
107 	if (epkt != NULL) {
108 		return (px_err_intr(fault_p, epkt));
109 	}
110 
111 	return (DDI_INTR_UNCLAIMED);
112 }
113 
114 /*
115  * px_err_cmn_intr:
116  * Common function called by trap, mondo and fabric intr.
117  * This function is more meaningful in sun4u implementation.  Kept
118  * to mirror sun4u call stack.
119  * o check for safe access
120  * o create and queue RC info for later use in fabric scan.
121  *   o RUC/WUC, PTLP, MMU Errors(CA), UR
122  *
123  * @param px_p		leaf in which to check access
124  * @param derr		fm err data structure to be updated
125  * @param caller	PX_TRAP_CALL | PX_INTR_CALL
126  * @param chkjbc	whether to handle hostbus registers (ignored)
127  * @return err		PX_NO_PANIC | PX_PROTECTED |
128  *                      PX_PANIC | PX_HW_RESET | PX_EXPECTED
129  */
130 /* ARGSUSED */
131 int
132 px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
133 {
134 	px_err_safeacc_check(px_p, derr);
135 	return (DDI_FM_OK);
136 }
137 
138 /*
139  * fills RC specific fault data
140  */
141 static void
142 px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) {
143 	pf_pcie_adv_err_regs_t adv_reg;
144 	int		sts = DDI_SUCCESS;
145 	pcie_req_id_t	fault_bdf = PCIE_INVALID_BDF;
146 	uint64_t	fault_addr = 0;
147 	uint16_t	s_status = 0;
148 
149 	/* Add an PCIE PF_DATA Entry */
150 	if (epkt->rc_descr.block == BLOCK_MMU) {
151 		/* Only PIO Fault Addresses are valid, this is DMA */
152 		s_status = PCI_STAT_S_TARG_AB;
153 		fault_addr = NULL;
154 
155 		if (epkt->rc_descr.H) {
156 			fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16);
157 			PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags =
158 			    PF_AFFECTED_BDF;
159 			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
160 			    fault_bdf;
161 		} else
162 			sts = DDI_FAILURE;
163 	} else {
164 		px_pec_err_t	*pec_p = (px_pec_err_t *)epkt;
165 		uint32_t	dir = pec_p->pec_descr.dir;
166 
167 		adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]);
168 		adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32);
169 		adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]);
170 		adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32);
171 
172 		/* translate RC UR/CA to legacy secondary errors */
173 		if ((dir == DIR_READ || dir == DIR_WRITE) &&
174 		    pec_p->pec_descr.U) {
175 			if (pec_p->ue_reg_status & PCIE_AER_UCE_UR)
176 				s_status |= PCI_STAT_R_MAST_AB;
177 			if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
178 				s_status |= PCI_STAT_R_TARG_AB;
179 		}
180 
181 		if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP)
182 			s_status |= PCI_STAT_PERROR;
183 
184 		if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
185 			s_status |= PCI_STAT_S_TARG_AB;
186 
187 		sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
188 		fault_bdf = adv_reg.pcie_ue_tgt_bdf;
189 		fault_addr = adv_reg.pcie_ue_tgt_addr;
190 		/* affected BDF is to be filled in by px_scan_fabric */
191 	}
192 
193 	if (sts == DDI_SUCCESS) {
194 		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf;
195 		PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)fault_addr;
196 		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
197 	}
198 }
199 
200 /*
201  * Convert error severity from PX internal values to PCIe Fabric values.  Most
202  * are self explanitory, except PX_PROTECTED.  PX_PROTECTED will never be
203  * returned as is if forgivable.
204  */
205 static int px_err_to_fab_sev(int rc_err) {
206 	int fab_err = 0;
207 
208 	if (rc_err & (PX_HW_RESET | PX_EXPECTED | PX_NO_PANIC))
209 		fab_err |= PF_ERR_NO_PANIC;
210 
211 	if (rc_err & (PX_PANIC | PX_PROTECTED))
212 		fab_err |= PF_ERR_PANIC;
213 
214 	if (rc_err & PX_NO_ERROR)
215 		fab_err |= PF_ERR_NO_ERROR;
216 
217 	return (fab_err);
218 }
219 
220 /*
221  * px_err_intr:
222  * Interrupt handler for the JBC/DMC/PEC block.
223  * o lock
224  * o create derr
225  * o check safe access
226  * o px_err_check_severity(epkt)
227  * o pcie_scan_fabric
228  * o Idle intr state
229  * o unlock
230  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
231  */
232 static uint_t
233 px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
234 {
235 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
236 	dev_info_t	*rpdip = px_p->px_dip;
237 	int		rc_err, fab_err, msg;
238 	ddi_fm_error_t	derr;
239 	pf_data_t	*pfd_p;
240 
241 	if (px_fm_enter(px_p) != DDI_SUCCESS)
242 		goto done;
243 
244 	pfd_p = px_get_pfd(px_p);
245 	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL;
246 	PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt;
247 
248 	/* Create the derr */
249 	bzero(&derr, sizeof (ddi_fm_error_t));
250 	derr.fme_version = DDI_FME_VERSION;
251 	derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
252 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
253 
254 	/* Basically check for safe access */
255 	(void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL);
256 
257 	/* Check the severity of this error */
258 	rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p);
259 
260 	pfd_p->pe_severity_flags = px_err_to_fab_sev(rc_err);
261 	/*
262 	 * px_err_epkt_severity needs to populate affected dev
263 	 * Only MMU errors and PCIe errors need this.
264 	 * For MMU we will call pf_handle_lookup, using fault bdf
265 	 * - need to call bdf look up..
266 	 * For PCIe do not fill in affected..
267 	 */
268 
269 	/* Scan the fabric if the root port is not in drain state. */
270 	fab_err = px_scan_fabric(px_p, rpdip, &derr);
271 
272 	/* Set the intr state to idle for the leaf that received the mondo */
273 	if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
274 	    INTR_IDLE_STATE) != DDI_SUCCESS) {
275 		px_fm_exit(px_p);
276 		return (DDI_INTR_UNCLAIMED);
277 	}
278 
279 	switch (epkt->rc_descr.block) {
280 	case BLOCK_MMU: /* FALLTHROUGH */
281 	case BLOCK_INTR:
282 		msg = PX_RC;
283 		break;
284 	case BLOCK_PCIE:
285 		msg = PX_RP;
286 		break;
287 	case BLOCK_HOSTBUS: /* FALLTHROUGH */
288 	default:
289 		msg = PX_HB;
290 		break;
291 	}
292 
293 	px_err_panic(rc_err, msg, fab_err, B_TRUE);
294 	px_fm_exit(px_p);
295 	px_err_panic(rc_err, msg, fab_err, B_FALSE);
296 
297 done:
298 	return (DDI_INTR_CLAIMED);
299 }
300 
301 /*
302  * px_err_epkt_severity:
303  * Check the severity of the fire error based the epkt received
304  *
305  * @param px_p		leaf in which to take the snap shot.
306  * @param derr		fm err in which the ereport is to be based on
307  * @param epkt		epkt recevied from HV
308  */
309 static int
310 px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
311     pf_data_t *pfd_p)
312 {
313 	px_pec_t 	*pec_p = px_p->px_pec_p;
314 	dev_info_t	*dip = px_p->px_dip;
315 	boolean_t	is_safeacc = B_FALSE;
316 	boolean_t	is_block_pci = B_FALSE;
317 	boolean_t	is_valid_epkt = B_FALSE;
318 	int		err = 0;
319 
320 	/* Cautious access error handling  */
321 	switch (derr->fme_flag) {
322 	case DDI_FM_ERR_EXPECTED:
323 		/*
324 		 * For ddi_caut_put treat all events as nonfatal. Here
325 		 * we have the handle and can call ndi_fm_acc_err_set().
326 		 */
327 		derr->fme_status = DDI_FM_NONFATAL;
328 		ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
329 		is_safeacc = B_TRUE;
330 		break;
331 	case DDI_FM_ERR_PEEK:
332 	case DDI_FM_ERR_POKE:
333 		/*
334 		 * For ddi_peek/poke treat all events as nonfatal.
335 		 */
336 		is_safeacc = B_TRUE;
337 		break;
338 	default:
339 		is_safeacc = B_FALSE;
340 	}
341 
342 	/*
343 	 * Older hypervisors in some cases send epkts with incorrect fields.
344 	 * We have to handle these "special" epkts correctly.
345 	 */
346 	if (px_legacy_epkt)
347 		px_fix_legacy_epkt(dip, derr, epkt);
348 
349 	switch (epkt->rc_descr.block) {
350 	case BLOCK_HOSTBUS:
351 		err = px_cb_epkt_severity(dip, derr, epkt);
352 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
353 		break;
354 	case BLOCK_MMU:
355 		err = px_mmu_epkt_severity(dip, derr, epkt);
356 		px_err_fill_pfd(dip, pfd_p, epkt);
357 		break;
358 	case BLOCK_INTR:
359 		err = px_intr_epkt_severity(dip, derr, epkt);
360 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
361 		break;
362 	case BLOCK_PORT:
363 		err = px_port_epkt_severity(dip, derr, epkt, pfd_p);
364 		break;
365 	case BLOCK_PCIE:
366 		is_block_pci = B_TRUE;
367 		err = px_pcie_epkt_severity(dip, derr, epkt);
368 		px_err_fill_pfd(dip, pfd_p, epkt);
369 		break;
370 	default:
371 		err = 0;
372 	}
373 
374 	if ((err & PX_HW_RESET) || (err & PX_PANIC)) {
375 		if (px_log & PX_PANIC)
376 			px_err_log_handle(dip, epkt, is_block_pci, "PANIC");
377 		is_valid_epkt = B_TRUE;
378 	} else if (err & PX_PROTECTED) {
379 		if (px_log & PX_PROTECTED)
380 			px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED");
381 		is_valid_epkt = B_TRUE;
382 	} else if (err & PX_NO_PANIC) {
383 		if (px_log & PX_NO_PANIC)
384 			px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC");
385 		is_valid_epkt = B_TRUE;
386 	} else if (err & PX_NO_ERROR) {
387 		if (px_log & PX_NO_ERROR)
388 			px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR");
389 		is_valid_epkt = B_TRUE;
390 	} else if (err == 0) {
391 		px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED");
392 		is_valid_epkt = B_FALSE;
393 
394 		/* Panic on a unrecognized epkt */
395 		err = PX_PANIC;
396 	}
397 
398 	px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr,
399 	    is_valid_epkt);
400 
401 	/* Readjust the severity as a result of safe access */
402 	if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED))
403 		err = PX_NO_PANIC;
404 
405 	return (err);
406 }
407 
408 static void
409 px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
410     boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
411     boolean_t is_valid_epkt)
412 {
413 	char buf[FM_MAX_CLASS], descr_buf[1024];
414 
415 	/* send ereport for debug purposes */
416 	(void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG);
417 
418 	if (is_block_pci) {
419 		px_pec_err_t *pec = (px_pec_err_t *)epkt;
420 		(void) snprintf(descr_buf, sizeof (descr_buf),
421 		    "%s Epkt contents:\n"
422 		    "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
423 		    "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
424 		    "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
425 		    "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
426 		    "HDR1: 0x%lx, HDR2: 0x%lx\n"
427 		    "Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
428 		    "Err Severity: 0x%x\n",
429 		    is_valid_epkt ? "Valid" : "Invalid",
430 		    pec->pec_descr.block, pec->pec_descr.dir,
431 		    pec->pec_descr.Z, pec->pec_descr.S,
432 		    pec->pec_descr.R, pec->pec_descr.I,
433 		    pec->pec_descr.H, pec->pec_descr.C,
434 		    pec->pec_descr.U, pec->pec_descr.E,
435 		    pec->pec_descr.P, pec->pci_err_status,
436 		    pec->pcie_err_status, pec->ce_reg_status,
437 		    pec->ue_reg_status, pec->hdr[0],
438 		    pec->hdr[1], pec->err_src_reg,
439 		    pec->root_err_status, err);
440 
441 		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
442 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
443 		    EPKT_SYSINO, DATA_TYPE_UINT64,
444 		    is_valid_epkt ? pec->sysino : 0,
445 		    EPKT_EHDL, DATA_TYPE_UINT64,
446 		    is_valid_epkt ? pec->ehdl : 0,
447 		    EPKT_STICK, DATA_TYPE_UINT64,
448 		    is_valid_epkt ? pec->stick : 0,
449 		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3],
450 		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4],
451 		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5],
452 		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6],
453 		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7],
454 		    EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf);
455 	} else {
456 		(void) snprintf(descr_buf, sizeof (descr_buf),
457 		    "%s Epkt contents:\n"
458 		    "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
459 		    "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
460 		    "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
461 		    "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
462 		    "Err Severity: 0x%x\n",
463 		    is_valid_epkt ? "Valid" : "Invalid",
464 		    epkt->rc_descr.block, epkt->rc_descr.op,
465 		    epkt->rc_descr.phase, epkt->rc_descr.cond,
466 		    epkt->rc_descr.dir, epkt->rc_descr.STOP,
467 		    epkt->rc_descr.H, epkt->rc_descr.R,
468 		    epkt->rc_descr.D, epkt->rc_descr.M,
469 		    epkt->rc_descr.S, epkt->size, epkt->addr,
470 		    epkt->hdr[0], epkt->hdr[1], epkt->reserved,
471 		    err);
472 
473 		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
474 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
475 		    EPKT_SYSINO, DATA_TYPE_UINT64,
476 		    is_valid_epkt ? epkt->sysino : 0,
477 		    EPKT_EHDL, DATA_TYPE_UINT64,
478 		    is_valid_epkt ? epkt->ehdl : 0,
479 		    EPKT_STICK, DATA_TYPE_UINT64,
480 		    is_valid_epkt ? epkt->stick : 0,
481 		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3],
482 		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4],
483 		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5],
484 		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6],
485 		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7],
486 		    EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf);
487 	}
488 }
489 
490 static void
491 px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci,
492     char *msg)
493 {
494 	if (is_block_pci) {
495 		px_pec_err_t *pec = (px_pec_err_t *)epkt;
496 		DBG(DBG_ERR_INTR, dip,
497 		    "A PCIe root port error has occured with a severity"
498 		    " \"%s\"\n"
499 		    "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
500 		    "\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
501 		    "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
502 		    "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
503 		    "\terr_src: 0x%x, root_err: 0x%x\n",
504 		    msg, pec->pec_descr.block, pec->pec_descr.dir,
505 		    pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R,
506 		    pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C,
507 		    pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P,
508 		    pec->pci_err_status, pec->pcie_err_status,
509 		    pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0],
510 		    pec->hdr[1], pec->err_src_reg, pec->root_err_status);
511 	} else {
512 		DBG(DBG_ERR_INTR, dip,
513 		    "A PCIe root complex error has occured with a severity"
514 		    " \"%s\"\n"
515 		    "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
516 		    "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
517 		    "\tS=%d, Size: 0x%x, Addr: 0x%p\n"
518 		    "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
519 		    msg, epkt->rc_descr.block, epkt->rc_descr.op,
520 		    epkt->rc_descr.phase, epkt->rc_descr.cond,
521 		    epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H,
522 		    epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M,
523 		    epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0],
524 		    epkt->hdr[1], epkt->reserved);
525 	}
526 }
527 
528 /* ARGSUSED */
529 static void
530 px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
531 {
532 	/*
533 	 * We don't have a default case for any of the below switch statements
534 	 * since we are ok with the code falling through.
535 	 */
536 	switch (epkt->rc_descr.block) {
537 	case BLOCK_HOSTBUS:
538 		switch (epkt->rc_descr.op) {
539 		case OP_DMA:
540 			switch (epkt->rc_descr.phase) {
541 			case PH_UNKNOWN:
542 				switch (epkt->rc_descr.cond) {
543 				case CND_UNKNOWN:
544 					switch (epkt->rc_descr.dir) {
545 					case DIR_RESERVED:
546 						epkt->rc_descr.dir = DIR_READ;
547 						break;
548 					} /* DIR */
549 				} /* CND */
550 			} /* PH */
551 		} /* OP */
552 		break;
553 	case BLOCK_MMU:
554 		switch (epkt->rc_descr.op) {
555 		case OP_XLAT:
556 			switch (epkt->rc_descr.phase) {
557 			case PH_DATA:
558 				switch (epkt->rc_descr.cond) {
559 				case CND_PROT:
560 					switch (epkt->rc_descr.dir) {
561 					case DIR_UNKNOWN:
562 						epkt->rc_descr.dir = DIR_WRITE;
563 						break;
564 					} /* DIR */
565 				} /* CND */
566 				break;
567 			case PH_IRR:
568 				switch (epkt->rc_descr.cond) {
569 				case CND_RESERVED:
570 					switch (epkt->rc_descr.dir) {
571 					case DIR_IRR:
572 						epkt->rc_descr.phase = PH_ADDR;
573 						epkt->rc_descr.cond = CND_IRR;
574 					} /* DIR */
575 				} /* CND */
576 			} /* PH */
577 		} /* OP */
578 		break;
579 	case BLOCK_INTR:
580 		switch (epkt->rc_descr.op) {
581 		case OP_MSIQ:
582 			switch (epkt->rc_descr.phase) {
583 			case PH_UNKNOWN:
584 				switch (epkt->rc_descr.cond) {
585 				case CND_ILL:
586 					switch (epkt->rc_descr.dir) {
587 					case DIR_RESERVED:
588 						epkt->rc_descr.dir = DIR_IRR;
589 						break;
590 					} /* DIR */
591 					break;
592 				case CND_IRR:
593 					switch (epkt->rc_descr.dir) {
594 					case DIR_IRR:
595 						epkt->rc_descr.cond = CND_OV;
596 						break;
597 					} /* DIR */
598 				} /* CND */
599 			} /* PH */
600 			break;
601 		case OP_RESERVED:
602 			switch (epkt->rc_descr.phase) {
603 			case PH_UNKNOWN:
604 				switch (epkt->rc_descr.cond) {
605 				case CND_ILL:
606 					switch (epkt->rc_descr.dir) {
607 					case DIR_IRR:
608 						epkt->rc_descr.op = OP_MSI32;
609 						epkt->rc_descr.phase = PH_DATA;
610 						break;
611 					} /* DIR */
612 				} /* CND */
613 				break;
614 			case PH_DATA:
615 				switch (epkt->rc_descr.cond) {
616 				case CND_INT:
617 					switch (epkt->rc_descr.dir) {
618 					case DIR_UNKNOWN:
619 						epkt->rc_descr.op = OP_MSI32;
620 						break;
621 					} /* DIR */
622 				} /* CND */
623 			} /* PH */
624 		} /* OP */
625 	} /* BLOCK */
626 }
627 
628 /* ARGSUSED */
629 static int
630 px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
631 {
632 	return (px_err_check_eq(dip));
633 }
634 
635 /* ARGSUSED */
636 static int
637 px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
638     pf_data_t *pfd_p)
639 {
640 	pf_pcie_adv_err_regs_t	adv_reg;
641 	uint16_t		s_status;
642 	int			sts = PX_PANIC;
643 
644 	/*
645 	 * Check for failed non-posted writes, which are errors that are not
646 	 * defined in the PCIe spec.  If not return panic.
647 	 */
648 	if (!((epkt->rc_descr.op == OP_PIO) &&
649 	    (epkt->rc_descr.phase == PH_IRR))) {
650 		sts = (PX_PANIC);
651 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
652 		goto done;
653 	}
654 
655 	/*
656 	 * Gather the error logs, if they do not exist just return with no panic
657 	 * and let the fabric message take care of the error.
658 	 */
659 	if (!epkt->rc_descr.H) {
660 		sts = (PX_NO_PANIC);
661 		goto done;
662 	}
663 
664 	adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0]);
665 	adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0] >> 32);
666 	adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1]);
667 	adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1] >> 32);
668 
669 	sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
670 
671 	if (epkt->rc_descr.M)
672 		adv_reg.pcie_ue_tgt_addr = epkt->addr;
673 
674 	if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) {
675 		/* Let the fabric message take care of error */
676 		sts = PX_NO_PANIC;
677 		goto done;
678 	}
679 
680 	/* See if the failed transaction belonged to a hardened driver */
681 	if (pf_hdl_lookup(dip, derr->fme_ena,
682 	    adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr,
683 	    adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
684 		sts = (PX_NO_PANIC);
685 	else
686 		sts = (PX_PANIC);
687 
688 	/* Add pfd to cause a fabric scan */
689 	switch (epkt->rc_descr.cond) {
690 	case CND_RCA:
691 		s_status = PCI_STAT_R_TARG_AB;
692 		break;
693 	case CND_RUR:
694 		s_status = PCI_STAT_R_MAST_AB;
695 		break;
696 	}
697 	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf;
698 	PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr;
699 	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
700 
701 done:
702 	return (sts);
703 }
704 
705 /* ARGSUSED */
706 static int
707 px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
708 {
709 	px_pec_err_t	*pec_p = (px_pec_err_t *)epkt;
710 	px_err_pcie_t	*pcie = (px_err_pcie_t *)epkt;
711 	pf_pcie_adv_err_regs_t adv_reg;
712 	int		sts;
713 	uint32_t	temp;
714 
715 	/*
716 	 * Check for failed PIO Read/Writes, which are errors that are not
717 	 * defined in the PCIe spec.
718 	 */
719 	temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA;
720 	if (((pec_p->pec_descr.dir == DIR_READ) ||
721 	    (pec_p->pec_descr.dir == DIR_WRITE)) &&
722 	    pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) {
723 		adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]);
724 		adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32);
725 		adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]);
726 		adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32);
727 
728 		sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
729 
730 		if (sts == DDI_SUCCESS &&
731 		    pf_hdl_lookup(dip, derr->fme_ena,
732 		    adv_reg.pcie_ue_tgt_trans,
733 		    adv_reg.pcie_ue_tgt_addr,
734 		    adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
735 			return (PX_NO_PANIC);
736 		else
737 			return (PX_PANIC);
738 	}
739 
740 	if (!pec_p->pec_descr.C)
741 		pec_p->ce_reg_status = 0;
742 	if (!pec_p->pec_descr.U)
743 		pec_p->ue_reg_status = 0;
744 	if (!pec_p->pec_descr.H)
745 		pec_p->hdr[0] = 0;
746 	if (!pec_p->pec_descr.I)
747 		pec_p->hdr[1] = 0;
748 
749 	/*
750 	 * According to the PCIe spec, there is a first error pointer.  If there
751 	 * are header logs recorded and there are more than one error, the log
752 	 * will belong to the error that the first error pointer points to.
753 	 *
754 	 * The regs.primary_ue expects a bit number, go through the ue register
755 	 * and find the first error that occured.  Because the sun4v epkt spec
756 	 * does not define this value, the algorithm below gives the lower bit
757 	 * priority.
758 	 */
759 	temp = pcie->ue_reg;
760 	if (temp) {
761 		int x;
762 		for (x = 0; !(temp & 0x1); x++) {
763 			temp = temp >> 1;
764 		}
765 		pcie->primary_ue = 1 << x;
766 	} else {
767 		pcie->primary_ue = 0;
768 	}
769 
770 	/* Sun4v doesn't log the TX hdr except for CTOs */
771 	if (pcie->primary_ue == PCIE_AER_UCE_TO) {
772 		pcie->tx_hdr1 = pcie->rx_hdr1;
773 		pcie->tx_hdr2 = pcie->rx_hdr2;
774 		pcie->tx_hdr3 = pcie->rx_hdr3;
775 		pcie->tx_hdr4 = pcie->rx_hdr4;
776 		pcie->rx_hdr1 = 0;
777 		pcie->rx_hdr2 = 0;
778 		pcie->rx_hdr3 = 0;
779 		pcie->rx_hdr4 = 0;
780 	} else {
781 		pcie->tx_hdr1 = 0;
782 		pcie->tx_hdr2 = 0;
783 		pcie->tx_hdr3 = 0;
784 		pcie->tx_hdr4 = 0;
785 	}
786 
787 	return (px_err_check_pcie(dip, derr, pcie));
788 }
789 
790 static int
791 px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
792 {
793 	uint64_t addr = (uint64_t)epkt->addr;
794 	pcie_req_id_t bdf = PCIE_INVALID_BDF;
795 
796 	if (epkt->rc_descr.H) {
797 		bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF);
798 	}
799 
800 	return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr,
801 	    bdf));
802 }
803