xref: /illumos-gate/usr/src/uts/sun4/io/px/px_fm.c (revision 051d39bbeea3e1b0fd8395dc97be34acb3241891)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PX Fault Management Architecture
30  */
31 #include <sys/types.h>
32 #include <sys/sunndi.h>
33 #include <sys/sunddi.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/fm/util.h>
36 #include <sys/membar.h>
37 #include "px_obj.h"
38 
/*
 * Bits of the px_err_pcie_t ue_reg for which px_err_check_pcie() adds
 * PX_PANIC to its returned severity.
 */
#define	PX_PCIE_PANIC_BITS \
	(PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC | \
	PCIE_AER_UCE_UR)
/*
 * Bits of the px_err_pcie_t ue_reg for which px_err_check_pcie() adds
 * PX_NO_PANIC to its returned severity.
 */
#define	PX_PCIE_NO_PANIC_BITS \
	(PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \
	PCIE_AER_UCE_UC)
46 
47 static void px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs);
48 static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr,
49     px_err_pcie_t *regs);
50 
/*
 * px_pcie_log() only exists on DEBUG builds.  On non-DEBUG builds the name
 * is defined as "0 &&", so a call written as px_pcie_log(a, b, c) expands to
 * "0 && (a, b, c)" and the parenthesized arguments are never evaluated.
 */
#if defined(DEBUG)
static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity);
#else	/* DEBUG */
#define	px_pcie_log 0 &&
#endif	/* DEBUG */
56 
57 /* external functions */
58 extern int pci_xcap_locate(ddi_acc_handle_t h, uint16_t id, uint16_t *base_p);
59 extern int pci_lcap_locate(ddi_acc_handle_t h, uint8_t id, uint16_t *base_p);
60 
61 /*
62  * Initialize px FMA support
63  */
64 int
65 px_fm_attach(px_t *px_p)
66 {
67 	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
68 		DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
69 
70 	/*
71 	 * Initialize pci_target_queue for FMA handling of
72 	 * pci errors.
73 	 */
74 	pci_targetq_init();
75 
76 	/*
77 	 * check parents' capability
78 	 */
79 	ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
80 
81 	/*
82 	 * parents need to be ereport and error handling capable
83 	 */
84 	ASSERT(px_p->px_fm_cap &&
85 	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
86 
87 	/*
88 	 * Initialize lock to synchronize fabric error handling
89 	 */
90 	mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER,
91 	    (void *)px_p->px_fm_ibc);
92 
93 	/*
94 	 * register error callback in parent
95 	 */
96 	ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
97 
98 	return (DDI_SUCCESS);
99 }
100 
101 /*
102  * Deregister FMA
103  */
104 void
105 px_fm_detach(px_t *px_p)
106 {
107 	ddi_fm_handler_unregister(px_p->px_dip);
108 	mutex_destroy(&px_p->px_fm_mutex);
109 	ddi_fm_fini(px_p->px_dip);
110 }
111 
112 /*
113  * Function used to setup access functions depending on level of desired
114  * protection.
115  */
116 void
117 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip)
118 {
119 	uchar_t fflag;
120 	ddi_acc_hdl_t *hp;
121 	ddi_acc_impl_t *ap;
122 
123 	hp = mp->map_handlep;
124 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
125 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
126 
127 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
128 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
129 		switch (fflag) {
130 		case DDI_FLAGERR_ACC:
131 			ap->ahi_get8 = i_ddi_prot_get8;
132 			ap->ahi_get16 = i_ddi_prot_get16;
133 			ap->ahi_get32 = i_ddi_prot_get32;
134 			ap->ahi_get64 = i_ddi_prot_get64;
135 			ap->ahi_put8 = i_ddi_prot_put8;
136 			ap->ahi_put16 = i_ddi_prot_put16;
137 			ap->ahi_put32 = i_ddi_prot_put32;
138 			ap->ahi_put64 = i_ddi_prot_put64;
139 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
140 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
141 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
142 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
143 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
144 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
145 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
146 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
147 			break;
148 		case DDI_CAUTIOUS_ACC :
149 			ap->ahi_get8 = i_ddi_caut_get8;
150 			ap->ahi_get16 = i_ddi_caut_get16;
151 			ap->ahi_get32 = i_ddi_caut_get32;
152 			ap->ahi_get64 = i_ddi_caut_get64;
153 			ap->ahi_put8 = i_ddi_caut_put8;
154 			ap->ahi_put16 = i_ddi_caut_put16;
155 			ap->ahi_put32 = i_ddi_caut_put32;
156 			ap->ahi_put64 = i_ddi_caut_put64;
157 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
158 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
159 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
160 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
161 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
162 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
163 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
164 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
165 			break;
166 		default:
167 			break;
168 		}
169 	} else if (mp->map_op == DDI_MO_UNMAP) {
170 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
171 	}
172 }
173 
174 /*
175  * Function used to initialize FMA for our children nodes. Called
176  * through pci busops when child node calls ddi_fm_init.
177  */
178 /*ARGSUSED*/
179 int
180 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
181     ddi_iblock_cookie_t *ibc_p)
182 {
183 	px_t *px_p = DIP_TO_STATE(dip);
184 
185 	ASSERT(ibc_p != NULL);
186 	*ibc_p = px_p->px_fm_ibc;
187 
188 	return (px_p->px_fm_cap);
189 }
190 
191 /*
192  * lock access for exclusive PCIe access
193  */
194 void
195 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
196 {
197 	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
198 
199 	/*
200 	 * Exclusive access has been used for cautious put/get,
201 	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
202 	 * similar protection as what on_trap() does, and which calls
203 	 * membar  #Sync to flush out all cpu deferred errors
204 	 * prior to get/put operation, so here we're not calling
205 	 * membar  #Sync - a difference from what's in pci_bus_enter().
206 	 */
207 	mutex_enter(&pec_p->pec_pokefault_mutex);
208 	pec_p->pec_acc_hdl = handle;
209 }
210 
211 /*
212  * unlock access for exclusive PCIe access
213  */
214 /* ARGSUSED */
215 void
216 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
217 {
218 	px_t		*px_p = DIP_TO_STATE(dip);
219 	px_pec_t	*pec_p = px_p->px_pec_p;
220 
221 	pec_p->pec_acc_hdl = NULL;
222 	mutex_exit(&pec_p->pec_pokefault_mutex);
223 }
224 
225 
226 /*
227  * PCI error callback which is registered with our parent to call
228  * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
229  * and PCI BERR/TO/UE on IO Loads.
230  */
231 /*ARGSUSED*/
232 int
233 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
234 {
235 	dev_info_t	*pdip = ddi_get_parent(dip);
236 	px_t		*px_p = (px_t *)impl_data;
237 	int		i, acc_type = 0;
238 	int		lookup, rc_err, fab_err = PF_NO_PANIC;
239 	uint32_t	addr, addr_high, addr_low;
240 	pcie_req_id_t	bdf;
241 	px_ranges_t	*ranges_p;
242 	int		range_len;
243 
244 	/*
245 	 * Deadlock scenario:
246 	 * 1. A fabric or mondo 62 interrupt with respect to px0 - T1/cpu0;
247 	 * 2. While error handling thread T1 is running on cpu0, a trap
248 	 *    occurs to cpu1 - T2/cpu1;
249 	 * 3. While doing error handling on T1, a precise trap occurs,
250 	 *    overtaken T1 - T1+/cpu0;
251 	 *
252 	 * Why threads deadlock:
253 	 *   T1 owns px_fm_mutex, T2 owns rootnex' fh_lock, but blocked on
254 	 *   px_fm_mutex, T1+ blocked on rootnex' fh_lock which won't be
255 	 *   released since T2 will never get px_fm_mutex since T1+ buried
256 	 *   thread T1 who is responsible for releasing px_fm_mutex.
257 	 *
258 	 * Solution:
259 	 *   px_fm_callback must release rootnex' fh_lock prior to acquire
260 	 *   px_fm_mutex and reaquire the fh_lock after release px_fm_mutex;
261 	 *   if px_fm_callback is unable to acquire px_fm_mutex, meaning the
262 	 *   latest trap has either overtaken the error handling thread or an
263 	 *   error handling thread on another cpu owns it, just quit with OK
264 	 *   status. Note, in this case, the cpu sync error handler should
265 	 *   respect nexus'return status and not to panic, otherwise system
266 	 *   will hang.
267 	 */
268 	i_ddi_fm_handler_exit(pdip);
269 	if (!mutex_tryenter(&px_p->px_fm_mutex)) {
270 		i_ddi_fm_handler_enter(pdip);
271 		return (DDI_FM_OK);
272 	}
273 
274 	addr_high = (uint32_t)((uint64_t)derr->fme_bus_specific >> 32);
275 	addr_low = (uint32_t)((uint64_t)derr->fme_bus_specific);
276 
277 	/*
278 	 * Make sure this failed load came from this PCIe port.  Check by
279 	 * matching the upper 32 bits of the address with the ranges property.
280 	 */
281 	range_len = px_p->px_ranges_length / sizeof (px_ranges_t);
282 	i = 0;
283 	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
284 		if (ranges_p->parent_high == addr_high) {
285 			switch (ranges_p->child_high & PCI_ADDR_MASK) {
286 			case PCI_ADDR_CONFIG:
287 				acc_type = PF_CFG_ADDR;
288 				addr = NULL;
289 				bdf = (pcie_req_id_t)(addr_low >> 12);
290 				break;
291 			case PCI_ADDR_MEM32:
292 				acc_type = PF_DMA_ADDR;
293 				addr = addr_low;
294 				bdf = NULL;
295 				break;
296 			}
297 			break;
298 		}
299 	}
300 
301 	/* This address doesn't belong to this leaf, just return with OK */
302 	if (!acc_type) {
303 		mutex_exit(&px_p->px_fm_mutex);
304 		i_ddi_fm_handler_enter(pdip);
305 		return (DDI_FM_OK);
306 	}
307 
308 	rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL);
309 	lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, addr, bdf);
310 
311 	if (!px_lib_is_in_drain_state(px_p)) {
312 		/*
313 		 * This is to ensure that device corresponding to the addr of
314 		 * the failed PIO/CFG load gets scanned.
315 		 */
316 		px_rp_en_q(px_p, bdf, addr,
317 		    (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB));
318 		fab_err = pf_scan_fabric(dip, derr, px_p->px_dq_p,
319 		    &px_p->px_dq_tail);
320 	}
321 
322 	mutex_exit(&px_p->px_fm_mutex);
323 	i_ddi_fm_handler_enter(pdip);
324 
325 	if ((rc_err & (PX_PANIC | PX_PROTECTED)) || (fab_err & PF_PANIC) ||
326 	    (lookup == PF_HDL_NOTFOUND))
327 		return (DDI_FM_FATAL);
328 	else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_NO_ERROR))
329 		return (DDI_FM_OK);
330 
331 	return (DDI_FM_NONFATAL);
332 }
333 
334 /*
335  * px_err_fabric_intr:
336  * Interrupt handler for PCIE fabric block.
337  * o lock
338  * o create derr
339  * o px_err_cmn_intr(leaf, with jbc)
340  * o send ereport(fire fmri, derr, payload = BDF)
341  * o dispatch (leaf)
342  * o unlock
343  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
344  */
345 /* ARGSUSED */
346 uint_t
347 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid)
348 {
349 	dev_info_t	*rpdip = px_p->px_dip;
350 	int		rc_err, fab_err = PF_NO_PANIC;
351 	ddi_fm_error_t	derr;
352 
353 	mutex_enter(&px_p->px_fm_mutex);
354 
355 	/* Create the derr */
356 	bzero(&derr, sizeof (ddi_fm_error_t));
357 	derr.fme_version = DDI_FME_VERSION;
358 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
359 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
360 
361 	/* Ensure that the rid of the fabric message will get scanned. */
362 	px_rp_en_q(px_p, rid, NULL, NULL);
363 
364 	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);
365 
366 	/* call rootport dispatch */
367 	if (!px_lib_is_in_drain_state(px_p)) {
368 		fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p,
369 		    &px_p->px_dq_tail);
370 	}
371 
372 	mutex_exit(&px_p->px_fm_mutex);
373 
374 	px_err_panic(rc_err, PX_RC, fab_err);
375 
376 	return (DDI_INTR_CLAIMED);
377 }
378 
379 /*
380  * px_err_safeacc_check:
381  * Check to see if a peek/poke and cautious access is currently being
382  * done on a particular leaf.
383  *
384  * Safe access reads induced fire errors will be handled by cpu trap handler
385  * which will call px_fm_callback() which calls this function. In that
386  * case, the derr fields will be set by trap handler with the correct values.
387  *
388  * Safe access writes induced errors will be handled by px interrupt
389  * handlers, this function will fill in the derr fields.
390  *
391  * If a cpu trap does occur, it will quiesce all other interrupts allowing
392  * the cpu trap error handling to finish before Fire receives an interrupt.
393  *
394  * If fire does indeed have an error when a cpu trap occurs as a result of
395  * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
396  * In which case derr will be initialized as "UNEXPECTED" by the interrupt
397  * handler and this function will need to find if this error occured in the
398  * middle of a safe access operation.
399  *
400  * @param px_p		leaf in which to check access
401  * @param derr		fm err data structure to be updated
402  */
403 void
404 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
405 {
406 	px_pec_t 	*pec_p = px_p->px_pec_p;
407 	int		acctype = pec_p->pec_safeacc_type;
408 
409 	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
410 
411 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
412 		return;
413 	}
414 
415 	/* safe access checking */
416 	switch (acctype) {
417 	case DDI_FM_ERR_EXPECTED:
418 		/*
419 		 * cautious access protection, protected from all err.
420 		 */
421 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
422 		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
423 		    DDI_FME_VERSION);
424 		derr->fme_flag = acctype;
425 		derr->fme_acc_handle = pec_p->pec_acc_hdl;
426 		break;
427 	case DDI_FM_ERR_POKE:
428 		/*
429 		 * ddi_poke protection, check nexus and children for
430 		 * expected errors.
431 		 */
432 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
433 		membar_sync();
434 		derr->fme_flag = acctype;
435 		break;
436 	case DDI_FM_ERR_PEEK:
437 		derr->fme_flag = acctype;
438 		break;
439 	}
440 }
441 
442 /*
443  * Suggest panic if any EQ (except CE q) has overflown.
444  */
445 int
446 px_err_check_eq(dev_info_t *dip)
447 {
448 	px_t			*px_p = DIP_TO_STATE(dip);
449 	px_msiq_state_t 	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
450 	px_pec_t		*pec_p = px_p->px_pec_p;
451 	msiqid_t		eq_no = msiq_state_p->msiq_1st_msiq_id;
452 	pci_msiq_state_t	msiq_state;
453 	int			i;
454 
455 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
456 		if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */
457 			continue;
458 		if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) !=
459 			DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR)
460 			return (PX_PANIC);
461 	}
462 	return (PX_NO_PANIC);
463 }
464 
465 static void
466 px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs)
467 {
468 	px_t		*px_p = DIP_TO_STATE(rpdip);
469 	pf_data_t	pf_data = {0};
470 	pcie_req_id_t	fault_bdf = 0;
471 	uint32_t	fault_addr = 0;
472 	uint16_t	s_status = 0;
473 
474 	/*
475 	 * set RC s_status in PCI term to coordinate with downstream fabric
476 	 * errors ananlysis.
477 	 */
478 	if (regs->primary_ue & PCIE_AER_UCE_UR)
479 		s_status = PCI_STAT_R_MAST_AB;
480 	if (regs->primary_ue & PCIE_AER_UCE_CA)
481 		s_status = PCI_STAT_R_TARG_AB;
482 	if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC))
483 		s_status = PCI_STAT_PERROR;
484 
485 	if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) {
486 		pf_data.aer_h0 = regs->rx_hdr1;
487 		pf_data.aer_h1 = regs->rx_hdr2;
488 		pf_data.aer_h2 = regs->rx_hdr3;
489 		pf_data.aer_h3 = regs->rx_hdr4;
490 
491 		pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL);
492 	} else if (regs->primary_ue & PCIE_AER_UCE_PTLP) {
493 		pcie_tlp_hdr_t	*tlp_p;
494 
495 		pf_data.aer_h0 = regs->rx_hdr1;
496 		pf_data.aer_h1 = regs->rx_hdr2;
497 		pf_data.aer_h2 = regs->rx_hdr3;
498 		pf_data.aer_h3 = regs->rx_hdr4;
499 
500 		tlp_p = (pcie_tlp_hdr_t *)&pf_data.aer_h0;
501 		if (tlp_p->type == PCIE_TLP_TYPE_CPL)
502 			pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL);
503 
504 		pf_data.aer_h0 = regs->tx_hdr1;
505 		pf_data.aer_h1 = regs->tx_hdr2;
506 		pf_data.aer_h2 = regs->tx_hdr3;
507 		pf_data.aer_h3 = regs->tx_hdr4;
508 
509 		pf_tlp_decode(rpdip, &pf_data, NULL, &fault_addr, NULL);
510 	}
511 
512 	px_rp_en_q(px_p, fault_bdf, fault_addr, s_status);
513 }
514 
515 int
516 px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
517 {
518 	uint32_t ce_reg, ue_reg;
519 	int err = PX_NO_ERROR;
520 
521 	ce_reg = regs->ce_reg;
522 	if (ce_reg)
523 		err |= (ce_reg & px_fabric_die_rc_ce) ? PX_PANIC : PX_NO_ERROR;
524 
525 	ue_reg = regs->ue_reg;
526 	if (!ue_reg)
527 		goto done;
528 
529 	if (ue_reg & PCIE_AER_UCE_PTLP)
530 		err |= px_pcie_ptlp(dip, derr, regs);
531 
532 	if (ue_reg & PX_PCIE_PANIC_BITS)
533 		err |= PX_PANIC;
534 
535 	if (ue_reg & PX_PCIE_NO_PANIC_BITS)
536 		err |= PX_NO_PANIC;
537 
538 	/* Scan the fabric to clean up error bits, for the following errors. */
539 	if (ue_reg & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_CA | PCIE_AER_UCE_UR))
540 		px_err_fill_pfd(dip, regs);
541 done:
542 	px_pcie_log(dip, regs, err);
543 	return (err);
544 }
545 
#if defined(DEBUG)
/*
 * DEBUG-only dump of the captured root complex PCIe error registers and
 * TLP headers, together with the severity that was derived from them.
 */
static void
px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity)
{
	const char	*sev_str;

	if (severity & PX_PANIC)
		sev_str = "PANIC";
	else
		sev_str = "NO PANIC";

	DBG(DBG_ERR_INTR, dip,
	    "A PCIe RC error has occured with a severity of \"%s\"\n"
	    "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n"
	    "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n",
	    sev_str,
	    regs->ce_reg, regs->ue_reg, regs->primary_ue,
	    regs->tx_hdr1, regs->tx_hdr2, regs->tx_hdr3, regs->tx_hdr4,
	    regs->rx_hdr1, regs->rx_hdr2, regs->rx_hdr3, regs->rx_hdr4);
}
#endif	/* DEBUG */
560 
561 /*
562  * look through poisoned TLP cases and suggest panic/no panic depend on
563  * handle lookup.
564  */
565 static int
566 px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
567 {
568 	pf_data_t	pf_data;
569 	pcie_req_id_t	bdf;
570 	uint32_t	addr, trans_type;
571 	int		tlp_sts, tlp_cmd;
572 	int		sts = PF_HDL_NOTFOUND;
573 
574 	if (regs->primary_ue != PCIE_AER_UCE_PTLP)
575 		return (PX_PANIC);
576 
577 	if (!regs->rx_hdr1)
578 		goto done;
579 
580 	pf_data.aer_h0 = regs->rx_hdr1;
581 	pf_data.aer_h1 = regs->rx_hdr2;
582 	pf_data.aer_h2 = regs->rx_hdr3;
583 	pf_data.aer_h3 = regs->rx_hdr4;
584 
585 	tlp_sts = pf_tlp_decode(dip, &pf_data, &bdf, &addr, &trans_type);
586 	tlp_cmd = ((pcie_tlp_hdr_t *)(&pf_data.aer_h0))->type;
587 
588 	if (tlp_sts == DDI_FAILURE)
589 		goto done;
590 
591 	switch (tlp_cmd) {
592 	case PCIE_TLP_TYPE_CPL:
593 	case PCIE_TLP_TYPE_CPLLK:
594 		/*
595 		 * Usually a PTLP is a CPL with data.  Grab the completer BDF
596 		 * from the RX TLP, and the original address from the TX TLP.
597 		 */
598 		if (regs->tx_hdr1) {
599 			pf_data.aer_h0 = regs->tx_hdr1;
600 			pf_data.aer_h1 = regs->tx_hdr2;
601 			pf_data.aer_h2 = regs->tx_hdr3;
602 			pf_data.aer_h3 = regs->tx_hdr4;
603 
604 			sts = pf_tlp_decode(dip, &pf_data, NULL, &addr,
605 			    &trans_type);
606 		} /* FALLTHRU */
607 	case PCIE_TLP_TYPE_IO:
608 	case PCIE_TLP_TYPE_MEM:
609 	case PCIE_TLP_TYPE_MEMLK:
610 		sts = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr, bdf);
611 		break;
612 	default:
613 		sts = PF_HDL_NOTFOUND;
614 	}
615 done:
616 	return (sts == PF_HDL_NOTFOUND ? PX_PANIC : PX_NO_PANIC);
617 }
618 
619 /*
620  * This function appends a pf_data structure to the error q which is used later
621  * during PCIe fabric scan.  It signifies:
622  * o errs rcvd in RC, that may have been propagated to/from the fabric
623  * o the fabric scan code should scan the device path of fault bdf/addr
624  *
625  * fault_bdf: The bdf that caused the fault, which may have error bits set.
626  * fault_addr: The PIO addr that caused the fault, such as failed PIO, but not
627  *	       failed DMAs.
628  * s_status: Secondary Status equivalent to why the fault occured.
629  *	     (ie S-TA/MA, R-TA)
630  * Either the fault bdf or addr may be NULL, but not both.
631  */
632 int px_foo = 0;
633 void
634 px_rp_en_q(px_t *px_p, pcie_req_id_t fault_bdf, uint32_t fault_addr,
635     uint16_t s_status)
636 {
637 	pf_data_t pf_data = {0};
638 
639 	if (!fault_bdf && !fault_addr)
640 		return;
641 
642 	pf_data.dev_type = PCIE_PCIECAP_DEV_TYPE_ROOT;
643 	if (px_foo) {
644 		pf_data.fault_bdf = px_foo;
645 		px_foo = 0;
646 	} else
647 		pf_data.fault_bdf = fault_bdf;
648 
649 	pf_data.fault_addr = fault_addr;
650 	pf_data.s_status = s_status;
651 	pf_data.send_erpt = PF_SEND_ERPT_NO;
652 
653 	(void) pf_en_dq(&pf_data, px_p->px_dq_p, &px_p->px_dq_tail, -1);
654 }
655 
656 /*
657  * Panic if the err tunable is set and that we are not already in the middle
658  * of panic'ing.
659  */
660 #define	MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1)
661 void
662 px_err_panic(int err, int msg, int fab_err)
663 {
664 	char fm_msg[96] = "";
665 	int ferr = PX_NO_ERROR;
666 
667 	if (panicstr)
668 		return;
669 
670 	if (!(err & px_die))
671 		goto fabric;
672 	if (msg & PX_RC)
673 		(void) strncat(fm_msg, px_panic_rc_msg, MSZ);
674 	if (msg & PX_RP)
675 		(void) strncat(fm_msg, px_panic_rp_msg, MSZ);
676 	if (msg & PX_HB)
677 		(void) strncat(fm_msg, px_panic_hb_msg, MSZ);
678 
679 fabric:
680 	if (fab_err & PF_PANIC)
681 		ferr = PX_PANIC;
682 	if (fab_err & ~(PF_PANIC | PF_NO_ERROR))
683 		ferr = PX_NO_PANIC;
684 	if (ferr & px_die) {
685 		if (strlen(fm_msg))
686 			(void) strncat(fm_msg, " and", MSZ);
687 		(void) strncat(fm_msg, px_panic_fab_msg, MSZ);
688 	}
689 
690 	if (strlen(fm_msg))
691 		fm_panic("Fatal error has occured in:%s.", fm_msg);
692 }
693