xref: /titanic_50/usr/src/uts/common/io/pciex/pcie_fault.c (revision 0b8f054691a3974b7c86dd2700ecbf6108a2d55e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/sysmacros.h>
27 #include <sys/types.h>
28 #include <sys/kmem.h>
29 #include <sys/modctl.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/sunndi.h>
33 #include <sys/fm/protocol.h>
34 #include <sys/fm/util.h>
35 #include <sys/fm/io/ddi.h>
36 #include <sys/fm/io/pci.h>
37 #include <sys/promif.h>
38 #include <sys/disp.h>
39 #include <sys/atomic.h>
40 #include <sys/pcie.h>
41 #include <sys/pci_cap.h>
42 #include <sys/pcie_impl.h>
43 
/*
 * Error bits tested on a bridge to decide whether anything was detected on
 * or below it: PCIe Device Status bits and PCI (secondary) status bits.
 */
#define	PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
	PCIE_DEVSTS_CE_DETECTED)

#define	PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
	PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable error status bits, grouped by default severity */
#define	PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
	PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define	PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* AER secondary uncorrectable error status bits, grouped the same way */
#define	PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
	PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
	PCIE_AER_SUCE_SERR_ASSERT)
#define	PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
	PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
	PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
	PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
	PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

/* PCI status bits that report a parity error */
#define	PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/*
 * Non-zero when "bit" is the status bit selected by the first error pointer
 * field of the AER control register held in "adv".
 *
 * The shifted constant is unsigned: the pointer field can select bit 31, and
 * shifting a signed 1 that far is undefined.  Arguments are parenthesized so
 * that compound expressions may be passed safely.
 */
#define	PF_FIRST_AER_ERR(bit, adv) \
	((bit) & (1U << ((adv)->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

/* Non-zero when the device has AER and "bit" is its first logged UE error */
#define	HAS_AER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER((pfd_p)->pe_bus_p) && \
	PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/*
 * Same as PF_FIRST_AER_ERR, but for the secondary (bridge) AER control
 * register held in "adv".
 */
#define	PF_FIRST_SAER_ERR(bit, adv) \
	((bit) & (1U << ((adv)->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

/* Non-zero when the device has AER and "bit" is its first logged SUE error */
#define	HAS_SAER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER((pfd_p)->pe_bus_p) && \
	PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the command field from word 1 of the secondary AER header log */
#define	GET_SAER_CMD(pfd_p) \
	((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
	PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* Non-zero when the gathered CE status has the advisory non-fatal bit set */
#define	CE_ADVISORY(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)
86 
87 /* PCIe Fault Fabric Error analysis table */
88 typedef struct pf_fab_err_tbl {
89 	uint32_t	bit;		/* Error bit */
90 	int		(*handler)();	/* Error handling fuction */
91 } pf_fab_err_tbl_t;
92 
/* Returns the device's bus data when it is marked PF_FM_READY, else NULL */
static pcie_bus_t *pf_is_ready(dev_info_t *);
/* Functions for scanning errors */
static int pf_default_hdl(dev_info_t *, pf_impl_t *);
static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
static boolean_t pf_in_bus_range(pcie_bus_t *, pcie_req_id_t);
static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);

static int pf_pci_decode(pf_data_t *, uint16_t *);

/* Functions for gathering errors */
static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg);
static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);

/*
 * Functions for analysing errors.  The pf_analyse_*(), pf_no_panic() and
 * pf_panic() routines all share one signature.
 */
static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
    uint32_t);
static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
    pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_no_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);

/* PCIe Fabric Handle Lookup Support Functions. */
static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
    uint64_t, pcie_req_id_t);
static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
    pcie_req_id_t, ndi_fmc_t *);
static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
	boolean_t);

/*
 * Per-device handler lock management used while scanning.  pf_dispatch()
 * calls pf_handler_exit() itself for devices that do not end up on the
 * error queue.
 */
static int pf_handler_enter(dev_info_t *, pf_impl_t *);
static void pf_handler_exit(dev_info_t *);

/*
 * Tunables read by pf_scan_fabric(): pcie_full_scan seeds the initial
 * full_scan setting and a non-zero pcie_disable_scan makes it return at once.
 */
boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
int pcie_disable_scan = 0;		/* Disable fabric scan */
159 
160 /*
161  * Scan Fabric is the entry point for PCI/PCIe IO fabric errors.  The
162  * caller may create a local pf_data_t with the "root fault"
163  * information populated to either do a precise or full scan.  More
164  * than one pf_data_t maybe linked together if there are multiple
165  * errors.  Only a PCIe compliant Root Port device may pass in NULL
166  * for the root_pfd_p.
167  *
168  * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
169  * the rdip.  PCIe Root ports should call pf_scan_fabric using it's parent as
170  * the rdip.
171  *
172  * Scan fabric initiated from RCs are likely due to a fabric message, traps or
173  * any RC detected errors that propagated to/from the fabric.
174  *
175  * This code assumes that by the time pf_scan_fabric is
176  * called, pf_handler_enter has NOT been called on the rdip.
177  */
178 int
179 pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
180 {
181 	pf_impl_t	impl;
182 	pf_data_t	*pfd_p, *pfd_head_p, *pfd_tail_p;
183 	int		scan_flag = PF_SCAN_SUCCESS;
184 	int		analyse_flag = PF_ERR_NO_ERROR;
185 	boolean_t	full_scan = pcie_full_scan;
186 
187 	if (pcie_disable_scan)
188 		return (analyse_flag);
189 
190 	/* Find the head and tail of this link list */
191 	pfd_head_p = root_pfd_p;
192 	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
193 	    pfd_tail_p = pfd_tail_p->pe_next)
194 		;
195 
196 	/* Save head/tail */
197 	impl.pf_total = 0;
198 	impl.pf_derr = derr;
199 	impl.pf_dq_head_p = pfd_head_p;
200 	impl.pf_dq_tail_p = pfd_tail_p;
201 
202 	/* If scan is initiated from RP then RP itself must be scanned. */
203 	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
204 	    !root_pfd_p) {
205 		scan_flag = pf_handler_enter(rdip, &impl);
206 		if (scan_flag & PF_SCAN_DEADLOCK)
207 			goto done;
208 
209 		scan_flag = pf_default_hdl(rdip, &impl);
210 		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
211 			goto done;
212 	}
213 
214 	/*
215 	 * Scan the fabric using the scan_bdf and scan_addr in error q.
216 	 * scan_bdf will be valid in the following cases:
217 	 *	- Fabric message
218 	 *	- Poisoned TLP
219 	 *	- Signaled UR/CA
220 	 *	- Received UR/CA
221 	 *	- PIO load failures
222 	 */
223 	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
224 	    pfd_p = pfd_p->pe_next) {
225 		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);
226 
227 		if (impl.pf_fault->full_scan)
228 			full_scan = B_TRUE;
229 
230 		if (full_scan ||
231 		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
232 		    impl.pf_fault->scan_addr)
233 			scan_flag |= pf_dispatch(rdip, &impl, full_scan);
234 
235 		if (full_scan)
236 			break;
237 	}
238 
239 done:
240 	/*
241 	 * If this is due to safe access, don't analyze the errors and return
242 	 * success regardless of how scan fabric went.
243 	 */
244 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
245 		analyse_flag = PF_ERR_NO_PANIC;
246 	} else {
247 		analyse_flag = pf_analyse_error(derr, &impl);
248 	}
249 
250 	pf_send_ereport(derr, &impl);
251 
252 	/*
253 	 * Check if any hardened driver's callback reported a panic or scan
254 	 * fabric was unable to gather all the information needed.  If so panic.
255 	 */
256 	if (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_BAD_RESPONSE))
257 		analyse_flag |= PF_ERR_PANIC;
258 
259 	/*
260 	 * If a deadlock was detected, panic the system as error analysis has
261 	 * been compromised.
262 	 */
263 	if (scan_flag & PF_SCAN_DEADLOCK)
264 		analyse_flag |= PF_ERR_PANIC_DEADLOCK;
265 
266 	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);
267 
268 	return (analyse_flag);
269 }
270 
271 /*
272  * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
273  * falls in the error path.
274  *
275  * Returns PF_SCAN_* flags
276  */
277 static int
278 pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
279 {
280 	dev_info_t	*dip;
281 	pcie_req_id_t	rid = impl->pf_fault->scan_bdf;
282 	pcie_bus_t	*bus_p;
283 	int		scan_flag = PF_SCAN_SUCCESS;
284 
285 	for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
286 		/* Make sure dip is attached and ready */
287 		if (!(bus_p = pf_is_ready(dip)))
288 			continue;
289 
290 		scan_flag |= pf_handler_enter(dip, impl);
291 		if (scan_flag & PF_SCAN_DEADLOCK)
292 			break;
293 
294 		/*
295 		 * Handle this device if it is a:
296 		 * o Full Scan
297 		 * o PCI/PCI-X Device
298 		 * o Fault BDF = Device BDF
299 		 * o BDF/ADDR is in range of the Bridge/Switch
300 		 */
301 		if (full_scan ||
302 		    (bus_p->bus_bdf == rid) ||
303 		    pf_in_bus_range(bus_p, rid) ||
304 		    pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
305 			int hdl_flag = pf_default_hdl(dip, impl);
306 			scan_flag |= hdl_flag;
307 
308 			/*
309 			 * If pf_default_hdl was not able gather error
310 			 * information, it means this device wasn't added to the
311 			 * error q list.  In that case exit the lock now,
312 			 * otherwise it'll be locked forever.
313 			 */
314 			if (hdl_flag & PF_SCAN_BAD_RESPONSE)
315 				pf_handler_exit(dip);
316 
317 			/*
318 			 * A bridge may have detected no errors in which case
319 			 * there is no need to scan further down.
320 			 */
321 			if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
322 				continue;
323 		} else {
324 			pf_handler_exit(dip);
325 			continue;
326 		}
327 
328 		/* match or in bridge bus-range */
329 		switch (bus_p->bus_dev_type) {
330 		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
331 		case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
332 			scan_flag |= pf_dispatch(dip, impl, B_TRUE);
333 			break;
334 		case PCIE_PCIECAP_DEV_TYPE_UP:
335 		case PCIE_PCIECAP_DEV_TYPE_DOWN:
336 		case PCIE_PCIECAP_DEV_TYPE_ROOT:
337 		{
338 			pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p);
339 			pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p);
340 			pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p);
341 			/*
342 			 * Continue if the fault BDF != the switch or there is a
343 			 * parity error
344 			 */
345 			if ((bus_p->bus_bdf != rid) ||
346 			    (err_p->pci_err_status & PF_PCI_PARITY_ERR) ||
347 			    (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR))
348 				scan_flag |= pf_dispatch(dip, impl, full_scan);
349 			break;
350 		}
351 		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
352 		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
353 			/*
354 			 * Reached a PCIe end point so stop. Note dev_type
355 			 * PCI_DEV is just a PCIe device that requires IO Space
356 			 */
357 			break;
358 		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
359 			if (PCIE_IS_BDG(bus_p))
360 				scan_flag |= pf_dispatch(dip, impl, B_TRUE);
361 			break;
362 		default:
363 			ASSERT(B_FALSE);
364 		}
365 	}
366 	return (scan_flag);
367 }
368 
369 /* Returns whether the "bdf" is in the bus range of a switch/bridge */
370 static boolean_t
371 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf)
372 {
373 	pci_bus_range_t *br_p = &bus_p->bus_bus_range;
374 	uint8_t		bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >>
375 	    PCIE_REQ_ID_BUS_SHIFT;
376 
377 	/* check if given bdf falls within bridge's bus range */
378 	if (PCIE_IS_BDG(bus_p) &&
379 	    ((bus_no >= br_p->lo) && (bus_no <= br_p->hi)))
380 		return (B_TRUE);
381 	else
382 		return (B_FALSE);
383 }
384 
385 /*
386  * Returns whether the "addr" is in the addr range of a switch/bridge, or if the
387  * "addr" is in the assigned addr of a device.
388  */
389 static boolean_t
390 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr)
391 {
392 	uint_t		i;
393 	uint64_t	low, hi;
394 	ppb_ranges_t	*ranges_p = bus_p->bus_addr_ranges;
395 	pci_regspec_t	*assign_p = bus_p->bus_assigned_addr;
396 
397 	/* check if given address belongs to this device */
398 	for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) {
399 		low = assign_p->pci_phys_low;
400 		hi = low + assign_p->pci_size_low;
401 		if ((addr < hi) && (addr >= low))
402 			return (B_TRUE);
403 	}
404 
405 	/* check if given address belongs to a child below this device */
406 	if (!PCIE_IS_BDG(bus_p))
407 		return (B_FALSE);
408 
409 	for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) {
410 		switch (ranges_p->child_high & PCI_ADDR_MASK) {
411 		case PCI_ADDR_IO:
412 		case PCI_ADDR_MEM32:
413 			low = ranges_p->child_low;
414 			hi = ranges_p->size_low + low;
415 			if ((addr < hi) && (addr >= low))
416 				return (B_TRUE);
417 			break;
418 		case PCI_ADDR_MEM64:
419 			low = ((uint64_t)ranges_p->child_mid << 32) |
420 			    (uint64_t)ranges_p->child_low;
421 			hi = (((uint64_t)ranges_p->size_high << 32) |
422 			    (uint64_t)ranges_p->size_low) + low;
423 			if ((addr < hi) && (addr >= low))
424 				return (B_TRUE);
425 			break;
426 		}
427 	}
428 	return (B_FALSE);
429 }
430 
431 static pcie_bus_t *
432 pf_is_ready(dev_info_t *dip)
433 {
434 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
435 	if (!bus_p)
436 		return (NULL);
437 
438 	if (!(bus_p->bus_fm_flags & PF_FM_READY))
439 		return (NULL);
440 	return (bus_p);
441 }
442 
443 static void
444 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
445     pcie_bus_t *bus_p, boolean_t bdg)
446 {
447 	if (bdg) {
448 		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
449 		    PCI_PCIX_BDG_ECC_STATUS);
450 		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
451 		    PCI_PCIX_BDG_ECC_FST_AD);
452 		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
453 		    PCI_PCIX_BDG_ECC_SEC_AD);
454 		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
455 		    PCI_PCIX_BDG_ECC_ATTR);
456 	} else {
457 		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
458 		    PCI_PCIX_ECC_STATUS);
459 		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
460 		    PCI_PCIX_ECC_FST_AD);
461 		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
462 		    PCI_PCIX_ECC_SEC_AD);
463 		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
464 		    PCI_PCIX_ECC_ATTR);
465 	}
466 }
467 
468 
469 static void
470 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
471 {
472 	/*
473 	 * For PCI-X device PCI-X Capability only exists for Type 0 Headers.
474 	 * PCI-X Bridge Capability only exists for Type 1 Headers.
475 	 * Both capabilities do not exist at the same time.
476 	 */
477 	if (PCIE_IS_BDG(bus_p)) {
478 		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;
479 
480 		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);
481 
482 		pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p,
483 		    PCI_PCIX_SEC_STATUS);
484 		pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p,
485 		    PCI_PCIX_BDG_STATUS);
486 
487 		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
488 			/*
489 			 * PCI Express to PCI-X bridges only implement the
490 			 * secondary side of the PCI-X ECC registers, bit one is
491 			 * read-only so we make sure we do not write to it.
492 			 */
493 			if (!PCIE_IS_PCIE_BDG(bus_p)) {
494 				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
495 				    0);
496 				pf_pcix_ecc_regs_gather(
497 				    PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE);
498 				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
499 				    1);
500 			}
501 			pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0),
502 			    bus_p, B_TRUE);
503 		}
504 	} else {
505 		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);
506 
507 		pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p,
508 		    PCI_PCIX_COMMAND);
509 		pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p,
510 		    PCI_PCIX_STATUS);
511 		if (PCIX_ECC_VERSION_CHECK(bus_p))
512 			pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p,
513 			    B_TRUE);
514 	}
515 }
516 
517 static void
518 pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
519 {
520 	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
521 	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);
522 
523 	pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS);
524 	pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
525 	pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP);
526 
527 	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
528 		pf_pcix_regs_gather(pfd_p, bus_p);
529 
530 	if (PCIE_IS_ROOT(bus_p)) {
531 		pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p);
532 
533 		pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p,
534 		    PCIE_ROOTSTS);
535 		pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p,
536 		    PCIE_ROOTCTL);
537 	}
538 
539 	if (!PCIE_HAS_AER(bus_p))
540 		return;
541 
542 	/* Gather UE AERs */
543 	pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p,
544 	    PCIE_AER_CTL);
545 	pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p,
546 	    PCIE_AER_UCE_STS);
547 	pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p,
548 	    PCIE_AER_UCE_MASK);
549 	pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p,
550 	    PCIE_AER_UCE_SERV);
551 	PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
552 	    PCIE_AER_HDR_LOG);
553 	PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
554 	    PCIE_AER_HDR_LOG + 0x4);
555 	PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
556 	    PCIE_AER_HDR_LOG + 0x8);
557 	PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
558 	    PCIE_AER_HDR_LOG + 0xc);
559 
560 	/* Gather CE AERs */
561 	pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p,
562 	    PCIE_AER_CE_STS);
563 	pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p,
564 	    PCIE_AER_CE_MASK);
565 
566 	/*
567 	 * If pci express to pci bridge then grab the bridge
568 	 * error registers.
569 	 */
570 	if (PCIE_IS_PCIE_BDG(bus_p)) {
571 		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
572 		    PCIE_ADV_BDG_REG(pfd_p);
573 
574 		pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p,
575 		    PCIE_AER_SCTL);
576 		pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p,
577 		    PCIE_AER_SUCE_STS);
578 		pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p,
579 		    PCIE_AER_SUCE_MASK);
580 		pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p,
581 		    PCIE_AER_SUCE_SERV);
582 		PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
583 		    PCIE_AER_SHDR_LOG);
584 		PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
585 		    PCIE_AER_SHDR_LOG + 0x4);
586 		PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
587 		    PCIE_AER_SHDR_LOG + 0x8);
588 		PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
589 		    PCIE_AER_SHDR_LOG + 0xc);
590 	}
591 
592 	/*
593 	 * If PCI Express root port then grab the root port
594 	 * error registers.
595 	 */
596 	if (PCIE_IS_ROOT(bus_p)) {
597 		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs =
598 		    PCIE_ADV_RP_REG(pfd_p);
599 
600 		pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p,
601 		    PCIE_AER_RE_CMD);
602 		pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p,
603 		    PCIE_AER_RE_STS);
604 		pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p,
605 		    PCIE_AER_CE_SRC_ID);
606 		pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p,
607 		    PCIE_AER_ERR_SRC_ID);
608 	}
609 }
610 
611 static void
612 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
613 {
614 	pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p);
615 
616 	/*
617 	 * Start by reading all the error registers that are available for
618 	 * pci and pci express and for leaf devices and bridges/switches
619 	 */
620 	pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT);
621 	pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM);
622 
623 	/*
624 	 * If pci-pci bridge grab PCI bridge specific error registers.
625 	 */
626 	if (PCIE_IS_BDG(bus_p)) {
627 		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
628 		pci_bdg_regs->pci_bdg_sec_stat =
629 		    PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
630 		pci_bdg_regs->pci_bdg_ctrl =
631 		    PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
632 	}
633 
634 	/*
635 	 * If pci express device grab pci express error registers and
636 	 * check for advanced error reporting features and grab them if
637 	 * available.
638 	 */
639 	if (PCIE_IS_PCIE(bus_p))
640 		pf_pcie_regs_gather(pfd_p, bus_p);
641 	else if (PCIE_IS_PCIX(bus_p))
642 		pf_pcix_regs_gather(pfd_p, bus_p);
643 
644 }
645 
646 static void
647 pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
648 {
649 	if (PCIE_IS_BDG(bus_p)) {
650 		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;
651 
652 		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);
653 
654 		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
655 		    pcix_bdg_regs->pcix_bdg_sec_stat);
656 
657 		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
658 		    pcix_bdg_regs->pcix_bdg_stat);
659 
660 		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
661 			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
662 			/*
663 			 * PCI Express to PCI-X bridges only implement the
664 			 * secondary side of the PCI-X ECC registers.  For
665 			 * clearing, there is no need to "select" the ECC
666 			 * register, just write what was originally read.
667 			 */
668 			if (!PCIE_IS_PCIE_BDG(bus_p)) {
669 				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
670 				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
671 				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
672 
673 			}
674 			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
675 			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
676 			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
677 		}
678 	} else {
679 		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);
680 
681 		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
682 		    pcix_regs->pcix_status);
683 
684 		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
685 			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);
686 
687 			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
688 			    pcix_ecc_regs->pcix_ecc_ctlstat);
689 		}
690 	}
691 }
692 
693 static void
694 pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
695 {
696 	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
697 	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);
698 
699 	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);
700 
701 	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
702 		pf_pcix_regs_clear(pfd_p, bus_p);
703 
704 	if (!PCIE_HAS_AER(bus_p))
705 		return;
706 
707 	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
708 	    pcie_adv_regs->pcie_ue_status);
709 
710 	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
711 	    pcie_adv_regs->pcie_ce_status);
712 
713 	if (PCIE_IS_PCIE_BDG(bus_p)) {
714 		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
715 		    PCIE_ADV_BDG_REG(pfd_p);
716 
717 		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
718 		    pcie_bdg_regs->pcie_sue_status);
719 	}
720 
721 	/*
722 	 * If PCI Express root complex then clear the root complex
723 	 * error registers.
724 	 */
725 	if (PCIE_IS_ROOT(bus_p)) {
726 		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;
727 
728 		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);
729 
730 		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
731 		    pcie_rp_regs->pcie_rp_err_status);
732 	}
733 }
734 
735 static void
736 pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
737 {
738 	if (PCIE_IS_PCIE(bus_p))
739 		pf_pcie_regs_clear(pfd_p, bus_p);
740 	else if (PCIE_IS_PCIX(bus_p))
741 		pf_pcix_regs_clear(pfd_p, bus_p);
742 
743 	PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);
744 
745 	if (PCIE_IS_BDG(bus_p)) {
746 		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
747 		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
748 		    pci_bdg_regs->pci_bdg_sec_stat);
749 	}
750 }
751 
752 /* ARGSUSED */
753 void
754 pcie_clear_errors(dev_info_t *dip)
755 {
756 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
757 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
758 
759 	ASSERT(bus_p);
760 
761 	pf_pci_regs_gather(pfd_p, bus_p);
762 	pf_pci_regs_clear(pfd_p, bus_p);
763 }
764 
765 /* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
766 static void
767 pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
768 {
769 	pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
770 	pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
771 	uint32_t root_err = rp_regs->pcie_rp_err_status;
772 	uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
773 	int num_faults = 0;
774 
775 	/* Since this data structure is reused, make sure to reset it */
776 	root_fault->full_scan = B_FALSE;
777 	root_fault->scan_bdf = PCIE_INVALID_BDF;
778 	root_fault->scan_addr = 0;
779 
780 	if (!PCIE_HAS_AER(bus_p) &&
781 	    (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
782 		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
783 		return;
784 	}
785 
786 	/*
787 	 * Check to see if an error has been received that
788 	 * requires a scan of the fabric.  Count the number of
789 	 * faults seen.  If MUL CE/FE_NFE that counts for
790 	 * atleast 2 faults, so just return with full_scan.
791 	 */
792 	if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
793 	    (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
794 		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
795 		return;
796 	}
797 
798 	if (root_err & PCIE_AER_RE_STS_CE_RCVD)
799 		num_faults++;
800 
801 	if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
802 		num_faults++;
803 
804 	if (ue_err & PCIE_AER_UCE_CA)
805 		num_faults++;
806 
807 	if (ue_err & PCIE_AER_UCE_UR)
808 		num_faults++;
809 
810 	/* If no faults just return */
811 	if (num_faults == 0)
812 		return;
813 
814 	/* If faults > 1 do full scan */
815 	if (num_faults > 1) {
816 		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
817 		return;
818 	}
819 
820 	/* By this point, there is only 1 fault detected */
821 	if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
822 		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
823 		num_faults--;
824 	} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
825 		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
826 		num_faults--;
827 	} else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
828 	    HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
829 	    (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
830 	    DDI_SUCCESS)) {
831 		PCIE_ROOT_FAULT(pfd_p)->scan_addr =
832 		    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
833 		num_faults--;
834 	}
835 
836 	/*
837 	 * This means an error did occur, but we couldn't extract the fault BDF
838 	 */
839 	if (num_faults > 0)
840 		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
841 
842 }
843 
844 
845 /*
846  * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
847  *
848  * Returns a scan flag.
849  * o PF_SCAN_SUCCESS - Error gathered and cleared sucessfuly, data added to
850  *   Fault Q
851  * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item not added to fault Q
852  * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
853  * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
854  *   unnecessary scanning
855  * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
856  *   time.
857  */
858 static int
859 pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
860 {
861 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
862 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
863 	int cb_sts, scan_flag = PF_SCAN_SUCCESS;
864 
865 	/* Make sure this device hasn't already been snapshotted and cleared */
866 	if (pfd_p->pe_valid == B_TRUE) {
867 		scan_flag |= PF_SCAN_IN_DQ;
868 		goto done;
869 	}
870 
871 	/*
872 	 * Read vendor/device ID and check with cached data, if it doesn't match
873 	 * could very well be a device that isn't responding anymore.  Just
874 	 * stop.  Save the basic info in the error q for post mortem debugging
875 	 * purposes.
876 	 */
877 	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
878 		char buf[FM_MAX_CLASS];
879 
880 		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
881 		    PCI_ERROR_SUBCLASS, PCI_NR);
882 		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
883 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);
884 
885 		return (PF_SCAN_BAD_RESPONSE);
886 	}
887 
888 	pf_pci_regs_gather(pfd_p, bus_p);
889 	pf_pci_regs_clear(pfd_p, bus_p);
890 	if (PCIE_IS_RP(bus_p))
891 		pf_pci_find_rp_fault(pfd_p, bus_p);
892 
893 	cb_sts = pf_fm_callback(dip, impl->pf_derr);
894 
895 	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
896 		scan_flag |= PF_SCAN_CB_FAILURE;
897 
898 	/* Add the snapshot to the error q */
899 	pf_en_dq(pfd_p, impl);
900 
901 done:
902 	/*
903 	 * If a bridge does not have any error no need to scan any further down.
904 	 * For PCIe devices, check the PCIe device status and PCI secondary
905 	 * status.
906 	 * - Some non-compliant PCIe devices do not utilize PCIe
907 	 *   error registers.  If so rely on legacy PCI error registers.
908 	 * For PCI devices, check the PCI secondary status.
909 	 */
910 	if (PCIE_IS_PCIE_BDG(bus_p) &&
911 	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
912 	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
913 		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;
914 
915 	if (PCIE_IS_PCI_BDG(bus_p) &&
916 	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
917 		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;
918 
919 	pfd_p->pe_valid = B_TRUE;
920 	return (scan_flag);
921 }
922 
923 /*
924  * Called during postattach to initialize a device's error handling
925  * capabilities.  If the devices has already been hardened, then there isn't
926  * much needed.  Otherwise initialize the device's default FMA capabilities.
927  *
928  * In a future project where PCIe support is removed from pcifm, several
929  * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
930  * created here so that the PCI/PCIe eversholt rules will work properly.
931  */
932 void
933 pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
934 {
935 	pcie_bus_t		*bus_p = PCIE_DIP2BUS(dip);
936 	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
937 	boolean_t		need_cb_register = B_FALSE;
938 
939 	if (!bus_p) {
940 		cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
941 		    ddi_driver_name(dip), ddi_get_instance(dip));
942 		return;
943 	}
944 
945 	if (fmhdl) {
946 		/*
947 		 * If device is only ereport capable and not callback capable
948 		 * make it callback capable. The only downside is that the
949 		 * "fm-errcb-capable" property is not created for this device
950 		 * which should be ok since it's not used anywhere.
951 		 */
952 		if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
953 			need_cb_register = B_TRUE;
954 	} else {
955 		int cap;
956 		/*
957 		 * fm-capable in driver.conf can be used to set fm_capabilities.
958 		 * If fm-capable is not defined, set the default
959 		 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
960 		 */
961 		cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
962 		    DDI_PROP_DONTPASS, "fm-capable",
963 		    DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
964 		cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
965 
966 		bus_p->bus_fm_flags |= PF_FM_IS_NH;
967 
968 		if (cmd == DDI_ATTACH) {
969 			ddi_fm_init(dip, &cap, &ibc);
970 			pci_ereport_setup(dip);
971 		}
972 
973 		if (cap & DDI_FM_ERRCB_CAPABLE)
974 			need_cb_register = B_TRUE;
975 
976 		fmhdl = DEVI(dip)->devi_fmhdl;
977 	}
978 
979 	/* If ddi_fm_init fails for any reason RETURN */
980 	if (!fmhdl) {
981 		bus_p->bus_fm_flags = 0;
982 		return;
983 	}
984 
985 	fmhdl->fh_cap |=  DDI_FM_ERRCB_CAPABLE;
986 	if (cmd == DDI_ATTACH) {
987 		if (need_cb_register)
988 			ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
989 	}
990 
991 	bus_p->bus_fm_flags |= PF_FM_READY;
992 }
993 
994 /* undo FMA lock, called at predetach */
995 void
996 pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
997 {
998 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
999 
1000 	if (!bus_p)
1001 		return;
1002 
1003 	/* Don't fini anything if device isn't FM Ready */
1004 	if (!(bus_p->bus_fm_flags & PF_FM_READY))
1005 		return;
1006 
1007 	/* no other code should set the flag to false */
1008 	bus_p->bus_fm_flags &= ~PF_FM_READY;
1009 
1010 	/*
1011 	 * Grab the mutex to make sure device isn't in the middle of
1012 	 * error handling.  Setting the bus_fm_flag to ~PF_FM_READY
1013 	 * should prevent this device from being error handled after
1014 	 * the mutex has been released.
1015 	 */
1016 	(void) pf_handler_enter(dip, NULL);
1017 	pf_handler_exit(dip);
1018 
1019 	/* undo non-hardened drivers */
1020 	if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
1021 		if (cmd == DDI_DETACH) {
1022 			bus_p->bus_fm_flags &= ~PF_FM_IS_NH;
1023 			pci_ereport_teardown(dip);
1024 			/*
1025 			 * ddi_fini itself calls ddi_handler_unregister,
1026 			 * so no need to explicitly call unregister.
1027 			 */
1028 			ddi_fm_fini(dip);
1029 		}
1030 	}
1031 }
1032 
1033 /*ARGSUSED*/
1034 static int
1035 pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
1036 {
1037 	return (DDI_FM_OK);
1038 }
1039 
1040 /*
1041  * Add PFD to queue.  If it is an RC add it to the beginning,
1042  * otherwise add it to the end.
1043  */
1044 static void
1045 pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
1046 {
1047 	pf_data_t *head_p = impl->pf_dq_head_p;
1048 	pf_data_t *tail_p = impl->pf_dq_tail_p;
1049 
1050 	impl->pf_total++;
1051 
1052 	if (!head_p) {
1053 		ASSERT(PFD_IS_ROOT(pfd_p));
1054 		impl->pf_dq_head_p = pfd_p;
1055 		impl->pf_dq_tail_p = pfd_p;
1056 		pfd_p->pe_prev = NULL;
1057 		pfd_p->pe_next = NULL;
1058 		return;
1059 	}
1060 
1061 	/* Check if this is a Root Port eprt */
1062 	if (PFD_IS_ROOT(pfd_p)) {
1063 		pf_data_t *root_p, *last_p = NULL;
1064 
1065 		/* The first item must be a RP */
1066 		root_p = head_p;
1067 		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
1068 		    last_p = last_p->pe_next)
1069 			root_p = last_p;
1070 
1071 		/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
1072 		root_p->pe_next = pfd_p;
1073 		pfd_p->pe_prev = root_p;
1074 		pfd_p->pe_next = last_p;
1075 
1076 		if (last_p)
1077 			last_p->pe_prev = pfd_p;
1078 		else
1079 			tail_p = pfd_p;
1080 	} else {
1081 		tail_p->pe_next = pfd_p;
1082 		pfd_p->pe_prev = tail_p;
1083 		pfd_p->pe_next = NULL;
1084 		tail_p = pfd_p;
1085 	}
1086 
1087 	impl->pf_dq_head_p = head_p;
1088 	impl->pf_dq_tail_p = tail_p;
1089 }
1090 
1091 /*
1092  * Ignore:
1093  * - TRAINING: as leaves do not have children
1094  * - SD: as leaves do not have children
1095  */
1096 const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
1097 	PCIE_AER_UCE_DLP,	pf_panic,
1098 	PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
1099 	PCIE_AER_UCE_FCP,	pf_panic,
1100 	PCIE_AER_UCE_TO,	pf_analyse_to,
1101 	PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
1102 	PCIE_AER_UCE_UC,	pf_analyse_uc,
1103 	PCIE_AER_UCE_RO,	pf_panic,
1104 	PCIE_AER_UCE_MTLP,	pf_panic,
1105 	PCIE_AER_UCE_ECRC,	pf_panic,
1106 	PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
1107 	NULL,			NULL
1108 };
1109 
1110 const pf_fab_err_tbl_t pcie_rp_tbl[] = {
1111 	PCIE_AER_UCE_TRAINING,	pf_no_panic,
1112 	PCIE_AER_UCE_DLP,	pf_panic,
1113 	PCIE_AER_UCE_SD,	pf_no_panic,
1114 	PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
1115 	PCIE_AER_UCE_FCP,	pf_panic,
1116 	PCIE_AER_UCE_TO,	pf_panic,
1117 	PCIE_AER_UCE_CA,	pf_no_panic,
1118 	PCIE_AER_UCE_UC,	pf_analyse_uc,
1119 	PCIE_AER_UCE_RO,	pf_panic,
1120 	PCIE_AER_UCE_MTLP,	pf_panic,
1121 	PCIE_AER_UCE_ECRC,	pf_panic,
1122 	PCIE_AER_UCE_UR,	pf_no_panic,
1123 	NULL,			NULL
1124 };
1125 
1126 const pf_fab_err_tbl_t pcie_sw_tbl[] = {
1127 	PCIE_AER_UCE_TRAINING,	pf_no_panic,
1128 	PCIE_AER_UCE_DLP,	pf_panic,
1129 	PCIE_AER_UCE_SD,	pf_no_panic,
1130 	PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
1131 	PCIE_AER_UCE_FCP,	pf_panic,
1132 	PCIE_AER_UCE_TO,	pf_analyse_to,
1133 	PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
1134 	PCIE_AER_UCE_UC,	pf_analyse_uc,
1135 	PCIE_AER_UCE_RO,	pf_panic,
1136 	PCIE_AER_UCE_MTLP,	pf_panic,
1137 	PCIE_AER_UCE_ECRC,	pf_panic,
1138 	PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
1139 	NULL,			NULL
1140 };
1141 
1142 const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
1143 	PCIE_AER_SUCE_TA_ON_SC,		pf_analyse_sc,
1144 	PCIE_AER_SUCE_MA_ON_SC,		pf_analyse_sc,
1145 	PCIE_AER_SUCE_RCVD_TA,		pf_analyse_ma_ta,
1146 	PCIE_AER_SUCE_RCVD_MA,		pf_analyse_ma_ta,
1147 	PCIE_AER_SUCE_USC_ERR,		pf_panic,
1148 	PCIE_AER_SUCE_USC_MSG_DATA_ERR,	pf_analyse_ma_ta,
1149 	PCIE_AER_SUCE_UC_DATA_ERR,	pf_analyse_uc_data,
1150 	PCIE_AER_SUCE_UC_ATTR_ERR,	pf_panic,
1151 	PCIE_AER_SUCE_UC_ADDR_ERR,	pf_panic,
1152 	PCIE_AER_SUCE_TIMER_EXPIRED,	pf_panic,
1153 	PCIE_AER_SUCE_PERR_ASSERT,	pf_analyse_perr_assert,
1154 	PCIE_AER_SUCE_SERR_ASSERT,	pf_no_panic,
1155 	PCIE_AER_SUCE_INTERNAL_ERR,	pf_panic,
1156 	NULL,			NULL
1157 };
1158 
1159 const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
1160 	PCI_STAT_PERROR,	pf_analyse_pci,
1161 	PCI_STAT_S_PERROR,	pf_analyse_pci,
1162 	PCI_STAT_S_SYSERR,	pf_panic,
1163 	PCI_STAT_R_MAST_AB,	pf_analyse_pci,
1164 	PCI_STAT_R_TARG_AB,	pf_analyse_pci,
1165 	PCI_STAT_S_TARG_AB,	pf_analyse_pci,
1166 	NULL,			NULL
1167 };
1168 
1169 const pf_fab_err_tbl_t pcie_pci_tbl[] = {
1170 	PCI_STAT_PERROR,	pf_analyse_pci,
1171 	PCI_STAT_S_PERROR,	pf_analyse_pci,
1172 	PCI_STAT_S_SYSERR,	pf_panic,
1173 	PCI_STAT_R_MAST_AB,	pf_analyse_pci,
1174 	PCI_STAT_R_TARG_AB,	pf_analyse_pci,
1175 	PCI_STAT_S_TARG_AB,	pf_analyse_pci,
1176 	NULL,			NULL
1177 };
1178 
/*
 * Uncorrectable error status bits that are not masked: the (secondary)
 * uncorrectable status register ANDed with the inverted mask register.
 */
#define	PF_MASKED_AER_ERR(pfd_p) \
	((PCIE_ADV_REG(pfd_p)->pcie_ue_mask ^ 0xFFFFFFFF) & \
	    PCIE_ADV_REG(pfd_p)->pcie_ue_status)
#define	PF_MASKED_SAER_ERR(pfd_p) \
	((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask ^ 0xFFFFFFFF) & \
	    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)
1185 /*
1186  * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
1187  * Queue.
1188  */
1189 static int
1190 pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
1191 {
1192 	int		sts_flags, error_flags = 0;
1193 	pf_data_t	*pfd_p;
1194 
1195 	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
1196 		sts_flags = 0;
1197 
1198 		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
1199 		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
1200 		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
1201 			if (PCIE_DEVSTS_CE_DETECTED &
1202 			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
1203 				sts_flags |= PF_ERR_CE;
1204 
1205 			pf_adjust_for_no_aer(pfd_p);
1206 			sts_flags |= pf_analyse_error_tbl(derr, impl,
1207 			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
1208 			break;
1209 		case PCIE_PCIECAP_DEV_TYPE_ROOT:
1210 			pf_adjust_for_no_aer(pfd_p);
1211 			sts_flags |= pf_analyse_error_tbl(derr, impl,
1212 			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
1213 			break;
1214 		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
1215 			/* no adjust_for_aer for pseudo RC */
1216 			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
1217 			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
1218 			break;
1219 		case PCIE_PCIECAP_DEV_TYPE_UP:
1220 		case PCIE_PCIECAP_DEV_TYPE_DOWN:
1221 			if (PCIE_DEVSTS_CE_DETECTED &
1222 			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
1223 				sts_flags |= PF_ERR_CE;
1224 
1225 			pf_adjust_for_no_aer(pfd_p);
1226 			sts_flags |= pf_analyse_error_tbl(derr, impl,
1227 			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
1228 			break;
1229 		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
1230 			if (PCIE_DEVSTS_CE_DETECTED &
1231 			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
1232 				sts_flags |= PF_ERR_CE;
1233 
1234 			pf_adjust_for_no_aer(pfd_p);
1235 			pf_adjust_for_no_saer(pfd_p);
1236 			sts_flags |= pf_analyse_error_tbl(derr,
1237 			    impl, pfd_p, pcie_pcie_tbl,
1238 			    PF_MASKED_AER_ERR(pfd_p));
1239 			sts_flags |= pf_analyse_error_tbl(derr,
1240 			    impl, pfd_p, pcie_pcie_bdg_tbl,
1241 			    PF_MASKED_SAER_ERR(pfd_p));
1242 			/*
1243 			 * Some non-compliant PCIe devices do not utilize PCIe
1244 			 * error registers.  So fallthrough and rely on legacy
1245 			 * PCI error registers.
1246 			 */
1247 			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
1248 			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
1249 				break;
1250 			/* FALLTHROUGH */
1251 		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
1252 			sts_flags |= pf_analyse_error_tbl(derr, impl,
1253 			    pfd_p, pcie_pci_tbl,
1254 			    PCI_ERR_REG(pfd_p)->pci_err_status);
1255 
1256 			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
1257 				break;
1258 
1259 			sts_flags |= pf_analyse_error_tbl(derr,
1260 			    impl, pfd_p, pcie_pci_bdg_tbl,
1261 			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
1262 		}
1263 
1264 		pfd_p->pe_severity_flags = sts_flags;
1265 		error_flags |= pfd_p->pe_severity_flags;
1266 	}
1267 
1268 	return (error_flags);
1269 }
1270 
1271 static int
1272 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
1273     pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg) {
1274 	const pf_fab_err_tbl_t *row;
1275 	int err = 0;
1276 
1277 	for (row = tbl; err_reg && (row->bit != NULL) && !(err & PF_ERR_PANIC);
1278 	    row++) {
1279 		if (err_reg & row->bit)
1280 			err |= row->handler(derr, row->bit, impl->pf_dq_head_p,
1281 			    pfd_p);
1282 	}
1283 
1284 	if (!err)
1285 		err = PF_ERR_NO_ERROR;
1286 
1287 	return (err);
1288 }
1289 
1290 /*
1291  * PCIe Completer Abort and Unsupport Request error analyser.  If a PCIe device
1292  * issues a CA/UR a corresponding Received CA/UR should have been seen in the
1293  * PCIe root complex.  Check to see if RC did indeed receive a CA/UR, if so then
1294  * this error may be safely ignored.  If not check the logs and see if an
1295  * associated handler for this transaction can be found.
1296  */
1297 /* ARGSUSED */
1298 static int
1299 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1300     pf_data_t *pfd_p)
1301 {
1302 	uint32_t	abort_type;
1303 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1304 
1305 	/* If UR's are masked forgive this error */
1306 	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1307 	    (bit == PCIE_AER_UCE_UR))
1308 		return (PF_ERR_NO_PANIC);
1309 
1310 	/*
1311 	 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
1312 	 * such as a config read or a bad DMA address.
1313 	 */
1314 	if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
1315 		goto handle_lookup;
1316 
1317 	if (bit == PCIE_AER_UCE_UR)
1318 		abort_type = PCI_STAT_R_MAST_AB;
1319 	else
1320 		abort_type = PCI_STAT_R_TARG_AB;
1321 
1322 	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1323 		return (PF_ERR_MATCHED_RC);
1324 
1325 handle_lookup:
1326 	if (HAS_AER_LOGS(pfd_p, bit) &&
1327 	    pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
1328 			return (PF_ERR_MATCHED_DEVICE);
1329 
1330 	return (PF_ERR_PANIC);
1331 }
1332 
1333 /*
1334  * PCIe-PCI Bridge Received Master Abort and Target error analyser.  If a PCIe
1335  * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
1336  * the PCIe root complex.  Check to see if RC did indeed receive a CA/UR, if so
1337  * then this error may be safely ignored.  If not check the logs and see if an
1338  * associated handler for this transaction can be found.
1339  */
1340 /* ARGSUSED */
1341 static int
1342 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1343     pf_data_t *pfd_p)
1344 {
1345 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1346 	uint32_t	abort_type;
1347 
1348 	/* If UR's are masked forgive this error */
1349 	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1350 	    (bit == PCIE_AER_SUCE_RCVD_MA))
1351 		return (PF_ERR_NO_PANIC);
1352 
1353 	if (bit == PCIE_AER_SUCE_RCVD_MA)
1354 		abort_type = PCI_STAT_R_MAST_AB;
1355 	else
1356 		abort_type = PCI_STAT_R_TARG_AB;
1357 
1358 	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1359 		return (PF_ERR_MATCHED_RC);
1360 
1361 	if (!HAS_SAER_LOGS(pfd_p, bit))
1362 		return (PF_ERR_PANIC);
1363 
1364 	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
1365 		return (PF_ERR_MATCHED_DEVICE);
1366 
1367 	return (PF_ERR_PANIC);
1368 }
1369 
1370 /*
1371  * Generic PCI error analyser.  This function is used for Parity Errors,
1372  * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
1373  * In general PCI devices do not have error logs, it is very difficult to figure
1374  * out what transaction caused the error.  Instead find the nearest PCIe-PCI
1375  * Bridge and check to see if it has logs and if it has an error associated with
1376  * this PCI Device.
1377  */
1378 /* ARGSUSED */
1379 static int
1380 pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1381     pf_data_t *pfd_p)
1382 {
1383 	pf_data_t	*parent_pfd_p;
1384 	uint16_t	cmd;
1385 	uint32_t	aer_ue_status;
1386 	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);
1387 	pf_pcie_adv_bdg_err_regs_t *parent_saer_p;
1388 
1389 	if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
1390 		return (PF_ERR_PANIC);
1391 
1392 	/* If UR's are masked forgive this error */
1393 	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1394 	    (bit == PCI_STAT_R_MAST_AB))
1395 		return (PF_ERR_NO_PANIC);
1396 
1397 
1398 	if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
1399 		aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
1400 	} else {
1401 		aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
1402 		    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
1403 		    PCIE_AER_SUCE_RCVD_MA);
1404 	}
1405 
1406 	parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
1407 	if (parent_pfd_p == NULL)
1408 		return (PF_ERR_PANIC);
1409 
1410 	/* Check if parent bridge has seen this error */
1411 	parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
1412 	if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
1413 	    !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
1414 		return (PF_ERR_PANIC);
1415 
1416 	/*
1417 	 * If the addr or bdf from the parent PCIe bridge logs belong to this
1418 	 * PCI device, assume the PCIe bridge's error handling has already taken
1419 	 * care of this PCI device's error.
1420 	 */
1421 	if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
1422 		return (PF_ERR_PANIC);
1423 
1424 	if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
1425 	    pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
1426 		return (PF_ERR_MATCHED_PARENT);
1427 
1428 	/*
1429 	 * If this device is a PCI-PCI bridge, check if the bdf in the parent
1430 	 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges.
1431 	 * If they are, then assume the PCIe bridge's error handling has already
1432 	 * taken care of this PCI-PCI bridge device's error.
1433 	 */
1434 	if (PCIE_IS_BDG(bus_p) &&
1435 	    pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
1436 		return (PF_ERR_MATCHED_PARENT);
1437 
1438 	return (PF_ERR_PANIC);
1439 }
1440 
1441 /*
1442  * PCIe Bridge transactions associated with PERR.
1443  * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
1444  * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
1445  * o Bridge received a poisoned Completion on a Split Transction from PCIe
1446  * o Bridge received a poisoned Completion on a Delayed Transction from PCIe
1447  *
1448  * Check for non-poisoned PCIe transactions that got forwarded to the secondary
1449  * side and detects a PERR#.  Except for delayed read completions, a poisoned
1450  * TLP will be forwarded to the secondary bus and PERR# will be asserted.
1451  */
1452 /* ARGSUSED */
1453 static int
1454 pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1455     pf_data_t *pfd_p)
1456 {
1457 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1458 	uint16_t	cmd;
1459 	int		hdl_sts = PF_HDL_NOTFOUND;
1460 	int		err = PF_ERR_NO_ERROR;
1461 	pf_pcie_adv_bdg_err_regs_t *saer_p;
1462 
1463 
1464 	if (HAS_SAER_LOGS(pfd_p, bit)) {
1465 		saer_p = PCIE_ADV_BDG_REG(pfd_p);
1466 		if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
1467 			return (PF_ERR_PANIC);
1468 
1469 cmd_switch:
1470 		switch (cmd) {
1471 		case PCI_PCIX_CMD_IOWR:
1472 		case PCI_PCIX_CMD_MEMWR:
1473 		case PCI_PCIX_CMD_MEMWR_BL:
1474 		case PCI_PCIX_CMD_MEMWRBL:
1475 			/* Posted Writes Transactions */
1476 			if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO)
1477 				hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
1478 				    B_FALSE);
1479 			break;
1480 		case PCI_PCIX_CMD_CFWR:
1481 			/*
1482 			 * Check to see if it is a non-posted write.  If so, a
1483 			 * UR Completion would have been sent.
1484 			 */
1485 			if (pf_matched_in_rc(dq_head_p, pfd_p,
1486 			    PCI_STAT_R_MAST_AB)) {
1487 				hdl_sts = PF_HDL_FOUND;
1488 				err = PF_ERR_MATCHED_RC;
1489 				goto done;
1490 			}
1491 			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
1492 			    B_FALSE);
1493 			break;
1494 		case PCI_PCIX_CMD_SPL:
1495 			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
1496 			    B_FALSE);
1497 			break;
1498 		case PCI_PCIX_CMD_DADR:
1499 			cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
1500 			    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
1501 			    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
1502 			if (cmd != PCI_PCIX_CMD_DADR)
1503 				goto cmd_switch;
1504 			/* FALLTHROUGH */
1505 		default:
1506 			/* Unexpected situation, panic */
1507 			hdl_sts = PF_HDL_NOTFOUND;
1508 		}
1509 
1510 		if (hdl_sts == PF_HDL_FOUND)
1511 			err = PF_ERR_MATCHED_DEVICE;
1512 		else
1513 			err = PF_ERR_PANIC;
1514 	} else {
1515 		/*
1516 		 * Check to see if it is a non-posted write.  If so, a UR
1517 		 * Completion would have been sent.
1518 		 */
1519 		if ((PCIE_ERR_REG(pfd_p)->pcie_err_status &
1520 		    PCIE_DEVSTS_UR_DETECTED) &&
1521 		    pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB))
1522 			err = PF_ERR_MATCHED_RC;
1523 
1524 		/* Check for posted writes.  Transaction is lost. */
1525 		if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat &
1526 		    PCI_STAT_S_PERROR)
1527 			err = PF_ERR_PANIC;
1528 
1529 		/*
1530 		 * All other scenarios are due to read completions.  Check for
1531 		 * PERR on the primary side.  If found the primary side error
1532 		 * handling will take care of this error.
1533 		 */
1534 		if (err == PF_ERR_NO_ERROR) {
1535 			if (PCI_ERR_REG(pfd_p)->pci_err_status &
1536 			    PCI_STAT_PERROR)
1537 				err = PF_ERR_MATCHED_PARENT;
1538 			else
1539 				err = PF_ERR_PANIC;
1540 		}
1541 	}
1542 
1543 done:
1544 	return (err);
1545 }
1546 
1547 /*
1548  * PCIe Poisoned TLP error analyser.  If a PCIe device receives a Poisoned TLP,
1549  * check the logs and see if an associated handler for this transaction can be
1550  * found.
1551  */
1552 /* ARGSUSED */
1553 static int
1554 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1555     pf_data_t *pfd_p)
1556 {
1557 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1558 
1559 	/*
1560 	 * If AERs are supported find the logs in this device, otherwise look in
1561 	 * it's parent's logs.
1562 	 */
1563 	if (HAS_AER_LOGS(pfd_p, bit)) {
1564 		pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0);
1565 
1566 		/*
1567 		 * Double check that the log contains a poisoned TLP.
1568 		 * Some devices like PLX switch do not log poison TLP headers.
1569 		 */
1570 		if (hdr->ep) {
1571 			if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) ==
1572 			    PF_HDL_FOUND)
1573 				return (PF_ERR_MATCHED_DEVICE);
1574 		}
1575 
1576 		/*
1577 		 * If an address is found and hdl lookup failed panic.
1578 		 * Otherwise check parents to see if there was enough
1579 		 * information recover.
1580 		 */
1581 		if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr)
1582 			return (PF_ERR_PANIC);
1583 	}
1584 
1585 	/*
1586 	 * Check to see if the rc has already handled this error or a parent has
1587 	 * already handled this error.
1588 	 *
1589 	 * If the error info in the RC wasn't enough to find the fault device,
1590 	 * such as if the faulting device lies behind a PCIe-PCI bridge from a
1591 	 * poisoned completion, check to see if the PCIe-PCI bridge has enough
1592 	 * info to recover.  For completion TLP's, the AER header logs only
1593 	 * contain the faulting BDF in the Root Port.  For PCIe device the fault
1594 	 * BDF is the fault device.  But if the fault device is behind a
1595 	 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI
1596 	 * bridge's secondary bus number.
1597 	 */
1598 	if (!PFD_IS_ROOT(pfd_p)) {
1599 		dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p));
1600 		pf_data_t *parent_pfd_p;
1601 
1602 		if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) {
1603 			if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
1604 				return (PF_ERR_MATCHED_RC);
1605 		}
1606 
1607 		parent_pfd_p = PCIE_DIP2PFD(pdip);
1608 
1609 		if (HAS_AER_LOGS(parent_pfd_p, bit))
1610 			return (PF_ERR_MATCHED_PARENT);
1611 	} else {
1612 		pf_data_t *bdg_pfd_p;
1613 		pcie_req_id_t secbus;
1614 
1615 		/*
1616 		 * Looking for a pcie bridge only makes sense if the BDF
1617 		 * Dev/Func = 0/0
1618 		 */
1619 		if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1620 			goto done;
1621 
1622 		secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf;
1623 
1624 		if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF))
1625 			goto done;
1626 
1627 		bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus);
1628 
1629 		if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p,
1630 		    PCIE_AER_SUCE_PERR_ASSERT)) {
1631 			return pf_analyse_perr_assert(derr,
1632 			    PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p);
1633 		}
1634 	}
1635 done:
1636 	return (PF_ERR_PANIC);
1637 }
1638 
1639 /*
1640  * PCIe-PCI Bridge Received Master and Target abort error analyser on Split
1641  * Completions.  If a PCIe Bridge receives a MA/TA check logs and see if an
1642  * associated handler for this transaction can be found.
1643  */
1644 /* ARGSUSED */
1645 static int
1646 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1647     pf_data_t *pfd_p)
1648 {
1649 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1650 	uint16_t	cmd;
1651 	int		sts = PF_HDL_NOTFOUND;
1652 
1653 	if (!HAS_SAER_LOGS(pfd_p, bit))
1654 		return (PF_ERR_PANIC);
1655 
1656 	if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
1657 		return (PF_ERR_PANIC);
1658 
1659 	if (cmd == PCI_PCIX_CMD_SPL)
1660 		sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);
1661 
1662 	if (sts == PF_HDL_FOUND)
1663 		return (PF_ERR_MATCHED_DEVICE);
1664 
1665 	return (PF_ERR_PANIC);
1666 }
1667 
1668 /*
1669  * PCIe Timeout error analyser.  This error can be forgiven if it is marked as
1670  * CE Advisory.  If it is marked as advisory, this means the HW can recover
1671  * and/or retry the transaction automatically.
1672  */
1673 /* ARGSUSED */
1674 static int
1675 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1676     pf_data_t *pfd_p)
1677 {
1678 	if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
1679 		return (PF_ERR_NO_PANIC);
1680 
1681 	return (PF_ERR_PANIC);
1682 }
1683 
1684 /*
1685  * PCIe Unexpected Completion.  Check to see if this TLP was misrouted by
1686  * matching the device BDF with the TLP Log.  If misrouting panic, otherwise
1687  * don't panic.
1688  */
1689 /* ARGSUSED */
1690 static int
1691 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1692     pf_data_t *pfd_p)
1693 {
1694 	if (HAS_AER_LOGS(pfd_p, bit) &&
1695 	    (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
1696 		return (PF_ERR_NO_PANIC);
1697 
1698 	return (PF_ERR_PANIC);
1699 }
1700 
1701 /*
1702  * PCIe-PCI Bridge Uncorrectable Data error analyser.  All Uncorrectable Data
1703  * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
1704  * Posted Writes.  Check the logs for Posted Writes and if the RC did not see a
1705  * Poisoned TLP.
1706  *
1707  * Non-Posted Writes will also generate a UR in the completion status, which the
1708  * RC should also see.
1709  */
1710 /* ARGSUSED */
1711 static int
1712 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1713     pf_data_t *pfd_p)
1714 {
1715 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1716 
1717 	if (!HAS_SAER_LOGS(pfd_p, bit))
1718 		return (PF_ERR_PANIC);
1719 
1720 	if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
1721 		return (PF_ERR_MATCHED_RC);
1722 
1723 	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
1724 		return (PF_ERR_MATCHED_DEVICE);
1725 
1726 	return (PF_ERR_PANIC);
1727 }
1728 
1729 /* ARGSUSED */
1730 static int
1731 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1732     pf_data_t *pfd_p)
1733 {
1734 	return (PF_ERR_NO_PANIC);
1735 }
1736 
1737 /* ARGSUSED */
1738 static int
1739 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1740     pf_data_t *pfd_p)
1741 {
1742 	return (PF_ERR_PANIC);
1743 }
1744 
1745 /*
1746  * If a PCIe device does not support AER, assume all AER statuses have been set,
1747  * unless other registers do not indicate a certain error occuring.
1748  */
1749 static void
1750 pf_adjust_for_no_aer(pf_data_t *pfd_p)
1751 {
1752 	uint32_t	aer_ue = 0;
1753 	uint16_t	status;
1754 
1755 	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1756 		return;
1757 
1758 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
1759 		aer_ue = PF_AER_FATAL_ERR;
1760 
1761 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
1762 		aer_ue = PF_AER_NON_FATAL_ERR;
1763 		status = PCI_ERR_REG(pfd_p)->pci_err_status;
1764 
1765 		/* Check if the device received a PTLP */
1766 		if (!(status & PCI_STAT_PERROR))
1767 			aer_ue &= ~PCIE_AER_UCE_PTLP;
1768 
1769 		/* Check if the device signaled a CA */
1770 		if (!(status & PCI_STAT_S_TARG_AB))
1771 			aer_ue &= ~PCIE_AER_UCE_CA;
1772 
1773 		/* Check if the device sent a UR */
1774 		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
1775 		    PCIE_DEVSTS_UR_DETECTED))
1776 			aer_ue &= ~PCIE_AER_UCE_UR;
1777 
1778 		/*
1779 		 * Ignore ECRCs as it is optional and will manefest itself as
1780 		 * another error like PTLP and MFP
1781 		 */
1782 		aer_ue &= ~PCIE_AER_UCE_ECRC;
1783 
1784 		/*
1785 		 * Generally if NFE is set, SERR should also be set. Exception:
1786 		 * When certain non-fatal errors are masked, and some of them
1787 		 * happened to be the cause of the NFE, SERR will not be set and
1788 		 * they can not be the source of this interrupt.
1789 		 *
1790 		 * On x86, URs are masked (NFE + UR can be set), if any other
1791 		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
1792 		 * occur, SERR should be set since they are not masked. So if
1793 		 * SERR is not set, none of them occurred.
1794 		 */
1795 		if (!(status & PCI_STAT_S_SYSERR))
1796 			aer_ue &= ~PCIE_AER_UCE_TO;
1797 	}
1798 
1799 	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
1800 		aer_ue &= ~PCIE_AER_UCE_TRAINING;
1801 		aer_ue &= ~PCIE_AER_UCE_SD;
1802 	}
1803 
1804 	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
1805 }
1806 
1807 static void
1808 pf_adjust_for_no_saer(pf_data_t *pfd_p)
1809 {
1810 	uint32_t	s_aer_ue = 0;
1811 	uint16_t	status;
1812 
1813 	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1814 		return;
1815 
1816 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
1817 		s_aer_ue = PF_SAER_FATAL_ERR;
1818 
1819 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
1820 		s_aer_ue = PF_SAER_NON_FATAL_ERR;
1821 		status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;
1822 
1823 		/* Check if the device received a UC_DATA */
1824 		if (!(status & PCI_STAT_PERROR))
1825 			s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;
1826 
1827 		/* Check if the device received a RCVD_MA/MA_ON_SC */
1828 		if (!(status & (PCI_STAT_R_MAST_AB))) {
1829 			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
1830 			s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
1831 		}
1832 
1833 		/* Check if the device received a RCVD_TA/TA_ON_SC */
1834 		if (!(status & (PCI_STAT_R_TARG_AB))) {
1835 			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
1836 			s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
1837 		}
1838 	}
1839 
1840 	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
1841 }
1842 
1843 /* Find the PCIe-PCI bridge based on secondary bus number */
1844 static pf_data_t *
1845 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
1846 {
1847 	pf_data_t *bdg_pfd_p;
1848 
1849 	/* Search down for the PCIe-PCI device. */
1850 	for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
1851 	    bdg_pfd_p = bdg_pfd_p->pe_next) {
1852 		if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
1853 		    PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
1854 			return (bdg_pfd_p);
1855 	}
1856 
1857 	return (NULL);
1858 }
1859 
1860 /* Find the PCIe-PCI bridge of a PCI device */
1861 static pf_data_t *
1862 pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
1863 {
1864 	dev_info_t	*dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1865 
1866 	/* This only makes sense if the device is a PCI device */
1867 	if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
1868 		return (NULL);
1869 
1870 	/*
1871 	 * Search up for the PCIe-PCI device.  Watchout for x86 where pci
1872 	 * devices hang directly off of NPE.
1873 	 */
1874 	for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
1875 		if (dip == rp_dip)
1876 			dip = NULL;
1877 
1878 		if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
1879 			return (PCIE_DIP2PFD(dip));
1880 	}
1881 
1882 	return (NULL);
1883 }
1884 
1885 /*
1886  * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
1887  * As of right now only RC's have enough information to have errors found in the
1888  * fabric to be matched to the RC.  Note that Root Port's (RP) do not carry
1889  * enough information.  Currently known RC's are SPARC Fire architecture and
1890  * it's equivalents, and x86's NPE.
1891  * SPARC Fire architectures have a plethora of error registers, while currently
1892  * NPE only have the address of a failed load.
1893  *
1894  * Check if the RC logged an error with the appropriate status type/abort type.
1895  * Ex: Parity Error, Received Master/Target Abort
1896  * Check if either the fault address found in the rc matches the device's
1897  * assigned address range (PIO's only) or the fault BDF in the rc matches the
1898  * device's BDF or Secondary Bus/Bus Range.
1899  */
1900 static boolean_t
1901 pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
1902     uint32_t abort_type)
1903 {
1904 	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);
1905 	pf_data_t	*rc_pfd_p;
1906 	pcie_req_id_t	fault_bdf;
1907 
1908 	for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
1909 	    rc_pfd_p = rc_pfd_p->pe_next) {
1910 		/* Only root complex's have enough information to match */
1911 		if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
1912 			continue;
1913 
1914 		/* If device and rc abort type does not match continue */
1915 		if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
1916 			continue;
1917 
1918 		fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;
1919 
1920 		/* The Fault BDF = Device's BDF */
1921 		if (fault_bdf == bus_p->bus_bdf)
1922 			return (B_TRUE);
1923 
1924 		/* The Fault Addr is in device's address range */
1925 		if (pf_in_addr_range(bus_p,
1926 		    PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
1927 			return (B_TRUE);
1928 
1929 		/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
1930 		if (PCIE_IS_PCIE_BDG(bus_p) &&
1931 		    pf_in_bus_range(bus_p, fault_bdf))
1932 			return (B_TRUE);
1933 	}
1934 
1935 	return (B_FALSE);
1936 }
1937 
1938 /*
1939  * Check the RP and see if the error is PIO/DMA.  If the RP also has a PERR then
1940  * it is a DMA, otherwise it's a PIO
1941  */
1942 static void
1943 pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
1944     pcie_req_id_t *bdf) {
1945 	pf_data_t *rc_pfd_p;
1946 
1947 	/* Could be DMA or PIO.  Find out by look at error type. */
1948 	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
1949 	case PCIE_AER_SUCE_TA_ON_SC:
1950 	case PCIE_AER_SUCE_MA_ON_SC:
1951 		*trans_type = PF_ADDR_DMA;
1952 		return;
1953 	case PCIE_AER_SUCE_RCVD_TA:
1954 	case PCIE_AER_SUCE_RCVD_MA:
1955 		*bdf = PCIE_INVALID_BDF;
1956 		*trans_type = PF_ADDR_PIO;
1957 		return;
1958 	case PCIE_AER_SUCE_USC_ERR:
1959 	case PCIE_AER_SUCE_UC_DATA_ERR:
1960 	case PCIE_AER_SUCE_PERR_ASSERT:
1961 		break;
1962 	default:
1963 		*addr = 0;
1964 		*bdf = PCIE_INVALID_BDF;
1965 		*trans_type = 0;
1966 		return;
1967 	}
1968 
1969 	*bdf = PCIE_INVALID_BDF;
1970 	*trans_type = PF_ADDR_PIO;
1971 	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
1972 	    rc_pfd_p = rc_pfd_p->pe_prev) {
1973 		if (PFD_IS_ROOT(rc_pfd_p) &&
1974 		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
1975 		    PCI_STAT_PERROR)) {
1976 			*trans_type = PF_ADDR_DMA;
1977 			return;
1978 		}
1979 	}
1980 }
1981 
1982 /*
1983  * pf_pci_decode function decodes the secondary aer transaction logs in
1984  * PCIe-PCI bridges.
1985  *
1986  * The log is 128 bits long and arranged in this manner.
1987  * [0:35]   Transaction Attribute	(s_aer_h0-saer_h1)
1988  * [36:39]  Transaction lower command	(saer_h1)
1989  * [40:43]  Transaction upper command	(saer_h1)
1990  * [44:63]  Reserved
1991  * [64:127] Address			(saer_h2-saer_h3)
1992  */
1993 /* ARGSUSED */
1994 static int
1995 pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) {
1996 	pcix_attr_t	*attr;
1997 	uint64_t	addr;
1998 	uint32_t	trans_type;
1999 	pcie_req_id_t	bdf = PCIE_INVALID_BDF;
2000 
2001 	attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0);
2002 	*cmd = GET_SAER_CMD(pfd_p);
2003 
2004 cmd_switch:
2005 	switch (*cmd) {
2006 	case PCI_PCIX_CMD_IORD:
2007 	case PCI_PCIX_CMD_IOWR:
2008 		/* IO Access should always be down stream */
2009 		addr = PCIE_ADV_BDG_HDR(pfd_p, 2);
2010 		bdf = attr->rid;
2011 		trans_type = PF_ADDR_PIO;
2012 		break;
2013 	case PCI_PCIX_CMD_MEMRD_DW:
2014 	case PCI_PCIX_CMD_MEMRD_BL:
2015 	case PCI_PCIX_CMD_MEMRDBL:
2016 	case PCI_PCIX_CMD_MEMWR:
2017 	case PCI_PCIX_CMD_MEMWR_BL:
2018 	case PCI_PCIX_CMD_MEMWRBL:
2019 		addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) <<
2020 		    PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2);
2021 		bdf = attr->rid;
2022 
2023 		pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf);
2024 		break;
2025 	case PCI_PCIX_CMD_CFRD:
2026 	case PCI_PCIX_CMD_CFWR:
2027 		/*
2028 		 * CFG Access should always be down stream.  Match the BDF in
2029 		 * the address phase.
2030 		 */
2031 		addr = 0;
2032 		bdf = attr->rid;
2033 		trans_type = PF_ADDR_CFG;
2034 		break;
2035 	case PCI_PCIX_CMD_SPL:
2036 		/*
2037 		 * Check for DMA read completions.  The requesting BDF is in the
2038 		 * Address phase.
2039 		 */
2040 		addr = 0;
2041 		bdf = attr->rid;
2042 		trans_type = PF_ADDR_DMA;
2043 		break;
2044 	case PCI_PCIX_CMD_DADR:
2045 		/*
2046 		 * For Dual Address Cycles the transaction command is in the 2nd
2047 		 * address phase.
2048 		 */
2049 		*cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
2050 		    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
2051 		    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
2052 		if (*cmd != PCI_PCIX_CMD_DADR)
2053 			goto cmd_switch;
2054 		/* FALLTHROUGH */
2055 	default:
2056 		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
2057 		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF;
2058 		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
2059 		return (DDI_FAILURE);
2060 	}
2061 	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type;
2062 	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf;
2063 	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr;
2064 	return (DDI_SUCCESS);
2065 }
2066 
2067 /*
2068  * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler.
2069  * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2070  */
2071 int
2072 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr,
2073     pcie_req_id_t bdf)
2074 {
2075 	ddi_fm_error_t		derr;
2076 
2077 	/* If we don't know the addr or rid just return with NOTFOUND */
2078 	if ((addr == NULL) && !PCIE_CHECK_VALID_BDF(bdf))
2079 		return (PF_HDL_NOTFOUND);
2080 
2081 	if (!(flag & (PF_ADDR_DMA | PF_ADDR_PIO | PF_ADDR_CFG))) {
2082 		return (PF_HDL_NOTFOUND);
2083 	}
2084 
2085 	bzero(&derr, sizeof (ddi_fm_error_t));
2086 	derr.fme_version = DDI_FME_VERSION;
2087 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
2088 	derr.fme_ena = ena;
2089 
2090 	return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf));
2091 }
2092 
2093 static int
2094 pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
2095     uint64_t addr, pcie_req_id_t bdf)
2096 {
2097 	int			status = PF_HDL_NOTFOUND;
2098 	ndi_fmc_t		*fcp = NULL;
2099 	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
2100 	pcie_req_id_t		dip_bdf;
2101 	boolean_t		have_lock = B_FALSE;
2102 	pcie_bus_t		*bus_p;
2103 	dev_info_t		*cdip;
2104 
2105 	if (!(bus_p = pf_is_ready(dip))) {
2106 		return (status);
2107 	}
2108 
2109 	ASSERT(fmhdl);
2110 	if (!i_ddi_fm_handler_owned(dip)) {
2111 		/*
2112 		 * pf_handler_enter always returns SUCCESS if the 'impl' arg is
2113 		 * NULL.
2114 		 */
2115 		(void) pf_handler_enter(dip, NULL);
2116 		have_lock = B_TRUE;
2117 	}
2118 
2119 	dip_bdf = PCI_GET_BDF(dip);
2120 
2121 	/* Check if dip and BDF match, if not recurse to it's children. */
2122 	if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) ||
2123 	    dip_bdf == bdf)) {
2124 		if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap))
2125 			fcp = fmhdl->fh_dma_cache;
2126 		else
2127 			fcp = NULL;
2128 
2129 		if (fcp)
2130 			status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr,
2131 			    bdf, fcp);
2132 
2133 
2134 		if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) &&
2135 		    DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap))
2136 			fcp = fmhdl->fh_acc_cache;
2137 		else
2138 			fcp = NULL;
2139 
2140 		if (fcp)
2141 			status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr,
2142 			    bdf, fcp);
2143 	}
2144 
2145 	/* If we found the handler or know it's this device, we're done */
2146 	if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) ||
2147 	    (status == PF_HDL_FOUND)))
2148 		goto done;
2149 
2150 	/*
2151 	 * If the current devuce us a PCIe-PCI bridge need to check for special
2152 	 * cases:
2153 	 *
2154 	 * If it is a PIO and we don't have an address or this is a DMA, check
2155 	 * to see if the BDF = secondary bus.  If so stop.  The BDF isn't a real
2156 	 * BDF and the fault device could have come from any device in the PCI
2157 	 * bus.
2158 	 */
2159 	if (PCIE_IS_PCIE_BDG(bus_p) &&
2160 	    ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) &&
2161 	    ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf))
2162 		goto done;
2163 
2164 
2165 	/* If we can't find the handler check it's children */
2166 	for (cdip = ddi_get_child(dip); cdip;
2167 	    cdip = ddi_get_next_sibling(cdip)) {
2168 		if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL)
2169 			continue;
2170 
2171 		if (pf_in_bus_range(bus_p, bdf) ||
2172 		    pf_in_addr_range(bus_p, addr))
2173 			status = pf_hdl_child_lookup(cdip, derr, flag, addr,
2174 			    bdf);
2175 
2176 		if (status == PF_HDL_FOUND)
2177 			goto done;
2178 	}
2179 
2180 done:
2181 	if (have_lock == B_TRUE)
2182 		pf_handler_exit(dip);
2183 
2184 	return (status);
2185 }
2186 
2187 static int
2188 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
2189     uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) {
2190 	ndi_fmcentry_t	*fep;
2191 	int		found = 0;
2192 	int		status;
2193 
2194 	mutex_enter(&fcp->fc_lock);
2195 	for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) {
2196 		ddi_fmcompare_t compare_func;
2197 
2198 		/*
2199 		 * Compare captured error state with handle
2200 		 * resources.  During the comparison and
2201 		 * subsequent error handling, we block
2202 		 * attempts to free the cache entry.
2203 		 */
2204 		compare_func = (flag == ACC_HANDLE) ?
2205 		    i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t)
2206 			fep->fce_resource) :
2207 		    i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t)
2208 			fep->fce_resource);
2209 
2210 		status = compare_func(dip, fep->fce_resource,
2211 			    (void *)&addr, (void *)&bdf);
2212 
2213 		if (status == DDI_FM_NONFATAL) {
2214 			found++;
2215 
2216 			/* Set the error for this resource handle */
2217 			if (flag == ACC_HANDLE) {
2218 				ddi_acc_handle_t ap = fep->fce_resource;
2219 
2220 				i_ddi_fm_acc_err_set(ap, derr->fme_ena, status,
2221 				    DDI_FM_ERR_UNEXPECTED);
2222 				ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION);
2223 				derr->fme_acc_handle = ap;
2224 			} else {
2225 				ddi_dma_handle_t dp = fep->fce_resource;
2226 
2227 				i_ddi_fm_dma_err_set(dp, derr->fme_ena, status,
2228 				    DDI_FM_ERR_UNEXPECTED);
2229 				ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION);
2230 				derr->fme_dma_handle = dp;
2231 			}
2232 		}
2233 	}
2234 	mutex_exit(&fcp->fc_lock);
2235 
2236 	/*
2237 	 * If a handler isn't found and we know this is the right device mark
2238 	 * them all failed.
2239 	 */
2240 	if ((addr != NULL) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) {
2241 		status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp);
2242 		if (status == PF_HDL_FOUND)
2243 			found++;
2244 	}
2245 
2246 	return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND);
2247 }
2248 
2249 /*
2250  * Automatically decode AER header logs and does a handling look up based on the
2251  * AER header decoding.
2252  *
2253  * For this function only the Primary/Secondary AER Header Logs need to be valid
2254  * in the pfd (PCIe Fault Data) arg.
2255  *
2256  * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2257  */
2258 static int
2259 pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
2260 	boolean_t is_primary)
2261 {
2262 	int		lookup = PF_HDL_NOTFOUND;
2263 
2264 	if (is_primary) {
2265 		pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p);
2266 		if (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), reg_p) == DDI_SUCCESS) {
2267 			lookup = pf_hdl_lookup(rpdip, derr->fme_ena,
2268 			    reg_p->pcie_ue_tgt_trans,
2269 			    reg_p->pcie_ue_tgt_addr,
2270 			    reg_p->pcie_ue_tgt_bdf);
2271 		}
2272 	} else {
2273 		pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p);
2274 		uint16_t cmd;
2275 		if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) {
2276 			lookup = pf_hdl_lookup(rpdip, derr->fme_ena,
2277 			    reg_p->pcie_sue_tgt_trans,
2278 			    reg_p->pcie_sue_tgt_addr,
2279 			    reg_p->pcie_sue_tgt_bdf);
2280 		}
2281 	}
2282 
2283 	return (lookup);
2284 }
2285 
2286 /*
2287  * Decodes the TLP and returns the BDF of the handler, address and transaction
2288  * type if known.
2289  *
2290  * Types of TLP logs seen in RC, and what to extract:
2291  *
2292  * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2293  * Memory(PIO) - address, PF_PIO_ADDR
2294  * CFG - Should not occur and result in UR
2295  * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2296  * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2297  *
2298  * Types of TLP logs seen in SW/Leaf, and what to extract:
2299  *
2300  * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2301  * Memory(PIO) - address, PF_PIO_ADDR
2302  * CFG - Destined BDF, address, PF_CFG_ADDR
2303  * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2304  * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2305  *
2306  * The adv_reg_p must be passed in separately for use with SPARC RPs.  A
2307  * SPARC RP could have multiple AER header logs which cannot be directly
2308  * accessed via the bus_p.
2309  */
2310 int
2311 pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) {
2312 	pcie_tlp_hdr_t	*tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
2313 	pcie_req_id_t	my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
2314 	uint64_t	flt_addr = 0;
2315 	uint32_t	flt_trans_type = 0;
2316 
2317 	adv_reg_p->pcie_ue_tgt_addr = 0;
2318 	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
2319 	adv_reg_p->pcie_ue_tgt_trans = 0;
2320 
2321 	my_bdf = bus_p->bus_bdf;
2322 	switch (tlp_hdr->type) {
2323 	case PCIE_TLP_TYPE_IO:
2324 	case PCIE_TLP_TYPE_MEM:
2325 	case PCIE_TLP_TYPE_MEMLK:
2326 		/* Grab the 32/64bit fault address */
2327 		if (tlp_hdr->fmt & 0x1) {
2328 			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
2329 			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
2330 		} else {
2331 			flt_addr = adv_reg_p->pcie_ue_hdr[2];
2332 		}
2333 
2334 		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);
2335 
2336 		/*
2337 		 * If the req bdf >= this.bdf, then it means the request is this
2338 		 * device or came from a device below it.  Unless this device is
2339 		 * a PCIe root port then it means is a DMA, otherwise PIO.
2340 		 */
2341 		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
2342 			flt_trans_type = PF_ADDR_DMA;
2343 			flt_bdf = tlp_bdf;
2344 		} else if (PCIE_IS_ROOT(bus_p) &&
2345 		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
2346 			(PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
2347 			flt_trans_type = PF_ADDR_DMA;
2348 			flt_bdf = tlp_bdf;
2349 		} else {
2350 			flt_trans_type = PF_ADDR_PIO;
2351 			flt_bdf = PCIE_INVALID_BDF;
2352 		}
2353 		break;
2354 	case PCIE_TLP_TYPE_CFG0:
2355 	case PCIE_TLP_TYPE_CFG1:
2356 		flt_addr = 0;
2357 		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
2358 		flt_trans_type = PF_ADDR_CFG;
2359 		break;
2360 	case PCIE_TLP_TYPE_CPL:
2361 	case PCIE_TLP_TYPE_CPLLK:
2362 	{
2363 		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)adv_reg_p->pcie_ue_hdr;
2364 
2365 		flt_addr = NULL;
2366 		flt_bdf = cpl_tlp->rid;
2367 
2368 		/*
2369 		 * If the cpl bdf < this.bdf, then it means the request is this
2370 		 * device or came from a device below it.  Unless this device is
2371 		 * a PCIe root port then it means is a DMA, otherwise PIO.
2372 		 */
2373 		if (cpl_tlp->rid > cpl_tlp->cid) {
2374 			flt_trans_type = PF_ADDR_DMA;
2375 		} else {
2376 			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
2377 		}
2378 		break;
2379 	}
2380 	default:
2381 		return (DDI_FAILURE);
2382 	}
2383 
2384 	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
2385 	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
2386 	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;
2387 
2388 	return (DDI_SUCCESS);
2389 }
2390 
2391 #define	PCIE_EREPORT	DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC
2392 static int
2393 pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
2394     nvlist_t **detector, errorq_elem_t **eqep)
2395 {
2396 	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2397 	char device_path[MAXPATHLEN];
2398 	nv_alloc_t *nva;
2399 
2400 	*eqep = errorq_reserve(fmhdl->fh_errorq);
2401 	if (*eqep == NULL) {
2402 		atomic_add_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64, 1);
2403 		return (DDI_FAILURE);
2404 	}
2405 
2406 	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
2407 	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);
2408 
2409 	ASSERT(*ereport);
2410 	ASSERT(nva);
2411 
2412 	/*
2413 	 * Use the dev_path/devid for this device instance.
2414 	 */
2415 	*detector = fm_nvlist_create(nva);
2416 	if (dip == ddi_root_node()) {
2417 		device_path[0] = '/';
2418 		device_path[1] = '\0';
2419 	} else {
2420 		(void) ddi_pathname(dip, device_path);
2421 	}
2422 
2423 	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
2424 	    device_path, NULL);
2425 
2426 	if (ena == 0)
2427 		ena = fm_ena_generate(0, FM_ENA_FMT1);
2428 
2429 	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);
2430 
2431 	return (DDI_SUCCESS);
2432 }
2433 
2434 /* ARGSUSED */
2435 static void
2436 pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
2437     errorq_elem_t **eqep)
2438 {
2439 	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2440 
2441 	errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
2442 }
2443 
2444 static void
2445 pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
2446 {
2447 	nvlist_t	*ereport;
2448 	nvlist_t	*detector;
2449 	errorq_elem_t	*eqep;
2450 	pcie_bus_t	*bus_p;
2451 	pf_data_t	*pfd_p;
2452 	uint32_t	total = impl->pf_total;
2453 
2454 	/*
2455 	 * Ereports need to be sent in a top down fashion. The fabric translator
2456 	 * expects the ereports from the Root first. This is needed to tell if
2457 	 * the system contains a PCIe complaint RC/RP.
2458 	 */
2459 	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
2460 		bus_p = PCIE_PFD2BUS(pfd_p);
2461 		pfd_p->pe_valid = B_FALSE;
2462 
2463 		if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
2464 		    PFD_IS_RC(pfd_p) ||
2465 		    !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
2466 			continue;
2467 
2468 		if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
2469 		    &ereport, &detector, &eqep) != DDI_SUCCESS)
2470 			continue;
2471 
2472 		/* Generic PCI device information */
2473 		fm_payload_set(ereport,
2474 		    "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
2475 		    "device_id", DATA_TYPE_UINT16,
2476 		    (bus_p->bus_dev_ven_id >> 16),
2477 		    "vendor_id", DATA_TYPE_UINT16,
2478 		    (bus_p->bus_dev_ven_id & 0xFFFF),
2479 		    "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
2480 		    "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
2481 		    "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
2482 		    "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
2483 		    "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
2484 		    "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
2485 		    NULL);
2486 
2487 		/* PCI registers */
2488 		fm_payload_set(ereport,
2489 		    "pci_status", DATA_TYPE_UINT16,
2490 		    PCI_ERR_REG(pfd_p)->pci_err_status,
2491 		    "pci_command", DATA_TYPE_UINT16,
2492 		    PCI_ERR_REG(pfd_p)->pci_cfg_comm,
2493 		    NULL);
2494 
2495 		/* PCI bridge registers */
2496 		if (PCIE_IS_BDG(bus_p)) {
2497 			fm_payload_set(ereport,
2498 			    "pci_bdg_sec_status", DATA_TYPE_UINT16,
2499 			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
2500 			    "pci_bdg_ctrl", DATA_TYPE_UINT16,
2501 			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
2502 			    NULL);
2503 		}
2504 
2505 		/* PCIx registers */
2506 		if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
2507 			fm_payload_set(ereport,
2508 			    "pcix_status", DATA_TYPE_UINT32,
2509 			    PCIX_ERR_REG(pfd_p)->pcix_status,
2510 			    "pcix_command", DATA_TYPE_UINT16,
2511 			    PCIX_ERR_REG(pfd_p)->pcix_command,
2512 			    NULL);
2513 		}
2514 
2515 		/* PCIx ECC Registers */
2516 		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
2517 			pf_pcix_ecc_regs_t *ecc_bdg_reg;
2518 			pf_pcix_ecc_regs_t *ecc_reg;
2519 
2520 			if (PCIE_IS_BDG(bus_p))
2521 				ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
2522 			ecc_reg = PCIX_ECC_REG(pfd_p);
2523 			fm_payload_set(ereport,
2524 			    "pcix_ecc_control_0", DATA_TYPE_UINT16,
2525 			    PCIE_IS_BDG(bus_p) ?
2526 			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
2527 			    (ecc_reg->pcix_ecc_ctlstat >> 16),
2528 			    "pcix_ecc_status_0", DATA_TYPE_UINT16,
2529 			    PCIE_IS_BDG(bus_p) ?
2530 			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
2531 			    (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
2532 			    "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
2533 			    PCIE_IS_BDG(bus_p) ?
2534 			    ecc_bdg_reg->pcix_ecc_fstaddr :
2535 			    ecc_reg->pcix_ecc_fstaddr,
2536 			    "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
2537 			    PCIE_IS_BDG(bus_p) ?
2538 			    ecc_bdg_reg->pcix_ecc_secaddr :
2539 			    ecc_reg->pcix_ecc_secaddr,
2540 			    "pcix_ecc_attr_0", DATA_TYPE_UINT32,
2541 			    PCIE_IS_BDG(bus_p) ?
2542 			    ecc_bdg_reg->pcix_ecc_attr :
2543 			    ecc_reg->pcix_ecc_attr,
2544 			    NULL);
2545 		}
2546 
2547 		/* PCIx ECC Bridge Registers */
2548 		if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
2549 			pf_pcix_ecc_regs_t *ecc_bdg_reg;
2550 
2551 			ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
2552 			fm_payload_set(ereport,
2553 			    "pcix_ecc_control_1", DATA_TYPE_UINT16,
2554 			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
2555 			    "pcix_ecc_status_1", DATA_TYPE_UINT16,
2556 			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
2557 			    "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
2558 			    ecc_bdg_reg->pcix_ecc_fstaddr,
2559 			    "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
2560 			    ecc_bdg_reg->pcix_ecc_secaddr,
2561 			    "pcix_ecc_attr_1", DATA_TYPE_UINT32,
2562 			    ecc_bdg_reg->pcix_ecc_attr,
2563 			    NULL);
2564 		}
2565 
2566 		/* PCIx Bridge */
2567 		if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
2568 			fm_payload_set(ereport,
2569 			    "pcix_bdg_status", DATA_TYPE_UINT32,
2570 			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
2571 			    "pcix_bdg_sec_status", DATA_TYPE_UINT16,
2572 			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
2573 			    NULL);
2574 		}
2575 
2576 		/* PCIe registers */
2577 		if (PCIE_IS_PCIE(bus_p)) {
2578 			fm_payload_set(ereport,
2579 			    "pcie_status", DATA_TYPE_UINT16,
2580 			    PCIE_ERR_REG(pfd_p)->pcie_err_status,
2581 			    "pcie_command", DATA_TYPE_UINT16,
2582 			    PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
2583 			    "pcie_dev_cap", DATA_TYPE_UINT32,
2584 			    PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
2585 			    NULL);
2586 		}
2587 
2588 		/* PCIe AER registers */
2589 		if (PCIE_HAS_AER(bus_p)) {
2590 			fm_payload_set(ereport,
2591 			    "pcie_adv_ctl", DATA_TYPE_UINT32,
2592 			    PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
2593 			    "pcie_ue_status", DATA_TYPE_UINT32,
2594 			    PCIE_ADV_REG(pfd_p)->pcie_ue_status,
2595 			    "pcie_ue_mask", DATA_TYPE_UINT32,
2596 			    PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
2597 			    "pcie_ue_sev", DATA_TYPE_UINT32,
2598 			    PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
2599 			    "pcie_ue_hdr0", DATA_TYPE_UINT32,
2600 			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
2601 			    "pcie_ue_hdr1", DATA_TYPE_UINT32,
2602 			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
2603 			    "pcie_ue_hdr2", DATA_TYPE_UINT32,
2604 			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
2605 			    "pcie_ue_hdr3", DATA_TYPE_UINT32,
2606 			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
2607 			    "pcie_ce_status", DATA_TYPE_UINT32,
2608 			    PCIE_ADV_REG(pfd_p)->pcie_ce_status,
2609 			    "pcie_ce_mask", DATA_TYPE_UINT32,
2610 			    PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
2611 			    NULL);
2612 		}
2613 
2614 		/* PCIe AER decoded header */
2615 		if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
2616 			fm_payload_set(ereport,
2617 			    "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
2618 			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
2619 			    "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
2620 			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
2621 			    "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
2622 			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
2623 			    NULL);
2624 			/* Clear these values as they no longer valid */
2625 			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
2626 			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
2627 			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
2628 		}
2629 
2630 		/* PCIe BDG AER registers */
2631 		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
2632 			fm_payload_set(ereport,
2633 			    "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
2634 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
2635 			    "pcie_sue_status", DATA_TYPE_UINT32,
2636 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
2637 			    "pcie_sue_mask", DATA_TYPE_UINT32,
2638 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
2639 			    "pcie_sue_sev", DATA_TYPE_UINT32,
2640 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
2641 			    "pcie_sue_hdr0", DATA_TYPE_UINT32,
2642 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
2643 			    "pcie_sue_hdr1", DATA_TYPE_UINT32,
2644 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
2645 			    "pcie_sue_hdr2", DATA_TYPE_UINT32,
2646 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
2647 			    "pcie_sue_hdr3", DATA_TYPE_UINT32,
2648 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
2649 			    NULL);
2650 		}
2651 
2652 		/* PCIe BDG AER decoded header */
2653 		if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
2654 		    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
2655 			fm_payload_set(ereport,
2656 			    "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
2657 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
2658 			    "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
2659 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
2660 			    "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
2661 			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
2662 			    NULL);
2663 			/* Clear these values as they no longer valid */
2664 			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
2665 			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
2666 			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
2667 			    PCIE_INVALID_BDF;
2668 		}
2669 
2670 		/* PCIe RP registers */
2671 		if (PCIE_IS_RP(bus_p)) {
2672 			fm_payload_set(ereport,
2673 			    "pcie_rp_status", DATA_TYPE_UINT32,
2674 			    PCIE_RP_REG(pfd_p)->pcie_rp_status,
2675 			    "pcie_rp_control", DATA_TYPE_UINT16,
2676 			    PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
2677 			    NULL);
2678 		}
2679 
2680 		/* PCIe RP AER registers */
2681 		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
2682 			fm_payload_set(ereport,
2683 			    "pcie_adv_rp_status", DATA_TYPE_UINT32,
2684 			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
2685 			    "pcie_adv_rp_command", DATA_TYPE_UINT32,
2686 			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
2687 			    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
2688 			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
2689 			    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
2690 			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
2691 			    NULL);
2692 		}
2693 
2694 		/* Misc ereport information */
2695 		fm_payload_set(ereport,
2696 		    "remainder", DATA_TYPE_UINT32, total--,
2697 		    "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
2698 		    NULL);
2699 
2700 		pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
2701 		    &eqep);
2702 	}
2703 
2704 	/* Unlock all the devices in the queue */
2705 	for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
2706 		if (pfd_p->pe_lock) {
2707 			pf_handler_exit(PCIE_PFD2DIP(pfd_p));
2708 		}
2709 	}
2710 }
2711 
2712 /*
2713  * pf_handler_enter must be called to serial access to each device's pf_data_t.
2714  * Once error handling is finished with the device call pf_handler_exit to allow
2715  * other threads to access it.  The same thread may call pf_handler_enter
2716  * several times without any consequences.
2717  *
2718  * The "impl" variable is passed in during scan fabric to double check that
2719  * there is not a recursive algorithm and to ensure only one thread is doing a
2720  * fabric scan at all times.
2721  *
2722  * In some cases "impl" is not available, such as "child lookup" being called
2723  * from outside of scan fabric, just pass in NULL for this variable and this
2724  * extra check will be skipped.
2725  */
2726 static int
2727 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl)
2728 {
2729 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
2730 
2731 	ASSERT(pfd_p);
2732 
2733 	/*
2734 	 * Check to see if the lock has already been taken by this
2735 	 * thread.  If so just return and don't take lock again.
2736 	 */
2737 	if (!pfd_p->pe_lock || !impl) {
2738 		i_ddi_fm_handler_enter(dip);
2739 		pfd_p->pe_lock = B_TRUE;
2740 		return (PF_SCAN_SUCCESS);
2741 	}
2742 
2743 	/* Check to see that this dip is already in the "impl" error queue */
2744 	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
2745 		if (PCIE_PFD2DIP(pfd_p) == dip) {
2746 			return (PF_SCAN_SUCCESS);
2747 		}
2748 	}
2749 
2750 	return (PF_SCAN_DEADLOCK);
2751 }
2752 
2753 static void
2754 pf_handler_exit(dev_info_t *dip)
2755 {
2756 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
2757 
2758 	ASSERT(pfd_p);
2759 
2760 	ASSERT(pfd_p->pe_lock == B_TRUE);
2761 	i_ddi_fm_handler_exit(dip);
2762 	pfd_p->pe_lock = B_FALSE;
2763 }
2764 
2765 /*
2766  * This function calls the driver's callback function (if it's FMA hardened
2767  * and callback capable). This function relies on the current thread already
2768  * owning the driver's fmhdl lock.
2769  */
2770 static int
2771 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
2772 {
2773 	int cb_sts = DDI_FM_OK;
2774 
2775 	if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
2776 		dev_info_t *pdip = ddi_get_parent(dip);
2777 		struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
2778 		struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
2779 		struct i_ddi_errhdl *errhdl;
2780 		while (tgt != NULL) {
2781 			if (dip == tgt->ft_dip) {
2782 				errhdl = tgt->ft_errhdl;
2783 				cb_sts = errhdl->eh_func(dip, derr,
2784 				    errhdl->eh_impl);
2785 				break;
2786 			}
2787 			tgt = tgt->ft_next;
2788 		}
2789 	}
2790 	return (cb_sts);
2791 }
2792