xref: /illumos-gate/usr/src/uts/common/io/pciex/pcie_fault.c (revision 7eb11c2ea01d0728660a0115775e1c7ca2c88df7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 Joyent, Inc.
24  * Copyright 2026 Oxide Computer Company
25  */
26 
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/kmem.h>
30 #include <sys/modctl.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/sunndi.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/fm/util.h>
36 #include <sys/fm/io/ddi.h>
37 #include <sys/fm/io/pci.h>
38 #include <sys/promif.h>
39 #include <sys/disp.h>
40 #include <sys/atomic.h>
41 #include <sys/pcie.h>
42 #include <sys/pci_cap.h>
43 #include <sys/pcie_impl.h>
44 
/* PCIe Device Status bits indicating a bridge detected any class of error. */
#define	PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
	PCIE_DEVSTS_CE_DETECTED)

/* Conventional PCI (secondary) status bits that indicate a bridge error. */
#define	PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
	PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable errors partitioned by default fatal/non-fatal severity. */
#define	PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
	PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define	PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* Secondary (PCIe-to-PCI bridge) AER errors, partitioned the same way. */
#define	PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
	PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
	PCIE_AER_SUCE_SERR_ASSERT)
#define	PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
	PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
	PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
	PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
	PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

/* Parity error bits in the PCI status / secondary status registers. */
#define	PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/*
 * True if "bit" is the first error recorded in the AER capability, per the
 * First Error Pointer field of the AER control register.
 */
#define	PF_FIRST_AER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

/* True if the device has AER and the header logs correspond to "bit". */
#define	HAS_AER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/* Same as PF_FIRST_AER_ERR, for the secondary (bridge) AER registers. */
#define	PF_FIRST_SAER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

/* True if the device has AER and the secondary header logs match "bit". */
#define	HAS_SAER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the PCI command from the logged secondary AER header. */
#define	GET_SAER_CMD(pfd_p) \
	((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
	PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* True if the logged correctable error is an Advisory Non-Fatal. */
#define	CE_ADVISORY(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)

/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
	uint32_t	bit;		/* Error bit */
	int		(*handler)();	/* Error handling function */
	uint16_t	affected_flags; /* Primary affected flag */
	/*
	 * Secondary affected flag, effective when the information
	 * indicated by the primary flag is not available, eg.
	 * PF_AFFECTED_AER/SAER/ADDR
	 */
	uint16_t	sec_affected_flags;
} pf_fab_err_tbl_t;
100 
101 static pcie_bus_t *pf_is_ready(dev_info_t *);
102 /* Functions for scanning errors */
103 static int pf_default_hdl(dev_info_t *, pf_impl_t *);
104 static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
105 static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);
106 
107 /* Functions for gathering errors */
108 static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
109     pcie_bus_t *bus_p, boolean_t bdg);
110 static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
111 static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
112 static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
113 static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
114 static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);
115 
116 /* Functions for analysing errors */
117 static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
118 static void pf_adjust_for_no_aer(pf_data_t *);
119 static void pf_adjust_for_no_saer(pf_data_t *);
120 static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
121 static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
122 static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
123     uint32_t);
124 static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
125     pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
126 static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
127     pf_data_t *, pf_data_t *);
128 static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
129     pf_data_t *, pf_data_t *);
130 static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
131     pf_data_t *, pf_data_t *);
132 static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
133     pf_data_t *, pf_data_t *);
134 static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
135     pf_data_t *, pf_data_t *);
136 static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
137     pf_data_t *, pf_data_t *);
138 static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
139     pf_data_t *, pf_data_t *);
140 static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
141     pf_data_t *, pf_data_t *);
142 static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
143     pf_data_t *, pf_data_t *);
144 static int pf_no_panic(ddi_fm_error_t *, uint32_t,
145     pf_data_t *, pf_data_t *);
146 static int pf_panic(ddi_fm_error_t *, uint32_t,
147     pf_data_t *, pf_data_t *);
148 static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
149 static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);
150 
151 /* PCIe Fabric Handle Lookup Support Functions. */
152 static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
153     uint64_t, pcie_req_id_t);
154 static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
155     pcie_req_id_t, ndi_fmc_t *);
156 static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
157 	boolean_t);
158 
159 static int pf_handler_enter(dev_info_t *, pf_impl_t *);
160 static void pf_handler_exit(dev_info_t *);
161 static void pf_reset_pfd(pf_data_t *);
162 
163 boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
164 int pcie_disable_scan = 0;		/* Disable fabric scan */
165 
166 /*
167  * Cache of pf_impl_t that triggered a fatal error. This is stored before
168  * the system will likely panic. The pf_impl_t contains the scan results
169  * including all error data queues that led to the fatal error. The primary
170  * purpose of this cache is post-mortem debugging of such fatal PCIe errors.
171  * The cached data will be present and valid in crash dumps taken immediately
172  * after fatal error detection.
173  *
174  * This cache stores a shallow copy of the pf_impl_t structure which contains
175  * pointers to other structures. The validity of these pointers in a crash dump
176  * depends on their allocation:
177  *
178  * 1. pf_dq_head_p/pf_dq_tail_p (pf_data_t chain):
179  *    These point to heap-allocated pf_data_t structures that live in each
180  *    device's pcie_bus_t->bus_pfd. These are long-lived structures that
181  *    persist for the lifetime of the device node in the device tree.
182  *
183  * 2. pf_fault (pf_root_fault_t):
184  *    Points to heap-allocated structure in the root port's pf_data_t.
185  *
186  * 3. Error register structures (pf_pcie_err_regs_t, etc.):
187  *    Heap-allocated as part of each device's pf_data_t; long lived.
188  *
189  * 4. pf_derr (ddi_fm_error_t):
190  *    This points to a structure on the caller's stack. Normally this would be
191  *    invalid after the function returns, but in a crash dump the panic
192  *    preserves the stack contents.
193  *
194  * The cached pointers could theoretically become invalid if device hotplug/
195  * detach occurs between error detection and panic. However this is not a
196  * concern in practice because there is no window for this to occur between a
197  * fatal error and a system panic.
198  */
199 pf_impl_t pcie_faulty_pf_impl;
200 
/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
pf_eh_enter(pcie_bus_t *bus_p)
{
	/* Intentionally empty: a hook point with no registered actions. */
}
207 
/* Inform interested parties that error handling has ended. */
void
pf_eh_exit(pcie_bus_t *bus_p)
{
	/* Resolve the root port's bus and fault data for this device. */
	pcie_bus_t *rbus_p = PCIE_DIP2BUS(bus_p->bus_rp_dip);
	pf_data_t *root_pfd_p = PCIE_BUS2PFD(rbus_p);
	pf_data_t *pfd_p;
	uint_t intr_type = PCIE_ROOT_EH_SRC(root_pfd_p)->intr_type;

	/* Notify the virtual error-handling layer that handling is done. */
	pciev_eh_exit(root_pfd_p, intr_type);

	/* Clear affected device info and INTR SRC */
	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
		/* Only root entries carry interrupt-source bookkeeping. */
		if (PCIE_IS_ROOT(PCIE_PFD2BUS(pfd_p))) {
			PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
			PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
		}
	}
}
229 
230 /*
231  * After sending an ereport, or in lieu of doing so, unlock all the devices in
232  * the data queue.  We also must clear pe_valid here; this function is called in
233  * the path where we decide not to send an ereport because there is no error
234  * (spurious AER interrupt), as well as from pf_send_ereport() which has already
235  * cleared it.  Failing to do this will result in a different path through
236  * pf_dispatch() and the potential for deadlocks.  It is safe to do as we are
237  * still holding the handler lock here, just as in pf_send_ereport().
238  */
239 static void
pf_dq_unlock_chain(pf_impl_t * impl)240 pf_dq_unlock_chain(pf_impl_t *impl)
241 {
242 	pf_data_t *pfd_p;
243 
244 	for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
245 		pfd_p->pe_valid = B_FALSE;
246 		if (pfd_p->pe_lock) {
247 			pf_handler_exit(PCIE_PFD2DIP(pfd_p));
248 		}
249 	}
250 }
251 
/*
 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors.  The
 * caller may create a local pf_data_t with the "root fault"
 * information populated to either do a precise or full scan.  More
 * than one pf_data_t maybe linked together if there are multiple
 * errors.  Only a PCIe compliant Root Port device may pass in NULL
 * for the root_pfd_p.
 *
 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
 * the rdip.  PCIe Root ports should call pf_scan_fabric using its parent as
 * the rdip.
 *
 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
 * any RC detected errors that propagated to/from the fabric.
 *
 * This code assumes that by the time pf_scan_fabric is
 * called, pf_handler_enter has NOT been called on the rdip.
 *
 * Returns the accumulated PF_ERR_* analysis flags; also records the scan
 * outcome into derr->fme_status for the caller.
 */
int
pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
{
	pf_impl_t	impl;
	pf_data_t	*pfd_p, *pfd_head_p, *pfd_tail_p;
	int		scan_flag = PF_SCAN_SUCCESS;
	int		analyse_flag = PF_ERR_NO_ERROR;
	boolean_t	full_scan = pcie_full_scan;

	/* Honor the global tunable that disables fabric scanning entirely. */
	if (pcie_disable_scan)
		return (analyse_flag);

	/* Find the head and tail of this link list */
	pfd_head_p = root_pfd_p;
	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
	    pfd_tail_p = pfd_tail_p->pe_next)
		;

	/* Save head/tail */
	impl.pf_total = 0;
	impl.pf_derr = derr;
	impl.pf_dq_head_p = pfd_head_p;
	impl.pf_dq_tail_p = pfd_tail_p;

	/* If scan is initiated from RP then RP itself must be scanned. */
	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
	    !root_pfd_p) {
		scan_flag = pf_handler_enter(rdip, &impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			goto done;

		scan_flag = pf_default_hdl(rdip, &impl);
		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
			goto done;
	}

	/*
	 * Scan the fabric using the scan_bdf and scan_addr in error q.
	 * scan_bdf will be valid in the following cases:
	 *	- Fabric message
	 *	- Poisoned TLP
	 *	- Signaled UR/CA
	 *	- Received UR/CA
	 *	- PIO load failures
	 */
	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
	    pfd_p = pfd_p->pe_next) {
		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);

		if (PFD_IS_RC(pfd_p))
			impl.pf_total++;

		if (impl.pf_fault->full_scan)
			full_scan = B_TRUE;

		/* Dispatch only when there is something to look for. */
		if (full_scan ||
		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
		    impl.pf_fault->scan_addr)
			scan_flag |= pf_dispatch(rdip, &impl, full_scan);

		/* One full scan covers everything; no need to continue. */
		if (full_scan)
			break;
	}

done:
	/*
	 * If this is due to safe access, don't analyze the errors and return
	 * success regardless of how scan fabric went.
	 */
	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
		analyse_flag = PF_ERR_NO_PANIC;
	} else {
		analyse_flag = pf_analyse_error(derr, &impl);
	}

	/*
	 * If analyse_flag is 0 or PF_ERR_NO_ERROR, there's nothing here.  Skip
	 * ereport generation unless something went wrong with the scan.
	 */
	if ((analyse_flag & ~PF_ERR_NO_ERROR) != 0 ||
	    (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_DEADLOCK)) != 0) {
		pf_send_ereport(derr, &impl);
	} else {
		pf_dq_unlock_chain(&impl);
	}

	/*
	 * Check if any hardened driver's callback reported a panic.
	 * If so panic.
	 */
	if (scan_flag & PF_SCAN_CB_FAILURE)
		analyse_flag |= PF_ERR_PANIC;

	/*
	 * If a deadlock was detected, panic the system as error analysis has
	 * been compromised.
	 */
	if (scan_flag & PF_SCAN_DEADLOCK)
		analyse_flag |= PF_ERR_PANIC_DEADLOCK;

	/*
	 * For fatal errors, cache a copy of the pf_impl_t for post-mortem
	 * analysis (kmdb or mdb against a system crash dump). The ereports
	 * may not make it into the crash dump (errorq_dump can fill up - its
	 * size is 16 * ncpus, so on a 256-CPU system it holds just 4096
	 * entries, and fatal uncorrectable errors can be lost among
	 * correctable errors), but this cached structure will be available for
	 * inspection via the ::pcie_fatal_errors mdb dcmd.
	 *
	 * Note: Whether the system actually panics depends on the caller's
	 * configuration (e.g., the pcieb_die tunable). This cache is populated
	 * whenever PF_ERR_FATAL_FLAGS is set, regardless of whether a panic
	 * will actually occur.
	 */
	if ((analyse_flag & PF_ERR_FATAL_FLAGS) != 0)
		pcie_faulty_pf_impl = impl;

	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);

	return (analyse_flag);
}
391 
/*
 * Force every subsequent pf_scan_fabric() to walk the entire fabric by
 * setting the global pcie_full_scan tunable.
 */
void
pcie_force_fullscan(void)
{
	pcie_full_scan = B_TRUE;
}
397 
/*
 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
 * falls in the error path.
 *
 * Walks the children of "pdip", entering the error handler on each ready
 * child, gathering its registers, and recursing below bridges/switches as
 * appropriate.  Recursion terminates at PCIe endpoints.
 *
 * Returns PF_SCAN_* flags
 */
static int
pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
{
	dev_info_t	*dip;
	pcie_req_id_t	rid = impl->pf_fault->scan_bdf;
	pcie_bus_t	*bus_p;
	int		scan_flag = PF_SCAN_SUCCESS;

	for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
		/* Make sure dip is attached and ready */
		if (!(bus_p = pf_is_ready(dip)))
			continue;

		scan_flag |= pf_handler_enter(dip, impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			break;

		/*
		 * Handle this device if it is a:
		 * o Full Scan
		 * o PCI/PCI-X Device
		 * o Fault BDF = Device BDF
		 * o BDF/ADDR is in range of the Bridge/Switch
		 */
		if (full_scan ||
		    (bus_p->bus_bdf == rid) ||
		    pf_in_bus_range(bus_p, rid) ||
		    pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
			int hdl_flag = pf_default_hdl(dip, impl);
			scan_flag |= hdl_flag;

			/*
			 * A bridge may have detected no errors in which case
			 * there is no need to scan further down.
			 */
			if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
				continue;
		} else {
			/* Not in the error path; drop the handler lock. */
			pf_handler_exit(dip);
			continue;
		}

		/* match or in bridge bus-range */
		switch (bus_p->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
		case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
			/* PCIe<->PCI bridges: scan everything below. */
			scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
		{
			pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p);
			pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p);
			pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p);
			/*
			 * Continue if the fault BDF != the switch or there is a
			 * parity error
			 */
			if ((bus_p->bus_bdf != rid) ||
			    (err_p->pci_err_status & PF_PCI_PARITY_ERR) ||
			    (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR))
				scan_flag |= pf_dispatch(dip, impl, full_scan);
			break;
		}
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			/*
			 * Reached a PCIe end point so stop. Note dev_type
			 * PCI_DEV is just a PCIe device that requires IO Space
			 */
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			if (PCIE_IS_BDG(bus_p))
				scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		default:
			ASSERT(B_FALSE);
		}
	}
	return (scan_flag);
}
486 
487 /* Returns whether the "bdf" is in the bus range of a switch/bridge */
488 boolean_t
pf_in_bus_range(pcie_bus_t * bus_p,pcie_req_id_t bdf)489 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf)
490 {
491 	pci_bus_range_t *br_p = &bus_p->bus_bus_range;
492 	uint8_t		bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >>
493 	    PCIE_REQ_ID_BUS_SHIFT;
494 
495 	/* check if given bdf falls within bridge's bus range */
496 	if (PCIE_IS_BDG(bus_p) &&
497 	    ((bus_no >= br_p->lo) && (bus_no <= br_p->hi)))
498 		return (B_TRUE);
499 	else
500 		return (B_FALSE);
501 }
502 
503 /*
504  * Return whether the "addr" is in the assigned addr of a device.
505  */
506 boolean_t
pf_in_assigned_addr(pcie_bus_t * bus_p,uint64_t addr)507 pf_in_assigned_addr(pcie_bus_t *bus_p, uint64_t addr)
508 {
509 	uint_t		i;
510 	uint64_t	low, hi;
511 	pci_regspec_t	*assign_p = bus_p->bus_assigned_addr;
512 
513 	for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) {
514 		low = assign_p->pci_phys_low;
515 		hi = low + assign_p->pci_size_low;
516 		if ((addr < hi) && (addr >= low))
517 			return (B_TRUE);
518 	}
519 	return (B_FALSE);
520 }
521 
522 /*
523  * Returns whether the "addr" is in the addr range of a switch/bridge, or if the
524  * "addr" is in the assigned addr of a device.
525  */
526 static boolean_t
pf_in_addr_range(pcie_bus_t * bus_p,uint64_t addr)527 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr)
528 {
529 	uint_t		i;
530 	uint64_t	low, hi;
531 	ppb_ranges_t	*ranges_p = bus_p->bus_addr_ranges;
532 
533 	if (!addr)
534 		return (B_FALSE);
535 
536 	/* check if given address belongs to this device */
537 	if (pf_in_assigned_addr(bus_p, addr))
538 		return (B_TRUE);
539 
540 	/* check if given address belongs to a child below this device */
541 	if (!PCIE_IS_BDG(bus_p))
542 		return (B_FALSE);
543 
544 	for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) {
545 		switch (ranges_p->child_high & PCI_ADDR_MASK) {
546 		case PCI_ADDR_IO:
547 		case PCI_ADDR_MEM32:
548 			low = ranges_p->child_low;
549 			hi = ranges_p->size_low + low;
550 			if ((addr < hi) && (addr >= low))
551 				return (B_TRUE);
552 			break;
553 		case PCI_ADDR_MEM64:
554 			low = ((uint64_t)ranges_p->child_mid << 32) |
555 			    (uint64_t)ranges_p->child_low;
556 			hi = (((uint64_t)ranges_p->size_high << 32) |
557 			    (uint64_t)ranges_p->size_low) + low;
558 			if ((addr < hi) && (addr >= low))
559 				return (B_TRUE);
560 			break;
561 		}
562 	}
563 	return (B_FALSE);
564 }
565 
566 static pcie_bus_t *
pf_is_ready(dev_info_t * dip)567 pf_is_ready(dev_info_t *dip)
568 {
569 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
570 	if (!bus_p)
571 		return (NULL);
572 
573 	if (!(bus_p->bus_fm_flags & PF_FM_READY))
574 		return (NULL);
575 	return (bus_p);
576 }
577 
578 static void
pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t * pcix_ecc_regs,pcie_bus_t * bus_p,boolean_t bdg)579 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
580     pcie_bus_t *bus_p, boolean_t bdg)
581 {
582 	if (bdg) {
583 		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
584 		    PCI_PCIX_BDG_ECC_STATUS);
585 		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
586 		    PCI_PCIX_BDG_ECC_FST_AD);
587 		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
588 		    PCI_PCIX_BDG_ECC_SEC_AD);
589 		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
590 		    PCI_PCIX_BDG_ECC_ATTR);
591 	} else {
592 		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
593 		    PCI_PCIX_ECC_STATUS);
594 		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
595 		    PCI_PCIX_ECC_FST_AD);
596 		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
597 		    PCI_PCIX_ECC_SEC_AD);
598 		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
599 		    PCI_PCIX_ECC_ATTR);
600 	}
601 }
602 
603 
604 static void
pf_pcix_regs_gather(pf_data_t * pfd_p,pcie_bus_t * bus_p)605 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
606 {
607 	/*
608 	 * For PCI-X device PCI-X Capability only exists for Type 0 Headers.
609 	 * PCI-X Bridge Capability only exists for Type 1 Headers.
610 	 * Both capabilities do not exist at the same time.
611 	 */
612 	if (PCIE_IS_BDG(bus_p)) {
613 		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;
614 
615 		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);
616 
617 		pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p,
618 		    PCI_PCIX_SEC_STATUS);
619 		pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p,
620 		    PCI_PCIX_BDG_STATUS);
621 
622 		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
623 			/*
624 			 * PCI Express to PCI-X bridges only implement the
625 			 * secondary side of the PCI-X ECC registers, bit one is
626 			 * read-only so we make sure we do not write to it.
627 			 */
628 			if (!PCIE_IS_PCIE_BDG(bus_p)) {
629 				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
630 				    0);
631 				pf_pcix_ecc_regs_gather(
632 				    PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE);
633 				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
634 				    1);
635 			}
636 			pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0),
637 			    bus_p, B_TRUE);
638 		}
639 	} else {
640 		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);
641 
642 		pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p,
643 		    PCI_PCIX_COMMAND);
644 		pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p,
645 		    PCI_PCIX_STATUS);
646 		if (PCIX_ECC_VERSION_CHECK(bus_p))
647 			pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p,
648 			    B_TRUE);
649 	}
650 }
651 
/*
 * Gather the PCIe capability, slot, and (when present) AER registers for
 * "bus_p" into its pf_data_t for later analysis and ereport generation.
 */
static void
pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	/* Base PCIe capability: device status/control/capabilities. */
	pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS);
	pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
	pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP);

	/* A PCIe bridge with a PCI-X side also carries PCI-X error state. */
	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p,
		    PCIE_ROOTSTS);
		pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p,
		    PCIE_ROOTCTL);
	}

	/*
	 * For eligible components, we gather Slot Register state.
	 *
	 * Eligible components are:
	 * - a Downstream Port or a Root Port with the Slot Implemented
	 * capability bit set
	 * - hotplug capable
	 *
	 * Slot register state is useful, for instance, to determine whether the
	 * Slot's child device is physically present (via the Slot Status
	 * register).
	 */
	if ((PCIE_IS_SWD(bus_p) || PCIE_IS_ROOT(bus_p)) &&
	    PCIE_IS_HOTPLUG_ENABLED(PCIE_BUS2DIP(bus_p))) {
		pf_pcie_slot_regs_t *pcie_slot_regs = PCIE_SLOT_REG(pfd_p);
		pcie_slot_regs->pcie_slot_cap = PCIE_CAP_GET(32, bus_p,
		    PCIE_SLOTCAP);
		pcie_slot_regs->pcie_slot_control = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTCTL);
		pcie_slot_regs->pcie_slot_status = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTSTS);

		/* Only mark valid if none of the reads came back all-ones. */
		if (pcie_slot_regs->pcie_slot_cap != PCI_EINVAL32 &&
		    pcie_slot_regs->pcie_slot_control != PCI_EINVAL16 &&
		    pcie_slot_regs->pcie_slot_status != PCI_EINVAL16) {
			pcie_slot_regs->pcie_slot_regs_valid = B_TRUE;
		}
	}

	/* Everything below requires the AER extended capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	/* Gather UE AERs */
	pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CTL);
	pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_STS);
	pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_MASK);
	pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_SERV);
	/* The AER header log is four consecutive 32-bit registers. */
	PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG);
	PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x4);
	PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x8);
	PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0xc);

	/* Gather CE AERs */
	pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_STS);
	pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_MASK);

	/*
	 * If pci express to pci bridge then grab the bridge
	 * error registers.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SCTL);
		pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_STS);
		pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_MASK);
		pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_SERV);
		/* Secondary header log, also four 32-bit registers. */
		PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG);
		PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x4);
		PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x8);
		PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0xc);
	}

	/*
	 * If PCI Express root port then grab the root port
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs =
		    PCIE_ADV_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_CMD);
		pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_STS);
		pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_CE_SRC_ID);
		pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_ERR_SRC_ID);
	}
}
774 
775 static void
pf_pci_regs_gather(pf_data_t * pfd_p,pcie_bus_t * bus_p)776 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
777 {
778 	pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p);
779 
780 	/*
781 	 * Start by reading all the error registers that are available for
782 	 * pci and pci express and for leaf devices and bridges/switches
783 	 */
784 	pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT);
785 	pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM);
786 
787 	/*
788 	 * If pci-pci bridge grab PCI bridge specific error registers.
789 	 */
790 	if (PCIE_IS_BDG(bus_p)) {
791 		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
792 		pci_bdg_regs->pci_bdg_sec_stat =
793 		    PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
794 		pci_bdg_regs->pci_bdg_ctrl =
795 		    PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
796 	}
797 
798 	/*
799 	 * If pci express device grab pci express error registers and
800 	 * check for advanced error reporting features and grab them if
801 	 * available.
802 	 */
803 	if (PCIE_IS_PCIE(bus_p))
804 		pf_pcie_regs_gather(pfd_p, bus_p);
805 	else if (PCIE_IS_PCIX(bus_p))
806 		pf_pcix_regs_gather(pfd_p, bus_p);
807 
808 }
809 
/*
 * Clear the PCI-X error state previously captured by pf_pcix_regs_gather()
 * by writing the gathered values back to the status registers (PCI status
 * bits are write-one-to-clear, so this clears exactly the bits observed).
 */
static void
pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
		    pcix_bdg_regs->pcix_bdg_sec_stat);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
		    pcix_bdg_regs->pcix_bdg_stat);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers.  For
			 * clearing, there is no need to "select" the ECC
			 * register, just write what was originally read.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);

			}
			/* Secondary side is restored from bank 1. */
			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
		    pcix_regs->pcix_status);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);

			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
			    pcix_ecc_regs->pcix_ecc_ctlstat);
		}
	}
}
856 
/*
 * Clear the PCIe and AER error state previously captured by
 * pf_pcie_regs_gather() by writing the gathered status values back
 * (write-one-to-clear semantics).
 */
static void
pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	/* Clear device status error bits. */
	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);

	/* A PCIe bridge with a PCI-X side also has PCI-X state to clear. */
	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	/* The remaining registers only exist with the AER capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
	    pcie_adv_regs->pcie_ue_status);

	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
	    pcie_adv_regs->pcie_ce_status);

	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
		    pcie_bdg_regs->pcie_sue_status);
	}

	/*
	 * If PCI Express root complex then clear the root complex
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;

		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
		    pcie_rp_regs->pcie_rp_err_status);
	}
}
898 
/*
 * Clear all previously gathered error state on a device: the
 * capability-specific (PCIe or PCI-X) registers first, then the legacy PCI
 * status, and for bridges the secondary-side status as well.
 */
static void
pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_clear(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	/* Write back the saved legacy PCI status bits to clear them. */
	PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);

	/* Bridges also clear the secondary status register. */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
		    pci_bdg_regs->pci_bdg_sec_stat);
	}
}
915 
916 /* ARGSUSED */
917 void
pcie_clear_errors(dev_info_t * dip)918 pcie_clear_errors(dev_info_t *dip)
919 {
920 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
921 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
922 
923 	ASSERT(bus_p);
924 
925 	pf_pci_regs_gather(pfd_p, bus_p);
926 	pf_pci_regs_clear(pfd_p, bus_p);
927 }
928 
/* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
static void
pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
	pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
	uint32_t root_err = rp_regs->pcie_rp_err_status;
	uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
	int num_faults = 0;

	/* Since this data structure is reused, make sure to reset it */
	root_fault->full_scan = B_FALSE;
	root_fault->scan_bdf = PCIE_INVALID_BDF;
	root_fault->scan_addr = 0;

	/*
	 * Without AER there is no source-identification information, so any
	 * secondary-side bridge error forces a full fabric scan.
	 */
	if (!PCIE_HAS_AER(bus_p) &&
	    (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/*
	 * Check to see if an error has been received that
	 * requires a scan of the fabric.  Count the number of
	 * faults seen.  If MUL CE/FE_NFE that counts for
	 * at least 2 faults, so just return with full_scan.
	 */
	if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
	    (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	if (root_err & PCIE_AER_RE_STS_CE_RCVD)
		num_faults++;

	if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_CA)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_UR)
		num_faults++;

	/* If no faults just return */
	if (num_faults == 0)
		return;

	/* If faults > 1 do full scan */
	if (num_faults > 1) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/* By this point, there is only 1 fault detected */
	if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
		/* The logged CE source ID names the device to scan. */
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
		num_faults--;
	} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
		/* The logged UE source ID names the device to scan. */
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
		num_faults--;
	} else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
	    HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
	    (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
	    DDI_SUCCESS)) {
		/* Decode the logged TLP header to recover a target address. */
		PCIE_ROOT_FAULT(pfd_p)->scan_addr =
		    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
		num_faults--;
	}

	/*
	 * This means an error did occur, but we couldn't extract the fault BDF
	 */
	if (num_faults > 0)
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;

}
1007 
1008 
1009 /*
1010  * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
1011  *
1012  * Returns a scan flag.
 * o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
1014  *   Fault Q
1015  * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
1016  * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
1017  * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
1018  *   unnecessary scanning
1019  * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
1020  *   time.
1021  */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
	int cb_sts, scan_flag = PF_SCAN_SUCCESS;

	/* Make sure this device hasn't already been snapshotted and cleared */
	if (pfd_p->pe_valid == B_TRUE) {
		scan_flag |= PF_SCAN_IN_DQ;
		goto done;
	}

	/*
	 * If this is a device used for PCI passthrough into a virtual machine,
	 * don't let any error it caused panic the system.
	 */
	if (bus_p->bus_fm_flags & PF_FM_IS_PASSTHRU)
		pfd_p->pe_severity_mask |= PF_ERR_PANIC;

	/*
	 * Read vendor/device ID and check with cached data; if it doesn't
	 * match, it could very well mean that the device is no longer
	 * responding.  In this case, we return PF_SCAN_BAD_RESPONSE; should
	 * the caller choose to panic in this case, we will have the basic
	 * info in the error queue for the purposes of postmortem debugging.
	 */
	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
		char buf[FM_MAX_CLASS];

		/* Post a "no response" ereport for this device. */
		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
		    PCI_ERROR_SUBCLASS, PCI_NR);
		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

		/*
		 * For IOV/Hotplug purposes skip gathering info for this device,
		 * but populate affected info and severity.  Clear out any data
		 * that may have been saved in the last fabric scan.
		 */
		pf_reset_pfd(pfd_p);
		pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

		/* Add the snapshot to the error q */
		pf_en_dq(pfd_p, impl);
		pfd_p->pe_valid = B_TRUE;

		return (PF_SCAN_BAD_RESPONSE);
	}

	/* Snapshot this device's error state, then clear it in hardware. */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);

	/* Root ports additionally determine the fault BDF/addr or full scan */
	if (PCIE_IS_RP(bus_p))
		pf_pci_find_rp_fault(pfd_p, bus_p);

	/* Give the driver's registered error handler a chance to weigh in. */
	cb_sts = pf_fm_callback(dip, impl->pf_derr);

	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
		scan_flag |= PF_SCAN_CB_FAILURE;

	/* Add the snapshot to the error q */
	pf_en_dq(pfd_p, impl);

done:
	/*
	 * If a bridge does not have any error no need to scan any further down.
	 * For PCIe devices, check the PCIe device status and PCI secondary
	 * status.
	 * - Some non-compliant PCIe devices do not utilize PCIe
	 *   error registers.  If so rely on legacy PCI error registers.
	 * For PCI devices, check the PCI secondary status.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	if (PCIE_IS_PCI_BDG(bus_p) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	pfd_p->pe_valid = B_TRUE;
	return (scan_flag);
}
1108 
1109 /*
1110  * Set the passthru flag on a device bus_p. Called by passthru drivers to
1111  * indicate when a device is or is no longer under passthru control.
1112  */
1113 void
pf_set_passthru(dev_info_t * dip,boolean_t is_passthru)1114 pf_set_passthru(dev_info_t *dip, boolean_t is_passthru)
1115 {
1116 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1117 
1118 	if (is_passthru) {
1119 		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_PASSTHRU);
1120 	} else {
1121 		atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_PASSTHRU);
1122 	}
1123 }
1124 
1125 /*
1126  * Called during postattach to initialize a device's error handling
1127  * capabilities.  If the devices has already been hardened, then there isn't
1128  * much needed.  Otherwise initialize the device's default FMA capabilities.
1129  *
1130  * In a future project where PCIe support is removed from pcifm, several
1131  * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
1132  * created here so that the PCI/PCIe eversholt rules will work properly.
1133  */
void
pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
{
	pcie_bus_t		*bus_p = PCIE_DIP2BUS(dip);
	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
	boolean_t		need_cb_register = B_FALSE;

	if (!bus_p) {
		cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return;
	}

	if (fmhdl) {
		/*
		 * If device is only ereport capable and not callback capable
		 * make it callback capable. The only downside is that the
		 * "fm-errcb-capable" property is not created for this device
		 * which should be ok since it's not used anywhere.
		 */
		if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
			need_cb_register = B_TRUE;
	} else {
		int cap;
		/*
		 * fm-capable in driver.conf can be used to set fm_capabilities.
		 * If fm-capable is not defined, set the default
		 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
		 */
		cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
		    DDI_PROP_DONTPASS, "fm-capable",
		    DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
		cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);

		/* Mark as non-hardened: FMA is being set up on its behalf. */
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_NH);

		if (cmd == DDI_ATTACH) {
			ddi_fm_init(dip, &cap, &ibc);
			pci_ereport_setup(dip);
		}

		if (cap & DDI_FM_ERRCB_CAPABLE)
			need_cb_register = B_TRUE;

		/* ddi_fm_init may have created the handle; re-fetch it. */
		fmhdl = DEVI(dip)->devi_fmhdl;
	}

	/* If ddi_fm_init fails for any reason RETURN */
	if (!fmhdl) {
		(void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
		return;
	}

	fmhdl->fh_cap |=  DDI_FM_ERRCB_CAPABLE;
	if (cmd == DDI_ATTACH) {
		if (need_cb_register)
			ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
	}

	/* The device is now ready for fabric error handling. */
	atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
}
1195 
/* Undo pf_init()'s FMA setup; called at predetach. */
void
pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);

	if (!bus_p)
		return;

	/* Don't fini anything if device isn't FM Ready */
	if (!(bus_p->bus_fm_flags & PF_FM_READY))
		return;

	/* no other code should set the flag to false */
	atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_READY);

	/*
	 * Grab the mutex to make sure device isn't in the middle of
	 * error handling.  Setting the bus_fm_flag to ~PF_FM_READY
	 * should prevent this device from being error handled after
	 * the mutex has been released.
	 */
	(void) pf_handler_enter(dip, NULL);
	pf_handler_exit(dip);

	/* undo non-hardened drivers */
	if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
		if (cmd == DDI_DETACH) {
			atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_NH);
			pci_ereport_teardown(dip);
			/*
			 * ddi_fm_fini itself unregisters the error handler,
			 * so no need to explicitly call unregister here.
			 */
			ddi_fm_fini(dip);
		}
	}
}
1234 
/*
 * No-op error callback registered by pf_init() on behalf of devices that are
 * not callback capable, so they appear callback-capable to the FMA framework.
 */
/*ARGSUSED*/
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
	return (DDI_FM_OK);
}
1241 
1242 /*
1243  * Add PFD to queue.  If it is an RC add it to the beginning,
1244  * otherwise add it to the end.
1245  */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
	pf_data_t *head_p = impl->pf_dq_head_p;
	pf_data_t *tail_p = impl->pf_dq_tail_p;

	impl->pf_total++;

	/* Empty queue: this pfd becomes both head and tail. */
	if (!head_p) {
		ASSERT(PFD_IS_ROOT(pfd_p));
		impl->pf_dq_head_p = pfd_p;
		impl->pf_dq_tail_p = pfd_p;
		pfd_p->pe_prev = NULL;
		pfd_p->pe_next = NULL;
		return;
	}

	/* Check if this is a Root Port pfd */
	if (PFD_IS_ROOT(pfd_p)) {
		pf_data_t *root_p, *last_p = NULL;

		/* The first item must be a RP */
		root_p = head_p;
		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
		    last_p = last_p->pe_next)
			root_p = last_p;

		/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
		root_p->pe_next = pfd_p;
		pfd_p->pe_prev = root_p;
		pfd_p->pe_next = last_p;

		/* If there is no non-RP pfd, the new RP pfd becomes the tail */
		if (last_p)
			last_p->pe_prev = pfd_p;
		else
			tail_p = pfd_p;
	} else {
		/* Non-RP pfds are simply appended at the tail. */
		tail_p->pe_next = pfd_p;
		pfd_p->pe_prev = tail_p;
		pfd_p->pe_next = NULL;
		tail_p = pfd_p;
	}

	impl->pf_dq_head_p = head_p;
	impl->pf_dq_tail_p = tail_p;
}
1292 
1293 /*
1294  * Ignore:
1295  * - TRAINING: as leaves do not have children
1296  * - SD: as leaves do not have children
1297  */
/*
 * Error analysis table for PCIe (and legacy PCI) endpoint devices.  Each row
 * maps an AER uncorrectable error status bit to its analysis handler plus the
 * affected-device flags: the primary set, and a secondary set used by
 * pf_analyse_error_tbl() when the primary information is unavailable.
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1331 
/*
 * Error analysis table for PCIe root ports.  Same layout as pcie_pcie_tbl;
 * rows flagged PF_AFFECTED_AER/PF_AFFECTED_ADDR fall back to the secondary
 * flags when the AER logs (or root fault address) are unavailable.
 */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_ADDR, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_CA,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};
1372 
/*
 * Error analysis table for PCIe switch upstream/downstream ports.  Same
 * layout and fallback semantics as pcie_rp_tbl.
 */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};
1413 
/*
 * Error analysis table for PCIe-to-PCI bridges, keyed by the AER secondary
 * uncorrectable error (SUE) status bits.  Rows flagged PF_AFFECTED_SAER fall
 * back to the secondary flags when the secondary AER logs are unavailable.
 */
const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
	{PCIE_AER_SUCE_TA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_MA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_TA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_MA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_USC_ERR,		pf_panic,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_DATA_ERR,	pf_analyse_uc_data,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_ATTR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_UC_ADDR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_TIMER_EXPIRED,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_PERR_ASSERT,	pf_analyse_perr_assert,
	    0, 0},

	{PCIE_AER_SUCE_SERR_ASSERT,	pf_no_panic,
	    0, 0},

	{PCIE_AER_SUCE_INTERNAL_ERR,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{0, NULL, 0, 0}
};
1456 
/*
 * Error analysis table for PCI-PCI bridges, keyed by the bridge's secondary
 * PCI status bits.
 */
const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1478 
/*
 * Error analysis table for legacy PCI devices, keyed by the primary PCI
 * status register bits.
 */
const pf_fab_err_tbl_t pcie_pci_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1500 
/*
 * Reduce the (secondary) UE status to only the bits that are not masked in
 * the corresponding AER mask register; XOR with all-ones complements the
 * mask.
 */
#define	PF_MASKED_AER_ERR(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ue_status & \
	    ((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF))
#define	PF_MASKED_SAER_ERR(pfd_p) \
	(PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \
	    ((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF))
1507 /*
1508  * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
1509  * Queue.
1510  */
static int
pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	int		sts_flags, error_flags = 0;
	pf_data_t	*pfd_p;

	/* Walk every pfd queued during dispatch and classify its severity. */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		sts_flags = 0;

		/* skip analysing error when no error info is gathered */
		if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
			goto done;

		/* Pick the analysis table(s) matching the device type. */
		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
			/* no adjust_for_aer for pseudo RC */
			/* keep the severity passed on from RC if any */
			sts_flags |= pfd_p->pe_severity_flags;
			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			/* Bridges analyse both primary and secondary sides. */
			pf_adjust_for_no_aer(pfd_p);
			pf_adjust_for_no_saer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_tbl,
			    PF_MASKED_AER_ERR(pfd_p));
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_bdg_tbl,
			    PF_MASKED_SAER_ERR(pfd_p));
			/*
			 * Some non-compliant PCIe devices do not utilize PCIe
			 * error registers.  So fallthrough and rely on legacy
			 * PCI error registers.
			 */
			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
				break;
			/* FALLTHROUGH */
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pci_tbl,
			    PCI_ERR_REG(pfd_p)->pci_err_status);

			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
				break;

			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pci_bdg_tbl,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
		}

		pfd_p->pe_severity_flags = sts_flags;

done:
		pfd_p->pe_orig_severity_flags = pfd_p->pe_severity_flags;
		/* Have pciev_eh adjust the severity */
		pfd_p->pe_severity_flags = pciev_eh(pfd_p, impl);

		/* Drop any severities masked for this device (passthru). */
		pfd_p->pe_severity_flags &= ~pfd_p->pe_severity_mask;

		error_flags |= pfd_p->pe_severity_flags;
	}

	return (error_flags);
}
1606 
1607 static int
pf_analyse_error_tbl(ddi_fm_error_t * derr,pf_impl_t * impl,pf_data_t * pfd_p,const pf_fab_err_tbl_t * tbl,uint32_t err_reg)1608 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
1609     pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg)
1610 {
1611 	const pf_fab_err_tbl_t *row;
1612 	int err = 0;
1613 	uint16_t flags;
1614 	uint32_t bit;
1615 
1616 	for (row = tbl; err_reg && (row->bit != 0); row++) {
1617 		bit = row->bit;
1618 		if (!(err_reg & bit))
1619 			continue;
1620 		err |= row->handler(derr, bit, impl->pf_dq_head_p, pfd_p);
1621 
1622 		flags = row->affected_flags;
1623 		/*
1624 		 * check if the primary flag is valid;
1625 		 * if not, use the secondary flag
1626 		 */
1627 		if (flags & PF_AFFECTED_AER) {
1628 			if (!HAS_AER_LOGS(pfd_p, bit)) {
1629 				flags = row->sec_affected_flags;
1630 			}
1631 		} else if (flags & PF_AFFECTED_SAER) {
1632 			if (!HAS_SAER_LOGS(pfd_p, bit)) {
1633 				flags = row->sec_affected_flags;
1634 			}
1635 		} else if (flags & PF_AFFECTED_ADDR) {
1636 			/* only Root has this flag */
1637 			if (PCIE_ROOT_FAULT(pfd_p)->scan_addr == 0) {
1638 				flags = row->sec_affected_flags;
1639 			}
1640 		}
1641 
1642 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags |= flags;
1643 	}
1644 
1645 	if (!err)
1646 		err = PF_ERR_NO_ERROR;
1647 
1648 	return (err);
1649 }
1650 
1651 /*
1652  * PCIe Completer Abort and Unsupport Request error analyser.  If a PCIe device
1653  * issues a CA/UR a corresponding Received CA/UR should have been seen in the
1654  * PCIe root complex.  Check to see if RC did indeed receive a CA/UR, if so then
1655  * this error may be safely ignored.  If not check the logs and see if an
1656  * associated handler for this transaction can be found.
1657  */
1658 /* ARGSUSED */
1659 static int
pf_analyse_ca_ur(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1660 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1661     pf_data_t *pfd_p)
1662 {
1663 	uint32_t	abort_type;
1664 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1665 
1666 	/* If UR's are masked forgive this error */
1667 	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1668 	    (bit == PCIE_AER_UCE_UR))
1669 		return (PF_ERR_NO_PANIC);
1670 
1671 	/*
1672 	 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
1673 	 * such as a config read or a bad DMA address.
1674 	 */
1675 	if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
1676 		goto handle_lookup;
1677 
1678 	if (bit == PCIE_AER_UCE_UR)
1679 		abort_type = PCI_STAT_R_MAST_AB;
1680 	else
1681 		abort_type = PCI_STAT_R_TARG_AB;
1682 
1683 	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1684 		return (PF_ERR_MATCHED_RC);
1685 
1686 handle_lookup:
1687 	if (HAS_AER_LOGS(pfd_p, bit) &&
1688 	    pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
1689 			return (PF_ERR_MATCHED_DEVICE);
1690 
1691 	return (PF_ERR_PANIC);
1692 }
1693 
1694 /*
1695  * PCIe-PCI Bridge Received Master Abort and Target error analyser.  If a PCIe
1696  * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
1697  * the PCIe root complex.  Check to see if RC did indeed receive a CA/UR, if so
1698  * then this error may be safely ignored.  If not check the logs and see if an
1699  * associated handler for this transaction can be found.
1700  */
1701 /* ARGSUSED */
1702 static int
pf_analyse_ma_ta(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1703 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1704     pf_data_t *pfd_p)
1705 {
1706 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1707 	uint32_t	abort_type;
1708 
1709 	/* If UR's are masked forgive this error */
1710 	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1711 	    (bit == PCIE_AER_SUCE_RCVD_MA))
1712 		return (PF_ERR_NO_PANIC);
1713 
1714 	if (bit == PCIE_AER_SUCE_RCVD_MA)
1715 		abort_type = PCI_STAT_R_MAST_AB;
1716 	else
1717 		abort_type = PCI_STAT_R_TARG_AB;
1718 
1719 	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1720 		return (PF_ERR_MATCHED_RC);
1721 
1722 	if (!HAS_SAER_LOGS(pfd_p, bit))
1723 		return (PF_ERR_PANIC);
1724 
1725 	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
1726 		return (PF_ERR_MATCHED_DEVICE);
1727 
1728 	return (PF_ERR_PANIC);
1729 }
1730 
1731 /*
1732  * Generic PCI error analyser.  This function is used for Parity Errors,
1733  * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
1734  * In general PCI devices do not have error logs, it is very difficult to figure
1735  * out what transaction caused the error.  Instead find the nearest PCIe-PCI
1736  * Bridge and check to see if it has logs and if it has an error associated with
1737  * this PCI Device.
1738  */
/* ARGSUSED */
static int
pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	pf_data_t	*parent_pfd_p;
	uint16_t	cmd;
	uint32_t	aer_ue_status;
	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);
	pf_pcie_adv_bdg_err_regs_t *parent_saer_p;

	/* A signalled system error is always fatal. */
	if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
		return (PF_ERR_PANIC);

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCI_STAT_R_MAST_AB))
		return (PF_ERR_NO_PANIC);

	/*
	 * Map the PCI status bit to the secondary AER status bits the parent
	 * PCIe bridge would have logged for the same event: parity errors map
	 * to PERR assertion, abort bits to the CA/MA secondary errors.
	 */
	if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
		aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
	} else {
		aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
		    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
		    PCIE_AER_SUCE_RCVD_MA);
	}

	parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
	if (parent_pfd_p == NULL)
		return (PF_ERR_PANIC);

	/* Check if parent bridge has seen this error */
	parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
	if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
	    !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
		return (PF_ERR_PANIC);

	/*
	 * If the addr or bdf from the parent PCIe bridge logs belong to this
	 * PCI device, assume the PCIe bridge's error handling has already taken
	 * care of this PCI device's error.
	 */
	if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
	    pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
		return (PF_ERR_MATCHED_PARENT);

	/*
	 * If this device is a PCI-PCI bridge, check if the bdf in the parent
	 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges.
	 * If they are, then assume the PCIe bridge's error handling has already
	 * taken care of this PCI-PCI bridge device's error.
	 */
	if (PCIE_IS_BDG(bus_p) &&
	    pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
		return (PF_ERR_MATCHED_PARENT);

	return (PF_ERR_PANIC);
}
1801 
1802 /*
1803  * PCIe Bridge transactions associated with PERR.
1804  * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
1805  * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
 * o Bridge received a poisoned Completion on a Split Transaction from PCIe
 * o Bridge received a poisoned Completion on a Delayed Transaction from PCIe
1808  *
1809  * Check for non-poisoned PCIe transactions that got forwarded to the secondary
1810  * side and detects a PERR#.  Except for delayed read completions, a poisoned
1811  * TLP will be forwarded to the secondary bus and PERR# will be asserted.
1812  */
/* ARGSUSED */
static int
pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint16_t	cmd;
	int		hdl_sts = PF_HDL_NOTFOUND;
	int		err = PF_ERR_NO_ERROR;
	pf_pcie_adv_bdg_err_regs_t *saer_p;


	if (HAS_SAER_LOGS(pfd_p, bit)) {
		/*
		 * The bridge logged the offending secondary-side transaction;
		 * decode it so we can tell what kind of access took the PERR#.
		 */
		saer_p = PCIE_ADV_BDG_REG(pfd_p);
		if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
			return (PF_ERR_PANIC);

cmd_switch:
		switch (cmd) {
		case PCI_PCIX_CMD_IOWR:
		case PCI_PCIX_CMD_MEMWR:
		case PCI_PCIX_CMD_MEMWR_BL:
		case PCI_PCIX_CMD_MEMWRBL:
			/* Posted Writes Transactions */
			if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO)
				hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
				    B_FALSE);
			break;
		case PCI_PCIX_CMD_CFWR:
			/*
			 * Check to see if it is a non-posted write.  If so, a
			 * UR Completion would have been sent.
			 */
			if (pf_matched_in_rc(dq_head_p, pfd_p,
			    PCI_STAT_R_MAST_AB)) {
				hdl_sts = PF_HDL_FOUND;
				err = PF_ERR_MATCHED_RC;
				goto done;
			}
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_SPL:
			/* Split Completion: try to match an access handle. */
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_DADR:
			/*
			 * Dual Address Cycle: the real command is logged in
			 * the upper nibble of the second header dword.
			 * Re-dispatch on it unless it is DADR again (which
			 * would mean a malformed log — fall into default).
			 */
			cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
			    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
			    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
			if (cmd != PCI_PCIX_CMD_DADR)
				goto cmd_switch;
			/* FALLTHROUGH */
		default:
			/* Unexpected situation, panic */
			hdl_sts = PF_HDL_NOTFOUND;
		}

		/* A matched handle means the fault was contained. */
		if (hdl_sts == PF_HDL_FOUND)
			err = PF_ERR_MATCHED_DEVICE;
		else
			err = PF_ERR_PANIC;
	} else {
		/*
		 * No secondary AER logs are available; fall back to deducing
		 * the transaction type from the PCI/PCIe status registers.
		 *
		 * Check to see if it is a non-posted write.  If so, a UR
		 * Completion would have been sent.
		 */
		if ((PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED) &&
		    pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB))
			err = PF_ERR_MATCHED_RC;

		/* Check for posted writes.  Transaction is lost. */
		if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_S_PERROR)
			err = PF_ERR_PANIC;

		/*
		 * All other scenarios are due to read completions.  Check for
		 * PERR on the primary side.  If found the primary side error
		 * handling will take care of this error.
		 */
		if (err == PF_ERR_NO_ERROR) {
			if (PCI_ERR_REG(pfd_p)->pci_err_status &
			    PCI_STAT_PERROR)
				err = PF_ERR_MATCHED_PARENT;
			else
				err = PF_ERR_PANIC;
		}
	}

done:
	return (err);
}
1907 
1908 /*
1909  * PCIe Poisoned TLP error analyser.  If a PCIe device receives a Poisoned TLP,
1910  * check the logs and see if an associated handler for this transaction can be
1911  * found.
1912  */
1913 /* ARGSUSED */
1914 static int
pf_analyse_ptlp(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1915 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1916     pf_data_t *pfd_p)
1917 {
1918 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1919 
1920 	/*
1921 	 * If AERs are supported find the logs in this device, otherwise look in
1922 	 * it's parent's logs.
1923 	 */
1924 	if (HAS_AER_LOGS(pfd_p, bit)) {
1925 		pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0);
1926 
1927 		/*
1928 		 * Double check that the log contains a poisoned TLP.
1929 		 * Some devices like PLX switch do not log poison TLP headers.
1930 		 */
1931 		if (hdr->ep) {
1932 			if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) ==
1933 			    PF_HDL_FOUND)
1934 				return (PF_ERR_MATCHED_DEVICE);
1935 		}
1936 
1937 		/*
1938 		 * If an address is found and hdl lookup failed panic.
1939 		 * Otherwise check parents to see if there was enough
1940 		 * information recover.
1941 		 */
1942 		if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr)
1943 			return (PF_ERR_PANIC);
1944 	}
1945 
1946 	/*
1947 	 * Check to see if the rc has already handled this error or a parent has
1948 	 * already handled this error.
1949 	 *
1950 	 * If the error info in the RC wasn't enough to find the fault device,
1951 	 * such as if the faulting device lies behind a PCIe-PCI bridge from a
1952 	 * poisoned completion, check to see if the PCIe-PCI bridge has enough
1953 	 * info to recover.  For completion TLP's, the AER header logs only
1954 	 * contain the faulting BDF in the Root Port.  For PCIe device the fault
1955 	 * BDF is the fault device.  But if the fault device is behind a
1956 	 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI
1957 	 * bridge's secondary bus number.
1958 	 */
1959 	if (!PFD_IS_ROOT(pfd_p)) {
1960 		dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p));
1961 		pf_data_t *parent_pfd_p;
1962 
1963 		if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) {
1964 			if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
1965 				return (PF_ERR_MATCHED_RC);
1966 		}
1967 
1968 		parent_pfd_p = PCIE_DIP2PFD(pdip);
1969 
1970 		if (HAS_AER_LOGS(parent_pfd_p, bit))
1971 			return (PF_ERR_MATCHED_PARENT);
1972 	} else {
1973 		pf_data_t *bdg_pfd_p;
1974 		pcie_req_id_t secbus;
1975 
1976 		/*
1977 		 * Looking for a pcie bridge only makes sense if the BDF
1978 		 * Dev/Func = 0/0
1979 		 */
1980 		if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1981 			goto done;
1982 
1983 		secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf;
1984 
1985 		if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF))
1986 			goto done;
1987 
1988 		bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus);
1989 
1990 		if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p,
1991 		    PCIE_AER_SUCE_PERR_ASSERT)) {
1992 			return pf_analyse_perr_assert(derr,
1993 			    PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p);
1994 		}
1995 	}
1996 done:
1997 	return (PF_ERR_PANIC);
1998 }
1999 
2000 /*
2001  * PCIe-PCI Bridge Received Master and Target abort error analyser on Split
2002  * Completions.  If a PCIe Bridge receives a MA/TA check logs and see if an
2003  * associated handler for this transaction can be found.
2004  */
2005 /* ARGSUSED */
2006 static int
pf_analyse_sc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2007 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2008     pf_data_t *pfd_p)
2009 {
2010 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2011 	uint16_t	cmd;
2012 	int		sts = PF_HDL_NOTFOUND;
2013 
2014 	if (!HAS_SAER_LOGS(pfd_p, bit))
2015 		return (PF_ERR_PANIC);
2016 
2017 	if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
2018 		return (PF_ERR_PANIC);
2019 
2020 	if (cmd == PCI_PCIX_CMD_SPL)
2021 		sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);
2022 
2023 	if (sts == PF_HDL_FOUND)
2024 		return (PF_ERR_MATCHED_DEVICE);
2025 
2026 	return (PF_ERR_PANIC);
2027 }
2028 
2029 /*
2030  * PCIe Timeout error analyser.  This error can be forgiven if it is marked as
2031  * CE Advisory.  If it is marked as advisory, this means the HW can recover
2032  * and/or retry the transaction automatically. Additionally, if a device's
2033  * parent slot reports that it is no longer physically present, we do not panic,
2034  * as one would not expect a missing device to respond to a command.
2035  */
2036 /* ARGSUSED */
2037 static int
pf_analyse_to(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2038 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2039     pf_data_t *pfd_p)
2040 {
2041 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2042 	pf_data_t	*rppfd = PCIE_DIP2PFD(rpdip);
2043 	pf_pcie_slot_regs_t	*p_pcie_slot_regs;
2044 
2045 	if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
2046 		return (PF_ERR_NO_PANIC);
2047 
2048 	p_pcie_slot_regs = PCIE_SLOT_REG(rppfd);
2049 	if (p_pcie_slot_regs->pcie_slot_regs_valid) {
2050 		/*
2051 		 * If the device is reported gone from its parent slot, then it
2052 		 * is expected that any outstanding commands would time out. In
2053 		 * this case, do not panic.
2054 		 */
2055 		if ((p_pcie_slot_regs->pcie_slot_status &
2056 		    PCIE_SLOTSTS_PRESENCE_DETECTED) == 0x0) {
2057 			return (PF_ERR_NO_PANIC);
2058 		}
2059 	}
2060 
2061 	return (PF_ERR_PANIC);
2062 }
2063 
2064 /*
2065  * PCIe Unexpected Completion.  Check to see if this TLP was misrouted by
2066  * matching the device BDF with the TLP Log.  If misrouting panic, otherwise
2067  * don't panic.
2068  */
2069 /* ARGSUSED */
2070 static int
pf_analyse_uc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2071 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2072     pf_data_t *pfd_p)
2073 {
2074 	if (HAS_AER_LOGS(pfd_p, bit) &&
2075 	    (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
2076 		return (PF_ERR_NO_PANIC);
2077 
2078 	/*
2079 	 * This is a case of mis-routing. Any of the switches above this
2080 	 * device could be at fault.
2081 	 */
2082 	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ROOT;
2083 
2084 	return (PF_ERR_PANIC);
2085 }
2086 
2087 /*
2088  * PCIe-PCI Bridge Uncorrectable Data error analyser.  All Uncorrectable Data
2089  * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
 * Posted Writes.  Check the logs for Posted Writes and check whether the RC
 * saw a Poisoned TLP.
2092  *
2093  * Non-Posted Writes will also generate a UR in the completion status, which the
2094  * RC should also see.
2095  */
2096 /* ARGSUSED */
2097 static int
pf_analyse_uc_data(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2098 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2099     pf_data_t *pfd_p)
2100 {
2101 	dev_info_t	*rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2102 
2103 	if (!HAS_SAER_LOGS(pfd_p, bit))
2104 		return (PF_ERR_PANIC);
2105 
2106 	if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
2107 		return (PF_ERR_MATCHED_RC);
2108 
2109 	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
2110 		return (PF_ERR_MATCHED_DEVICE);
2111 
2112 	return (PF_ERR_PANIC);
2113 }
2114 
2115 /* ARGSUSED */
2116 static int
pf_no_panic(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2117 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2118     pf_data_t *pfd_p)
2119 {
2120 	return (PF_ERR_NO_PANIC);
2121 }
2122 
2123 /* ARGSUSED */
2124 static int
pf_panic(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2125 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2126     pf_data_t *pfd_p)
2127 {
2128 	return (PF_ERR_PANIC);
2129 }
2130 
2131 /*
2132  * If a PCIe device does not support AER, assume all AER statuses have been set,
 * unless other registers indicate that a certain error did not occur.
2134  */
static void
pf_adjust_for_no_aer(pf_data_t *pfd_p)
{
	uint32_t	aer_ue = 0;
	uint16_t	status;

	/* Device has real AER registers; no emulation needed. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	/* Fatal detected: assume the whole fatal UE mask may apply. */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		aer_ue = PF_AER_FATAL_ERR;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		/*
		 * Non-fatal detected: start from the full non-fatal UE mask,
		 * then clear any bits the legacy PCI/PCIe status registers
		 * prove did not happen.
		 */
		aer_ue = PF_AER_NON_FATAL_ERR;
		status = PCI_ERR_REG(pfd_p)->pci_err_status;

		/* Check if the device received a PTLP */
		if (!(status & PCI_STAT_PERROR))
			aer_ue &= ~PCIE_AER_UCE_PTLP;

		/* Check if the device signaled a CA */
		if (!(status & PCI_STAT_S_TARG_AB))
			aer_ue &= ~PCIE_AER_UCE_CA;

		/* Check if the device sent a UR */
		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED))
			aer_ue &= ~PCIE_AER_UCE_UR;

		/*
		 * Ignore ECRCs as it is optional and will manifest itself as
		 * another error like PTLP and MFP
		 */
		aer_ue &= ~PCIE_AER_UCE_ECRC;

		/*
		 * Generally if NFE is set, SERR should also be set. Exception:
		 * When certain non-fatal errors are masked, and some of them
		 * happened to be the cause of the NFE, SERR will not be set and
		 * they can not be the source of this interrupt.
		 *
		 * On x86, URs are masked (NFE + UR can be set), if any other
		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
		 * occur, SERR should be set since they are not masked. So if
		 * SERR is not set, none of them occurred.
		 */
		if (!(status & PCI_STAT_S_SYSERR))
			aer_ue &= ~PCIE_AER_UCE_TO;
	}

	/* Link training and surprise down only make sense for bridges. */
	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
		aer_ue &= ~PCIE_AER_UCE_TRAINING;
		aer_ue &= ~PCIE_AER_UCE_SD;
	}

	/* Publish the synthesized UE status for the analysis code. */
	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
}
2192 
2193 static void
pf_adjust_for_no_saer(pf_data_t * pfd_p)2194 pf_adjust_for_no_saer(pf_data_t *pfd_p)
2195 {
2196 	uint32_t	s_aer_ue = 0;
2197 	uint16_t	status;
2198 
2199 	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
2200 		return;
2201 
2202 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
2203 		s_aer_ue = PF_SAER_FATAL_ERR;
2204 
2205 	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
2206 		s_aer_ue = PF_SAER_NON_FATAL_ERR;
2207 		status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;
2208 
2209 		/* Check if the device received a UC_DATA */
2210 		if (!(status & PCI_STAT_PERROR))
2211 			s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;
2212 
2213 		/* Check if the device received a RCVD_MA/MA_ON_SC */
2214 		if (!(status & (PCI_STAT_R_MAST_AB))) {
2215 			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
2216 			s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
2217 		}
2218 
2219 		/* Check if the device received a RCVD_TA/TA_ON_SC */
2220 		if (!(status & (PCI_STAT_R_TARG_AB))) {
2221 			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
2222 			s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
2223 		}
2224 	}
2225 
2226 	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
2227 }
2228 
2229 /* Find the PCIe-PCI bridge based on secondary bus number */
2230 static pf_data_t *
pf_get_pcie_bridge(pf_data_t * pfd_p,pcie_req_id_t secbus)2231 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
2232 {
2233 	pf_data_t *bdg_pfd_p;
2234 
2235 	/* Search down for the PCIe-PCI device. */
2236 	for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
2237 	    bdg_pfd_p = bdg_pfd_p->pe_next) {
2238 		if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
2239 		    PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
2240 			return (bdg_pfd_p);
2241 	}
2242 
2243 	return (NULL);
2244 }
2245 
2246 /* Find the PCIe-PCI bridge of a PCI device */
2247 static pf_data_t *
pf_get_parent_pcie_bridge(pf_data_t * pfd_p)2248 pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
2249 {
2250 	dev_info_t	*dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2251 
2252 	/* This only makes sense if the device is a PCI device */
2253 	if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
2254 		return (NULL);
2255 
2256 	/*
2257 	 * Search up for the PCIe-PCI device.  Watchout for x86 where pci
2258 	 * devices hang directly off of NPE.
2259 	 */
2260 	for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
2261 		if (dip == rp_dip)
2262 			dip = NULL;
2263 
2264 		if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
2265 			return (PCIE_DIP2PFD(dip));
2266 	}
2267 
2268 	return (NULL);
2269 }
2270 
2271 /*
2272  * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
2273  * As of right now only RC's have enough information to have errors found in the
2274  * fabric to be matched to the RC.  Note that Root Port's (RP) do not carry
2275  * enough information.  Currently known RC's are SPARC Fire architecture and
2276  * it's equivalents, and x86's NPE.
2277  * SPARC Fire architectures have a plethora of error registers, while currently
2278  * NPE only have the address of a failed load.
2279  *
2280  * Check if the RC logged an error with the appropriate status type/abort type.
2281  * Ex: Parity Error, Received Master/Target Abort
2282  * Check if either the fault address found in the rc matches the device's
2283  * assigned address range (PIO's only) or the fault BDF in the rc matches the
2284  * device's BDF or Secondary Bus/Bus Range.
2285  */
static boolean_t
pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
    uint32_t abort_type)
{
	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);
	pf_data_t	*rc_pfd_p;
	pcie_req_id_t	fault_bdf;

	/*
	 * Note the loop condition: iteration covers only the leading run of
	 * root entries and stops at the first non-root entry (root PFDs are
	 * expected at the head of the fault data queue — confirm against the
	 * queue construction code).
	 */
	for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
	    rc_pfd_p = rc_pfd_p->pe_next) {
		/* Only root complex's have enough information to match */
		if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
			continue;

		/* If device and rc abort type does not match continue */
		if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
			continue;

		fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;

		/* The Fault BDF = Device's BDF */
		if (fault_bdf == bus_p->bus_bdf)
			return (B_TRUE);

		/* The Fault Addr is in device's address range */
		if (pf_in_addr_range(bus_p,
		    PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
			return (B_TRUE);

		/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
		if (PCIE_IS_PCIE_BDG(bus_p) &&
		    pf_in_bus_range(bus_p, fault_bdf))
			return (B_TRUE);
	}

	return (B_FALSE);
}
2323 
2324 /*
2325  * Check the RP and see if the error is PIO/DMA.  If the RP also has a PERR then
2326  * it is a DMA, otherwise it's a PIO
2327  */
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
    pcie_req_id_t *bdf)
{
	pf_data_t *rc_pfd_p;

	/* Could be DMA or PIO.  Find out by look at error type. */
	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
	case PCIE_AER_SUCE_TA_ON_SC:
	case PCIE_AER_SUCE_MA_ON_SC:
		/* Aborts on Split Completions are DMA read returns. */
		*trans_type = PF_ADDR_DMA;
		return;
	case PCIE_AER_SUCE_RCVD_TA:
	case PCIE_AER_SUCE_RCVD_MA:
		/* Received aborts are downstream requests, i.e. PIO. */
		*bdf = PCIE_INVALID_BDF;
		*trans_type = PF_ADDR_PIO;
		return;
	case PCIE_AER_SUCE_USC_ERR:
	case PCIE_AER_SUCE_UC_DATA_ERR:
	case PCIE_AER_SUCE_PERR_ASSERT:
		/* Ambiguous: decide below by consulting the root entries. */
		break;
	default:
		/* Unknown error type: nothing reliable to report. */
		*addr = 0;
		*bdf = PCIE_INVALID_BDF;
		*trans_type = 0;
		return;
	}

	/*
	 * Default to PIO; if any root entry upstream of this device also saw
	 * a PERR#, the transaction must have been travelling upstream (DMA).
	 */
	*bdf = PCIE_INVALID_BDF;
	*trans_type = PF_ADDR_PIO;
	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
	    rc_pfd_p = rc_pfd_p->pe_prev) {
		if (PFD_IS_ROOT(rc_pfd_p) &&
		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_PERROR)) {
			*trans_type = PF_ADDR_DMA;
			return;
		}
	}
}
2368 
2369 /*
2370  * pf_pci_decode function decodes the secondary aer transaction logs in
2371  * PCIe-PCI bridges.
2372  *
2373  * The log is 128 bits long and arranged in this manner.
2374  * [0:35]   Transaction Attribute	(s_aer_h0-saer_h1)
2375  * [36:39]  Transaction lower command	(saer_h1)
2376  * [40:43]  Transaction upper command	(saer_h1)
2377  * [44:63]  Reserved
2378  * [64:127] Address			(saer_h2-saer_h3)
2379  */
/* ARGSUSED */
int
pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd)
{
	pcix_attr_t	*attr;
	uint64_t	addr;
	uint32_t	trans_type;
	pcie_req_id_t	bdf = PCIE_INVALID_BDF;

	/* The transaction attributes live in the first two header dwords. */
	attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0);
	*cmd = GET_SAER_CMD(pfd_p);

cmd_switch:
	switch (*cmd) {
	case PCI_PCIX_CMD_IORD:
	case PCI_PCIX_CMD_IOWR:
		/* IO Access should always be down stream */
		addr = PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;
		trans_type = PF_ADDR_PIO;
		break;
	case PCI_PCIX_CMD_MEMRD_DW:
	case PCI_PCIX_CMD_MEMRD_BL:
	case PCI_PCIX_CMD_MEMRDBL:
	case PCI_PCIX_CMD_MEMWR:
	case PCI_PCIX_CMD_MEMWR_BL:
	case PCI_PCIX_CMD_MEMWRBL:
		/* 64-bit address assembled from header dwords 3 (high), 2. */
		addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) <<
		    PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;

		/* Memory access direction is ambiguous; classify it. */
		pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf);
		break;
	case PCI_PCIX_CMD_CFRD:
	case PCI_PCIX_CMD_CFWR:
		/*
		 * CFG Access should always be down stream.  Match the BDF in
		 * the address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_CFG;
		break;
	case PCI_PCIX_CMD_SPL:
		/*
		 * Check for DMA read completions.  The requesting BDF is in the
		 * Address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_DMA;
		break;
	case PCI_PCIX_CMD_DADR:
		/*
		 * For Dual Address Cycles the transaction command is in the 2nd
		 * address phase.  Re-dispatch on it; a second DADR means a
		 * malformed log and falls into the default case.
		 */
		*cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
		    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
		    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
		if (*cmd != PCI_PCIX_CMD_DADR)
			goto cmd_switch;
		/* FALLTHROUGH */
	default:
		/* Undecodable: clear the target info and report failure. */
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
		return (DDI_FAILURE);
	}
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr;
	return (DDI_SUCCESS);
}
2454 
2455 /*
2456  * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler.
2457  * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2458  */
2459 int
pf_hdl_lookup(dev_info_t * dip,uint64_t ena,uint32_t flag,uint64_t addr,pcie_req_id_t bdf)2460 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr,
2461     pcie_req_id_t bdf)
2462 {
2463 	ddi_fm_error_t		derr;
2464 
2465 	/* If we don't know the addr or rid just return with NOTFOUND */
2466 	if ((addr == 0) && !PCIE_CHECK_VALID_BDF(bdf))
2467 		return (PF_HDL_NOTFOUND);
2468 
2469 	/*
2470 	 * Disable DMA handle lookup until DMA errors can be handled and
2471 	 * reported synchronously.  When enabled again, check for the
2472 	 * PF_ADDR_DMA flag
2473 	 */
2474 	if (!(flag & (PF_ADDR_PIO | PF_ADDR_CFG))) {
2475 		return (PF_HDL_NOTFOUND);
2476 	}
2477 
2478 	bzero(&derr, sizeof (ddi_fm_error_t));
2479 	derr.fme_version = DDI_FME_VERSION;
2480 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
2481 	derr.fme_ena = ena;
2482 
2483 	return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf));
2484 }
2485 
static int
pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
    uint64_t addr, pcie_req_id_t bdf)
{
	int			status = PF_HDL_NOTFOUND;
	ndi_fmc_t		*fcp = NULL;
	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
	pcie_req_id_t		dip_bdf;
	boolean_t		have_lock = B_FALSE;
	pcie_bus_t		*bus_p;
	dev_info_t		*cdip;

	/* Devices that are not fault-ready cannot own a matching handle. */
	if (!(bus_p = pf_is_ready(dip))) {
		return (status);
	}

	ASSERT(fmhdl);
	if (!i_ddi_fm_handler_owned(dip)) {
		/*
		 * pf_handler_enter always returns SUCCESS if the 'impl' arg is
		 * NULL.
		 */
		(void) pf_handler_enter(dip, NULL);
		have_lock = B_TRUE;
	}

	dip_bdf = PCI_GET_BDF(dip);

	/* Check if dip and BDF match, if not recurse to it's children. */
	if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) ||
	    dip_bdf == bdf)) {
		/* Search the DMA handle cache only for DMA-flagged faults. */
		if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_dma_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr,
			    bdf, fcp);


		/* PIO and CFG faults are matched against the access cache. */
		if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) &&
		    DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_acc_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr,
			    bdf, fcp);
	}

	/* If we found the handler or know it's this device, we're done */
	if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) ||
	    (status == PF_HDL_FOUND)))
		goto done;

	/*
	 * If the current device is a PCIe-PCI bridge we need to check for
	 * special cases:
	 *
	 * If it is a PIO and we don't have an address or this is a DMA, check
	 * to see if the BDF = secondary bus.  If so stop.  The BDF isn't a real
	 * BDF and the fault device could have come from any device in the PCI
	 * bus.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) &&
	    ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf))
		goto done;


	/* If we can't find the handler check it's children */
	for (cdip = ddi_get_child(dip); cdip;
	    cdip = ddi_get_next_sibling(cdip)) {
		if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL)
			continue;

		/* Only descend into children that could contain the fault. */
		if (pf_in_bus_range(bus_p, bdf) ||
		    pf_in_addr_range(bus_p, addr))
			status = pf_hdl_child_lookup(cdip, derr, flag, addr,
			    bdf);

		if (status == PF_HDL_FOUND)
			goto done;
	}

done:
	if (have_lock == B_TRUE)
		pf_handler_exit(dip);

	return (status);
}
2579 
2580 static int
pf_hdl_compare(dev_info_t * dip,ddi_fm_error_t * derr,uint32_t flag,uint64_t addr,pcie_req_id_t bdf,ndi_fmc_t * fcp)2581 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
2582     uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp)
2583 {
2584 	ndi_fmcentry_t	*fep;
2585 	int		found = 0;
2586 	int		status;
2587 
2588 	mutex_enter(&fcp->fc_lock);
2589 	for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) {
2590 		ddi_fmcompare_t compare_func;
2591 
2592 		/*
2593 		 * Compare captured error state with handle
2594 		 * resources.  During the comparison and
2595 		 * subsequent error handling, we block
2596 		 * attempts to free the cache entry.
2597 		 */
2598 		if (flag == ACC_HANDLE) {
2599 			compare_func =
2600 			    i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t)
2601 			    fep->fce_resource);
2602 		} else {
2603 			compare_func =
2604 			    i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t)
2605 			    fep->fce_resource);
2606 		}
2607 
2608 		if (compare_func == NULL) /* unbound or not FLAGERR */
2609 			continue;
2610 
2611 		status = compare_func(dip, fep->fce_resource,
2612 		    (void *)&addr, (void *)&bdf);
2613 
2614 		if (status == DDI_FM_NONFATAL) {
2615 			found++;
2616 
2617 			/* Set the error for this resource handle */
2618 			if (flag == ACC_HANDLE) {
2619 				ddi_acc_handle_t ap = fep->fce_resource;
2620 
2621 				i_ddi_fm_acc_err_set(ap, derr->fme_ena, status,
2622 				    DDI_FM_ERR_UNEXPECTED);
2623 				ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION);
2624 				derr->fme_acc_handle = ap;
2625 			} else {
2626 				ddi_dma_handle_t dp = fep->fce_resource;
2627 
2628 				i_ddi_fm_dma_err_set(dp, derr->fme_ena, status,
2629 				    DDI_FM_ERR_UNEXPECTED);
2630 				ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION);
2631 				derr->fme_dma_handle = dp;
2632 			}
2633 		}
2634 	}
2635 	mutex_exit(&fcp->fc_lock);
2636 
2637 	/*
2638 	 * If a handler isn't found and we know this is the right device mark
2639 	 * them all failed.
2640 	 */
2641 	if ((addr != 0) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) {
2642 		status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp);
2643 		if (status == PF_HDL_FOUND)
2644 			found++;
2645 	}
2646 
2647 	return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND);
2648 }
2649 
2650 /*
2651  * Automatically decode AER header logs and does a handling look up based on the
2652  * AER header decoding.
2653  *
2654  * For this function only the Primary/Secondary AER Header Logs need to be valid
2655  * in the pfd (PCIe Fault Data) arg.
2656  *
2657  * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2658  */
2659 /* ARGSUSED */
2660 static int
pf_log_hdl_lookup(dev_info_t * rpdip,ddi_fm_error_t * derr,pf_data_t * pfd_p,boolean_t is_primary)2661 pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
2662     boolean_t is_primary)
2663 {
2664 	/*
2665 	 * Disabling this function temporarily until errors can be handled
2666 	 * synchronously.
2667 	 *
2668 	 * This function is currently only called during the middle of a fabric
2669 	 * scan.  If the fabric scan is called synchronously with an error seen
2670 	 * in the RP/RC, then the related errors in the fabric will have a
2671 	 * PF_ERR_MATCHED_RC error severity.  pf_log_hdl_lookup code will be by
2672 	 * passed when the severity is PF_ERR_MATCHED_RC.  Handle lookup would
2673 	 * have already happened in RP/RC error handling in a synchronous
2674 	 * manner.  Errors unrelated should panic, because they are being
2675 	 * handled asynchronously.
2676 	 *
2677 	 * If fabric scan is called asynchronously from any RP/RC error, then
2678 	 * DMA/PIO UE errors seen in the fabric should panic.  pf_lop_hdl_lookup
2679 	 * will return PF_HDL_NOTFOUND to ensure that the system panics.
2680 	 */
2681 	return (PF_HDL_NOTFOUND);
2682 }
2683 
2684 /*
2685  * Decodes the TLP and returns the BDF of the handler, address and transaction
2686  * type if known.
2687  *
2688  * Types of TLP logs seen in RC, and what to extract:
2689  *
2690  * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2691  * Memory(PIO) - address, PF_PIO_ADDR
2692  * CFG - Should not occur and result in UR
2693  * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2694  * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2695  *
2696  * Types of TLP logs seen in SW/Leaf, and what to extract:
2697  *
2698  * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2699  * Memory(PIO) - address, PF_PIO_ADDR
2700  * CFG - Destined BDF, address, PF_CFG_ADDR
2701  * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2702  * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2703  *
2704  * The adv_reg_p must be passed in separately for use with SPARC RPs.  A
2705  * SPARC RP could have multiple AER header logs which cannot be directly
2706  * accessed via the bus_p.
2707  */
int
pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p)
{
	pcie_tlp_hdr_t	*tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
	pcie_req_id_t	my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
	uint64_t	flt_addr = 0;
	uint32_t	flt_trans_type = 0;

	/* Reset the target fields in case decoding fails part way. */
	adv_reg_p->pcie_ue_tgt_addr = 0;
	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
	adv_reg_p->pcie_ue_tgt_trans = 0;

	my_bdf = bus_p->bus_bdf;
	switch (tlp_hdr->type) {
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* Grab the 32/64bit fault address */
		if (tlp_hdr->fmt & 0x1) {
			/* 4-DW header: 64-bit address in dwords 2 (hi) + 3. */
			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
		} else {
			/* 3-DW header: 32-bit address in dword 2. */
			flt_addr = adv_reg_p->pcie_ue_hdr[2];
		}

		/* Requester ID is the upper 16 bits of header dword 1. */
		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);

		/*
		 * If the req bdf >= this.bdf, then it means the request is this
		 * device or came from a device below it.  Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else if (PCIE_IS_ROOT(bus_p) &&
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
			/*
			 * At a root: a poisoned TLP or CA as the first error
			 * implies an inbound (DMA) request.
			 */
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else {
			flt_trans_type = PF_ADDR_PIO;
			flt_bdf = PCIE_INVALID_BDF;
		}
		break;
	case PCIE_TLP_TYPE_CFG0:
	case PCIE_TLP_TYPE_CFG1:
		/* Destination BDF is the upper 16 bits of header dword 2. */
		flt_addr = 0;
		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
		flt_trans_type = PF_ADDR_CFG;
		break;
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
	{
		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&adv_reg_p->pcie_ue_hdr[1];

		/* Use whichever of requester/completer ID is downstream. */
		flt_addr = 0;
		flt_bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
		    cpl_tlp->cid;

		/*
		 * A requester ID numerically greater than the completer ID
		 * means the request originated from below the completer, so
		 * the completion belongs to a DMA; otherwise it is the return
		 * of a PIO or CFG access.
		 */
		if (cpl_tlp->rid > cpl_tlp->cid) {
			flt_trans_type = PF_ADDR_DMA;
		} else {
			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
		}
		break;
	}
	default:
		/* Unrecognized TLP type: caller gets no target info. */
		return (DDI_FAILURE);
	}

	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;

	return (DDI_SUCCESS);
}
2790 
2791 #define	PCIE_EREPORT	DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC
2792 static int
pf_ereport_setup(dev_info_t * dip,uint64_t ena,nvlist_t ** ereport,nvlist_t ** detector,errorq_elem_t ** eqep)2793 pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
2794     nvlist_t **detector, errorq_elem_t **eqep)
2795 {
2796 	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2797 	char device_path[MAXPATHLEN];
2798 	nv_alloc_t *nva;
2799 
2800 	*eqep = errorq_reserve(fmhdl->fh_errorq);
2801 	if (*eqep == NULL) {
2802 		atomic_inc_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64);
2803 		return (DDI_FAILURE);
2804 	}
2805 
2806 	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
2807 	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);
2808 
2809 	ASSERT(*ereport);
2810 	ASSERT(nva);
2811 
2812 	/*
2813 	 * Use the dev_path/devid for this device instance.
2814 	 */
2815 	*detector = fm_nvlist_create(nva);
2816 	if (dip == ddi_root_node()) {
2817 		device_path[0] = '/';
2818 		device_path[1] = '\0';
2819 	} else {
2820 		(void) ddi_pathname(dip, device_path);
2821 	}
2822 
2823 	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
2824 	    device_path, NULL, NULL);
2825 
2826 	if (ena == 0)
2827 		ena = fm_ena_generate(0, FM_ENA_FMT1);
2828 
2829 	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);
2830 
2831 	return (DDI_SUCCESS);
2832 }
2833 
2834 /* ARGSUSED */
2835 static void
pf_ereport_post(dev_info_t * dip,nvlist_t ** ereport,nvlist_t ** detector,errorq_elem_t ** eqep)2836 pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
2837     errorq_elem_t **eqep)
2838 {
2839 	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2840 
2841 	errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
2842 }
2843 
/*
 * Walk the error queue built during a fabric scan and post one fabric
 * ereport per device, gathering the register snapshots appropriate to each
 * device's type.  The queue is unlocked when all ereports have been sent.
 */
static void
pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	nvlist_t	*ereport;
	nvlist_t	*detector;
	errorq_elem_t	*eqep;
	pcie_bus_t	*bus_p;
	pf_data_t	*pfd_p;
	uint32_t	total = impl->pf_total;

	/*
	 * Ereports need to be sent in a top down fashion. The fabric translator
	 * expects the ereports from the Root first. This is needed to tell if
	 * the system contains a PCIe compliant RC/RP.
	 */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		bus_p = PCIE_PFD2BUS(pfd_p);
		pfd_p->pe_valid = B_FALSE;

		/* Skip expected errors and nodes that are not ereport capable */
		if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
		    !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
			continue;

		if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
		    &ereport, &detector, &eqep) != DDI_SUCCESS)
			continue;

		/* RC nodes carry only scan target and interrupt source info */
		if (PFD_IS_RC(pfd_p)) {
			fm_payload_set(ereport,
			    "scan_bdf", DATA_TYPE_UINT16,
			    PCIE_ROOT_FAULT(pfd_p)->scan_bdf,
			    "scan_addr", DATA_TYPE_UINT64,
			    PCIE_ROOT_FAULT(pfd_p)->scan_addr,
			    "intr_src", DATA_TYPE_UINT16,
			    PCIE_ROOT_EH_SRC(pfd_p)->intr_type,
			    NULL);
			goto generic;
		}

		/* Generic PCI device information */
		fm_payload_set(ereport,
		    "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
		    "device_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id >> 16),
		    "vendor_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id & 0xFFFF),
		    "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
		    "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
		    "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
		    "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
		    "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
		    "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
		    NULL);

		/* PCI registers */
		fm_payload_set(ereport,
		    "pci_status", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_err_status,
		    "pci_command", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_cfg_comm,
		    NULL);

		/* PCI bridge registers */
		if (PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pci_bdg_sec_status", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
			    "pci_bdg_ctrl", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
			    NULL);
		}

		/* PCIx registers */
		if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_status", DATA_TYPE_UINT32,
			    PCIX_ERR_REG(pfd_p)->pcix_status,
			    "pcix_command", DATA_TYPE_UINT16,
			    PCIX_ERR_REG(pfd_p)->pcix_command,
			    NULL);
		}

		/* PCIx ECC Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;
			pf_pcix_ecc_regs_t *ecc_reg;

			/* Bridges keep a separate ECC register bank (index 0) */
			if (PCIE_IS_BDG(bus_p))
				ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
			ecc_reg = PCIX_ECC_REG(pfd_p);
			fm_payload_set(ereport,
			    "pcix_ecc_control_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
			    (ecc_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
			    (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_fstaddr :
			    ecc_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_secaddr :
			    ecc_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_attr :
			    ecc_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx ECC Bridge Registers (secondary bank, index 1) */
		if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;

			ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
			fm_payload_set(ereport,
			    "pcix_ecc_control_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx Bridge */
		if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_bdg_status", DATA_TYPE_UINT32,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
			    "pcix_bdg_sec_status", DATA_TYPE_UINT16,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
			    NULL);
		}

		/* PCIe registers */
		if (PCIE_IS_PCIE(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_status", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_status,
			    "pcie_command", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
			    "pcie_dev_cap", DATA_TYPE_UINT32,
			    PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
			    NULL);
		}

		/* PCIe AER registers */
		if (PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
			    "pcie_ue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_status,
			    "pcie_ue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
			    "pcie_ue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
			    "pcie_ue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
			    "pcie_ue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
			    "pcie_ue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
			    "pcie_ue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
			    "pcie_ce_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_status,
			    "pcie_ce_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
			    NULL);
		}

		/* PCIe AER decoded header */
		if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
			fm_payload_set(ereport,
			    "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
			    "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
			    "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
			    NULL);
			/* Clear these values as they are no longer valid */
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
		}

		/* PCIe BDG AER registers */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
			    "pcie_sue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
			    "pcie_sue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
			    "pcie_sue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
			    "pcie_sue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
			    "pcie_sue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
			    "pcie_sue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
			    "pcie_sue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
			    NULL);
		}

		/* PCIe BDG AER decoded header */
		if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
		    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
			fm_payload_set(ereport,
			    "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
			    "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
			    "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
			    NULL);
			/* Clear these values as they are no longer valid */
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe RP registers */
		if (PCIE_IS_RP(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_rp_status", DATA_TYPE_UINT32,
			    PCIE_RP_REG(pfd_p)->pcie_rp_status,
			    "pcie_rp_control", DATA_TYPE_UINT16,
			    PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
			    NULL);
		}

		/* PCIe RP AER registers */
		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_rp_status", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
			    "pcie_adv_rp_command", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
			    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
			    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
			    NULL);
		}

		/*
		 * Slot Status registers
		 *
		 * Since we only gather these for certain types of components,
		 * only put these registers into the ereport if we have valid
		 * data.
		 */
		if (PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid) {
			fm_payload_set(ereport,
			    "pcie_slot_cap", DATA_TYPE_UINT32,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_cap,
			    "pcie_slot_control", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_control,
			    "pcie_slot_status", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_status,
			    NULL);
		}

generic:
		/* IOV related information */
		if (!PCIE_BDG_IS_UNASSIGNED(PCIE_PFD2BUS(impl->pf_dq_head_p))) {
			fm_payload_set(ereport,
			    "pcie_aff_flags", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags,
			    "pcie_aff_bdf", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf,
			    "orig_sev", DATA_TYPE_UINT32,
			    pfd_p->pe_orig_severity_flags,
			    NULL);
		}

		/* Misc ereport information; "remainder" counts down to zero */
		fm_payload_set(ereport,
		    "remainder", DATA_TYPE_UINT32, --total,
		    "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
		    NULL);

		pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
		    &eqep);
	}

	pf_dq_unlock_chain(impl);
}
3148 
3149 /*
3150  * pf_handler_enter must be called to serial access to each device's pf_data_t.
3151  * Once error handling is finished with the device call pf_handler_exit to allow
3152  * other threads to access it.  The same thread may call pf_handler_enter
3153  * several times without any consequences.
3154  *
3155  * The "impl" variable is passed in during scan fabric to double check that
3156  * there is not a recursive algorithm and to ensure only one thread is doing a
3157  * fabric scan at all times.
3158  *
3159  * In some cases "impl" is not available, such as "child lookup" being called
3160  * from outside of scan fabric, just pass in NULL for this variable and this
3161  * extra check will be skipped.
3162  */
3163 static int
pf_handler_enter(dev_info_t * dip,pf_impl_t * impl)3164 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl)
3165 {
3166 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3167 
3168 	ASSERT(pfd_p);
3169 
3170 	/*
3171 	 * Check to see if the lock has already been taken by this
3172 	 * thread.  If so just return and don't take lock again.
3173 	 */
3174 	if (!pfd_p->pe_lock || !impl) {
3175 		i_ddi_fm_handler_enter(dip);
3176 		pfd_p->pe_lock = B_TRUE;
3177 		return (PF_SCAN_SUCCESS);
3178 	}
3179 
3180 	/* Check to see that this dip is already in the "impl" error queue */
3181 	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
3182 		if (PCIE_PFD2DIP(pfd_p) == dip) {
3183 			return (PF_SCAN_SUCCESS);
3184 		}
3185 	}
3186 
3187 	return (PF_SCAN_DEADLOCK);
3188 }
3189 
3190 static void
pf_handler_exit(dev_info_t * dip)3191 pf_handler_exit(dev_info_t *dip)
3192 {
3193 	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3194 
3195 	ASSERT(pfd_p);
3196 
3197 	ASSERT(pfd_p->pe_lock == B_TRUE);
3198 	i_ddi_fm_handler_exit(dip);
3199 	pfd_p->pe_lock = B_FALSE;
3200 }
3201 
3202 /*
3203  * This function calls the driver's callback function (if it's FMA hardened
3204  * and callback capable). This function relies on the current thread already
3205  * owning the driver's fmhdl lock.
3206  */
3207 static int
pf_fm_callback(dev_info_t * dip,ddi_fm_error_t * derr)3208 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
3209 {
3210 	int cb_sts = DDI_FM_OK;
3211 
3212 	if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
3213 		dev_info_t *pdip = ddi_get_parent(dip);
3214 		struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
3215 		struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
3216 		struct i_ddi_errhdl *errhdl;
3217 		while (tgt != NULL) {
3218 			if (dip == tgt->ft_dip) {
3219 				errhdl = tgt->ft_errhdl;
3220 				cb_sts = errhdl->eh_func(dip, derr,
3221 				    errhdl->eh_impl);
3222 				break;
3223 			}
3224 			tgt = tgt->ft_next;
3225 		}
3226 	}
3227 	return (cb_sts);
3228 }
3229 
/*
 * Return a pf_data_t to its pristine state so it can be reused by the next
 * fabric scan: clear severity state, affected-device information, and the
 * saved register snapshots appropriate to the device's type, then detach it
 * from the error queue.
 */
static void
pf_reset_pfd(pf_data_t *pfd_p)
{
	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);

	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	/* pe_lock and pe_valid were reset in pf_send_ereport */

	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;

	/* Root nodes additionally carry scan-fault and interrupt-source info */
	if (PCIE_IS_ROOT(bus_p)) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
		PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_FALSE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
	}

	if (PCIE_IS_BDG(bus_p)) {
		bzero(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
	}

	PCI_ERR_REG(pfd_p)->pci_err_status = 0;
	PCI_ERR_REG(pfd_p)->pci_cfg_comm = 0;

	if (PCIE_IS_PCIE(bus_p)) {
		/* Root ports and bridges have extra register sets to clear */
		if (PCIE_IS_ROOT(bus_p)) {
			bzero(PCIE_RP_REG(pfd_p),
			    sizeof (pf_pcie_rp_err_regs_t));
			bzero(PCIE_ADV_RP_REG(pfd_p),
			    sizeof (pf_pcie_adv_rp_err_regs_t));
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
			    PCIE_INVALID_BDF;
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
			    PCIE_INVALID_BDF;
		} else if (PCIE_IS_PCIE_BDG(bus_p)) {
			bzero(PCIE_ADV_BDG_REG(pfd_p),
			    sizeof (pf_pcie_adv_bdg_err_regs_t));
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		}

		/* AER registers common to all PCIe devices */
		PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_sev = 0;
		PCIE_ADV_HDR(pfd_p, 0) = 0;
		PCIE_ADV_HDR(pfd_p, 1) = 0;
		PCIE_ADV_HDR(pfd_p, 2) = 0;
		PCIE_ADV_HDR(pfd_p, 3) = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;

		PCIE_ERR_REG(pfd_p)->pcie_err_status = 0;
		PCIE_ERR_REG(pfd_p)->pcie_err_ctl = 0;
		PCIE_ERR_REG(pfd_p)->pcie_dev_cap = 0;

	} else if (PCIE_IS_PCIX(bus_p)) {
		if (PCIE_IS_BDG(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		} else {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_ECC_REG(pfd_p),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_ERR_REG(pfd_p)->pcix_command = 0;
			PCIX_ERR_REG(pfd_p)->pcix_status = 0;
		}
	}

	/* Detach from the error queue */
	pfd_p->pe_prev = NULL;
	pfd_p->pe_next = NULL;
	pfd_p->pe_rber_fatal = B_FALSE;
}
3328 
3329 pcie_bus_t *
pf_find_busp_by_bdf(pf_impl_t * impl,pcie_req_id_t bdf)3330 pf_find_busp_by_bdf(pf_impl_t *impl, pcie_req_id_t bdf)
3331 {
3332 	pcie_bus_t *temp_bus_p;
3333 	pf_data_t *temp_pfd_p;
3334 
3335 	for (temp_pfd_p = impl->pf_dq_head_p;
3336 	    temp_pfd_p;
3337 	    temp_pfd_p = temp_pfd_p->pe_next) {
3338 		temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3339 
3340 		if (bdf == temp_bus_p->bus_bdf) {
3341 			return (temp_bus_p);
3342 		}
3343 	}
3344 
3345 	return (NULL);
3346 }
3347 
3348 pcie_bus_t *
pf_find_busp_by_addr(pf_impl_t * impl,uint64_t addr)3349 pf_find_busp_by_addr(pf_impl_t *impl, uint64_t addr)
3350 {
3351 	pcie_bus_t *temp_bus_p;
3352 	pf_data_t *temp_pfd_p;
3353 
3354 	for (temp_pfd_p = impl->pf_dq_head_p;
3355 	    temp_pfd_p;
3356 	    temp_pfd_p = temp_pfd_p->pe_next) {
3357 		temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3358 
3359 		if (pf_in_assigned_addr(temp_bus_p, addr)) {
3360 			return (temp_bus_p);
3361 		}
3362 	}
3363 
3364 	return (NULL);
3365 }
3366 
3367 pcie_bus_t *
pf_find_busp_by_aer(pf_impl_t * impl,pf_data_t * pfd_p)3368 pf_find_busp_by_aer(pf_impl_t *impl, pf_data_t *pfd_p)
3369 {
3370 	pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p);
3371 	pcie_bus_t *temp_bus_p = NULL;
3372 	pcie_req_id_t bdf;
3373 	uint64_t addr;
3374 	pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)reg_p->pcie_ue_hdr;
3375 	uint32_t trans_type = reg_p->pcie_ue_tgt_trans;
3376 
3377 	if ((tlp_hdr->type == PCIE_TLP_TYPE_CPL) ||
3378 	    (tlp_hdr->type == PCIE_TLP_TYPE_CPLLK)) {
3379 		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&reg_p->pcie_ue_hdr[1];
3380 
3381 		bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
3382 		    cpl_tlp->cid;
3383 		temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3384 	} else if (trans_type == PF_ADDR_PIO) {
3385 		addr = reg_p->pcie_ue_tgt_addr;
3386 		temp_bus_p = pf_find_busp_by_addr(impl, addr);
3387 	} else {
3388 		/* PF_ADDR_DMA type */
3389 		bdf = reg_p->pcie_ue_tgt_bdf;
3390 		temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3391 	}
3392 
3393 	return (temp_bus_p);
3394 }
3395 
3396 pcie_bus_t *
pf_find_busp_by_saer(pf_impl_t * impl,pf_data_t * pfd_p)3397 pf_find_busp_by_saer(pf_impl_t *impl, pf_data_t *pfd_p)
3398 {
3399 	pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p);
3400 	pcie_bus_t *temp_bus_p = NULL;
3401 	pcie_req_id_t bdf;
3402 	uint64_t addr;
3403 
3404 	addr = reg_p->pcie_sue_tgt_addr;
3405 	bdf = reg_p->pcie_sue_tgt_bdf;
3406 
3407 	if (addr != 0) {
3408 		temp_bus_p = pf_find_busp_by_addr(impl, addr);
3409 	} else if (PCIE_CHECK_VALID_BDF(bdf)) {
3410 		temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3411 	}
3412 
3413 	return (temp_bus_p);
3414 }
3415