xref: /illumos-gate/usr/src/uts/sun4/io/px/px_fm.c (revision a4aeef46cda1835da2b19f8f62b4526de6521e6c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PX Fault Management Architecture
28  */
29 #include <sys/types.h>
30 #include <sys/sunndi.h>
31 #include <sys/sunddi.h>
32 #include <sys/fm/protocol.h>
33 #include <sys/fm/util.h>
34 #include <sys/fm/io/pci.h>
35 #include <sys/membar.h>
36 #include "px_obj.h"
37 
38 extern uint_t px_ranges_phi_mask;
39 
40 #define	PX_PCIE_PANIC_BITS \
41 	(PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \
42 	PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC)
43 #define	PX_PCIE_NO_PANIC_BITS \
44 	(PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \
45 	PCIE_AER_UCE_UC | PCIE_AER_UCE_UR)
46 
47 /*
48  * Global panicing state variabled used to control if further error handling
49  * should occur.  If the system is already panic'ing or if PX itself has
50  * recommended panic'ing the system, no further error handling should occur to
51  * prevent the system from hanging.
52  */
53 boolean_t px_panicing = B_FALSE;
54 
55 static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr,
56     px_err_pcie_t *regs);
57 
58 #if defined(DEBUG)
59 static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs);
60 #else	/* DEBUG */
61 #define	px_pcie_log 0 &&
62 #endif	/* DEBUG */
63 
64 /*
65  * Initialize px FMA support
66  */
67 int
68 px_fm_attach(px_t *px_p)
69 {
70 	int		i;
71 	dev_info_t	*dip = px_p->px_dip;
72 	pcie_bus_t	*bus_p;
73 
74 	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
75 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
76 
77 	/*
78 	 * check parents' capability
79 	 */
80 	ddi_fm_init(dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
81 
82 	/*
83 	 * parents need to be ereport and error handling capable
84 	 */
85 	ASSERT(px_p->px_fm_cap &&
86 	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
87 
88 	/*
89 	 * Initialize lock to synchronize fabric error handling
90 	 */
91 	mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER,
92 	    (void *)px_p->px_fm_ibc);
93 
94 	px_p->px_pfd_idx = 0;
95 	for (i = 0; i < 5; i++)
96 		pcie_rc_init_pfd(dip, &px_p->px_pfd_arr[i]);
97 	PCIE_DIP2PFD(dip) = px_p->px_pfd_arr;
98 
99 	bus_p = PCIE_DIP2BUS(dip);
100 	bus_p->bus_rp_bdf = px_p->px_bdf;
101 	bus_p->bus_rp_dip = dip;
102 
103 	/*
104 	 * register error callback in parent
105 	 */
106 	ddi_fm_handler_register(dip, px_fm_callback, px_p);
107 
108 	return (DDI_SUCCESS);
109 }
110 
111 /*
112  * Deregister FMA
113  */
114 void
115 px_fm_detach(px_t *px_p)
116 {
117 	int i;
118 
119 	ddi_fm_handler_unregister(px_p->px_dip);
120 	mutex_destroy(&px_p->px_fm_mutex);
121 	ddi_fm_fini(px_p->px_dip);
122 	for (i = 0; i < 5; i++)
123 		pcie_rc_fini_pfd(&px_p->px_pfd_arr[i]);
124 }
125 
126 /*
127  * Function used to setup access functions depending on level of desired
128  * protection.
129  */
130 void
131 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip, pci_regspec_t *rp)
132 {
133 	uchar_t fflag;
134 	ndi_err_t *errp;
135 	ddi_acc_hdl_t *hp;
136 	ddi_acc_impl_t *ap;
137 
138 	hp = mp->map_handlep;
139 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
140 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
141 
142 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
143 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
144 		switch (fflag) {
145 		case DDI_FLAGERR_ACC:
146 			ap->ahi_get8 = i_ddi_prot_get8;
147 			ap->ahi_get16 = i_ddi_prot_get16;
148 			ap->ahi_get32 = i_ddi_prot_get32;
149 			ap->ahi_get64 = i_ddi_prot_get64;
150 			ap->ahi_put8 = i_ddi_prot_put8;
151 			ap->ahi_put16 = i_ddi_prot_put16;
152 			ap->ahi_put32 = i_ddi_prot_put32;
153 			ap->ahi_put64 = i_ddi_prot_put64;
154 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
155 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
156 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
157 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
158 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
159 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
160 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
161 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
162 			impl_acc_err_init(hp);
163 			errp = ((ddi_acc_impl_t *)hp)->ahi_err;
164 			if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
165 			    PCI_ADDR_CONFIG)
166 				errp->err_cf = px_err_cfg_hdl_check;
167 			else
168 				errp->err_cf = px_err_pio_hdl_check;
169 			break;
170 		case DDI_CAUTIOUS_ACC :
171 			ap->ahi_get8 = i_ddi_caut_get8;
172 			ap->ahi_get16 = i_ddi_caut_get16;
173 			ap->ahi_get32 = i_ddi_caut_get32;
174 			ap->ahi_get64 = i_ddi_caut_get64;
175 			ap->ahi_put8 = i_ddi_caut_put8;
176 			ap->ahi_put16 = i_ddi_caut_put16;
177 			ap->ahi_put32 = i_ddi_caut_put32;
178 			ap->ahi_put64 = i_ddi_caut_put64;
179 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
180 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
181 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
182 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
183 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
184 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
185 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
186 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
187 			impl_acc_err_init(hp);
188 			errp = ((ddi_acc_impl_t *)hp)->ahi_err;
189 			if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
190 			    PCI_ADDR_CONFIG)
191 				errp->err_cf = px_err_cfg_hdl_check;
192 			else
193 				errp->err_cf = px_err_pio_hdl_check;
194 			break;
195 		default:
196 			/* Illegal state, remove the handle from cache */
197 			ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
198 			break;
199 		}
200 	} else if (mp->map_op == DDI_MO_UNMAP) {
201 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
202 	}
203 }
204 
205 /*
206  * Function used to initialize FMA for our children nodes. Called
207  * through pci busops when child node calls ddi_fm_init.
208  */
209 /*ARGSUSED*/
210 int
211 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
212     ddi_iblock_cookie_t *ibc_p)
213 {
214 	px_t *px_p = DIP_TO_STATE(dip);
215 
216 	ASSERT(ibc_p != NULL);
217 	*ibc_p = px_p->px_fm_ibc;
218 
219 	return (px_p->px_fm_cap);
220 }
221 
222 /*
223  * lock access for exclusive PCIe access
224  */
225 void
226 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
227 {
228 	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
229 
230 	/*
231 	 * Exclusive access has been used for cautious put/get,
232 	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
233 	 * similar protection as what on_trap() does, and which calls
234 	 * membar  #Sync to flush out all cpu deferred errors
235 	 * prior to get/put operation, so here we're not calling
236 	 * membar  #Sync - a difference from what's in pci_bus_enter().
237 	 */
238 	mutex_enter(&pec_p->pec_pokefault_mutex);
239 	pec_p->pec_acc_hdl = handle;
240 }
241 
242 /*
243  * unlock access for exclusive PCIe access
244  */
245 /* ARGSUSED */
246 void
247 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
248 {
249 	px_t		*px_p = DIP_TO_STATE(dip);
250 	px_pec_t	*pec_p = px_p->px_pec_p;
251 
252 	pec_p->pec_acc_hdl = NULL;
253 	mutex_exit(&pec_p->pec_pokefault_mutex);
254 }
255 
256 static uint64_t
257 px_in_addr_range(dev_info_t *dip, pci_ranges_t *ranges_p, uint64_t addr)
258 {
259 	uint64_t	addr_low, addr_high;
260 
261 	addr_low = (uint64_t)(ranges_p->parent_high & px_ranges_phi_mask) << 32;
262 	addr_low |= (uint64_t)ranges_p->parent_low;
263 	addr_high = addr_low + ((uint64_t)ranges_p->size_high << 32) +
264 	    (uint64_t)ranges_p->size_low;
265 
266 	DBG(DBG_ERR_INTR, dip, "Addr: 0x%llx high: 0x%llx low: 0x%llx\n",
267 	    addr, addr_high, addr_low);
268 
269 	if ((addr < addr_high) && (addr >= addr_low))
270 		return (addr_low);
271 
272 	return (0);
273 }
274 
275 /*
276  * PCI error callback which is registered with our parent to call
277  * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
278  * and PCI BERR/TO/UE on IO Loads.
279  */
280 /*ARGSUSED*/
281 int
282 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
283 {
284 	dev_info_t	*pdip = ddi_get_parent(dip);
285 	px_t		*px_p = (px_t *)impl_data;
286 	int		i, acc_type = 0;
287 	int		lookup, rc_err, fab_err;
288 	uint64_t	addr, base_addr;
289 	uint64_t	fault_addr = (uint64_t)derr->fme_bus_specific;
290 	pcie_req_id_t	bdf = PCIE_INVALID_BDF;
291 	pci_ranges_t	*ranges_p;
292 	int		range_len;
293 	pf_data_t	*pfd_p;
294 
295 	/*
296 	 * If the current thread already owns the px_fm_mutex, then we
297 	 * have encountered an error while processing a previous
298 	 * error.  Attempting to take the mutex again will cause the
299 	 * system to deadlock.
300 	 */
301 	if (px_p->px_fm_mutex_owner == curthread)
302 		return (DDI_FM_FATAL);
303 
304 	i_ddi_fm_handler_exit(pdip);
305 
306 	if (px_fm_enter(px_p) != DDI_SUCCESS) {
307 		i_ddi_fm_handler_enter(pdip);
308 		return (DDI_FM_FATAL);
309 	}
310 
311 	/*
312 	 * Make sure this failed load came from this PCIe port.	 Check by
313 	 * matching the upper 32 bits of the address with the ranges property.
314 	 */
315 	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
316 	i = 0;
317 	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
318 		base_addr = px_in_addr_range(dip, ranges_p, fault_addr);
319 		if (base_addr) {
320 			switch (ranges_p->child_high & PCI_ADDR_MASK) {
321 			case PCI_ADDR_CONFIG:
322 				acc_type = PF_ADDR_CFG;
323 				addr = NULL;
324 				bdf = (pcie_req_id_t)((fault_addr >> 12) &
325 				    0xFFFF);
326 				break;
327 			case PCI_ADDR_IO:
328 			case PCI_ADDR_MEM64:
329 			case PCI_ADDR_MEM32:
330 				acc_type = PF_ADDR_PIO;
331 				addr = fault_addr - base_addr;
332 				bdf = PCIE_INVALID_BDF;
333 				break;
334 			}
335 			break;
336 		}
337 	}
338 
339 	/* This address doesn't belong to this leaf, just return with OK */
340 	if (!acc_type) {
341 		px_fm_exit(px_p);
342 		i_ddi_fm_handler_enter(pdip);
343 		return (DDI_FM_OK);
344 	}
345 
346 	rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL);
347 	lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, (uint64_t)addr,
348 	    bdf);
349 
350 	pfd_p = px_rp_en_q(px_p, bdf, addr,
351 	    (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB));
352 	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_DATA;
353 
354 	/* Update affected info, either addr or bdf is not NULL */
355 	if (addr) {
356 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ADDR;
357 	} else if (PCIE_CHECK_VALID_BDF(bdf)) {
358 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF;
359 		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = bdf;
360 	}
361 
362 	fab_err = px_scan_fabric(px_p, dip, derr);
363 
364 	px_fm_exit(px_p);
365 	i_ddi_fm_handler_enter(pdip);
366 
367 	if (!px_die)
368 		return (DDI_FM_OK);
369 
370 	if ((rc_err & (PX_PANIC | PX_PROTECTED)) ||
371 	    (fab_err & PF_ERR_FATAL_FLAGS) ||
372 	    (lookup == PF_HDL_NOTFOUND))
373 		return (DDI_FM_FATAL);
374 	else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_ERR_NO_ERROR))
375 		return (DDI_FM_OK);
376 
377 	return (DDI_FM_NONFATAL);
378 }
379 
380 /*
381  * px_err_fabric_intr:
382  * Interrupt handler for PCIE fabric block.
383  * o lock
384  * o create derr
385  * o px_err_cmn_intr(leaf, with jbc)
386  * o send ereport(fire fmri, derr, payload = BDF)
387  * o dispatch (leaf)
388  * o unlock
389  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
390  */
391 /* ARGSUSED */
392 uint_t
393 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid)
394 {
395 	dev_info_t	*rpdip = px_p->px_dip;
396 	int		rc_err, fab_err;
397 	ddi_fm_error_t	derr;
398 	uint32_t	rp_status;
399 	uint16_t	ce_source, ue_source;
400 	pf_data_t	*pfd_p;
401 
402 	if (px_fm_enter(px_p) != DDI_SUCCESS)
403 		goto done;
404 
405 	/* Create the derr */
406 	bzero(&derr, sizeof (ddi_fm_error_t));
407 	derr.fme_version = DDI_FME_VERSION;
408 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
409 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
410 
411 	px_err_safeacc_check(px_p, &derr);
412 
413 	if (msg_code == PCIE_MSG_CODE_ERR_COR) {
414 		rp_status = PCIE_AER_RE_STS_CE_RCVD;
415 		ce_source = rid;
416 		ue_source = 0;
417 	} else {
418 		rp_status = PCIE_AER_RE_STS_FE_NFE_RCVD;
419 		ce_source = 0;
420 		ue_source = rid;
421 		if (msg_code == PCIE_MSG_CODE_ERR_NONFATAL)
422 			rp_status |= PCIE_AER_RE_STS_NFE_MSGS_RCVD;
423 		else {
424 			rp_status |= PCIE_AER_RE_STS_FE_MSGS_RCVD;
425 			rp_status |= PCIE_AER_RE_STS_FIRST_UC_FATAL;
426 		}
427 	}
428 
429 	if (derr.fme_flag == DDI_FM_ERR_UNEXPECTED) {
430 		ddi_fm_ereport_post(rpdip, PCI_ERROR_SUBCLASS "." PCIEX_FABRIC,
431 		    derr.fme_ena,
432 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
433 		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
434 		    "pcie_adv_rp_status", DATA_TYPE_UINT32, rp_status,
435 		    "pcie_adv_rp_command", DATA_TYPE_UINT32, 0,
436 		    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16, ce_source,
437 		    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16, ue_source,
438 		    NULL);
439 	}
440 
441 	/* Ensure that the rid of the fabric message will get scanned. */
442 	pfd_p = px_rp_en_q(px_p, rid, NULL, NULL);
443 	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_FABRIC;
444 
445 	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);
446 
447 	/* call rootport dispatch */
448 	fab_err = px_scan_fabric(px_p, rpdip, &derr);
449 
450 	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
451 	px_fm_exit(px_p);
452 	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);
453 
454 done:
455 	return (DDI_INTR_CLAIMED);
456 }
457 
458 /*
459  * px_scan_fabric:
460  *
461  * Check for drain state and if there is anything to scan.
462  *
463  * Note on pfd: Different interrupts will populate the pfd's differently.  The
464  * px driver can have a total of 5 different error sources, so it has a queue of
465  * 5 pfds.  Each valid PDF is linked together and passed to pf_scan_fabric.
466  *
467  * Each error handling will populate the following info in the pfd
468  *
469  *			Root Fault	 Intr Src	 Affected BDF
470  *			----------------+---------------+------------
471  * Callback/CPU Trap	Address/BDF	|DATA		|Lookup Addr
472  * Mondo 62/63 (sun4u)	decode error	|N/A		|N/A
473  * EPKT (sun4v)		decode epkt	|INTERNAL	|decode epkt
474  * Fabric Message	fabric payload	|FABRIC		|NULL
475  * Peek/Poke		Address/BDF	|NULL		|NULL
476  *			----------------+---------------+------------
477  */
478 int
479 px_scan_fabric(px_t *px_p, dev_info_t *rpdip, ddi_fm_error_t *derr) {
480 	int fab_err = 0;
481 
482 	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
483 
484 	if (!px_lib_is_in_drain_state(px_p) && px_p->px_pfd_idx) {
485 		fab_err = pf_scan_fabric(rpdip, derr, px_p->px_pfd_arr);
486 	}
487 
488 	return (fab_err);
489 }
490 
491 /*
492  * px_err_safeacc_check:
493  * Check to see if a peek/poke and cautious access is currently being
494  * done on a particular leaf.
495  *
496  * Safe access reads induced fire errors will be handled by cpu trap handler
497  * which will call px_fm_callback() which calls this function. In that
498  * case, the derr fields will be set by trap handler with the correct values.
499  *
500  * Safe access writes induced errors will be handled by px interrupt
501  * handlers, this function will fill in the derr fields.
502  *
503  * If a cpu trap does occur, it will quiesce all other interrupts allowing
504  * the cpu trap error handling to finish before Fire receives an interrupt.
505  *
506  * If fire does indeed have an error when a cpu trap occurs as a result of
507  * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
508  * In which case derr will be initialized as "UNEXPECTED" by the interrupt
509  * handler and this function will need to find if this error occured in the
510  * middle of a safe access operation.
511  *
512  * @param px_p		leaf in which to check access
513  * @param derr		fm err data structure to be updated
514  */
515 void
516 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
517 {
518 	px_pec_t 	*pec_p = px_p->px_pec_p;
519 	int		acctype = pec_p->pec_safeacc_type;
520 
521 	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
522 
523 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
524 		return;
525 	}
526 
527 	/* safe access checking */
528 	switch (acctype) {
529 	case DDI_FM_ERR_EXPECTED:
530 		/*
531 		 * cautious access protection, protected from all err.
532 		 */
533 		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
534 		    DDI_FME_VERSION);
535 		derr->fme_flag = acctype;
536 		derr->fme_acc_handle = pec_p->pec_acc_hdl;
537 		break;
538 	case DDI_FM_ERR_POKE:
539 		/*
540 		 * ddi_poke protection, check nexus and children for
541 		 * expected errors.
542 		 */
543 		membar_sync();
544 		derr->fme_flag = acctype;
545 		break;
546 	case DDI_FM_ERR_PEEK:
547 		derr->fme_flag = acctype;
548 		break;
549 	}
550 }
551 
552 /*
553  * Suggest panic if any EQ (except CE q) has overflown.
554  */
555 int
556 px_err_check_eq(dev_info_t *dip)
557 {
558 	px_t			*px_p = DIP_TO_STATE(dip);
559 	px_msiq_state_t 	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
560 	px_pec_t		*pec_p = px_p->px_pec_p;
561 	msiqid_t		eq_no = msiq_state_p->msiq_1st_msiq_id;
562 	pci_msiq_state_t	msiq_state;
563 	int			i;
564 
565 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
566 		if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */
567 			continue;
568 		if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) !=
569 		    DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR)
570 			return (PX_PANIC);
571 	}
572 	return (PX_NO_PANIC);
573 }
574 
575 /* ARGSUSED */
576 int
577 px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs,
578     pf_intr_type_t intr_type)
579 {
580 	px_t		*px_p = DIP_TO_STATE(dip);
581 	pf_data_t	*pfd_p = px_get_pfd(px_p);
582 	int		i;
583 	pf_pcie_adv_err_regs_t *adv_reg = PCIE_ADV_REG(pfd_p);
584 
585 	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = intr_type;
586 
587 	/*
588 	 * set RC s_status in PCI term to coordinate with downstream fabric
589 	 * errors ananlysis.
590 	 */
591 	if (regs->primary_ue & PCIE_AER_UCE_UR)
592 		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_MAST_AB;
593 	if (regs->primary_ue & PCIE_AER_UCE_CA)
594 		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_TARG_AB;
595 	if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC))
596 		PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_PERROR;
597 
598 	if (!regs->primary_ue)
599 		goto done;
600 
601 	adv_reg->pcie_ce_status = regs->ce_reg;
602 	adv_reg->pcie_ue_status = regs->ue_reg | regs->primary_ue;
603 	PCIE_ADV_HDR(pfd_p, 0) = regs->rx_hdr1;
604 	PCIE_ADV_HDR(pfd_p, 1) = regs->rx_hdr2;
605 	PCIE_ADV_HDR(pfd_p, 2) = regs->rx_hdr3;
606 	PCIE_ADV_HDR(pfd_p, 3) = regs->rx_hdr4;
607 	for (i = regs->primary_ue; i != 1; i = i >> 1)
608 		adv_reg->pcie_adv_ctl++;
609 
610 	if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) {
611 		if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS)
612 			PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
613 			    adv_reg->pcie_ue_tgt_bdf;
614 	} else if (regs->primary_ue & PCIE_AER_UCE_PTLP) {
615 		if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS) {
616 			PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
617 			    adv_reg->pcie_ue_tgt_bdf;
618 			if (adv_reg->pcie_ue_tgt_trans ==
619 			    PF_ADDR_PIO)
620 				PCIE_ROOT_FAULT(pfd_p)->scan_addr =
621 				    adv_reg->pcie_ue_tgt_addr;
622 		}
623 
624 		/*
625 		 * Normally for Poisoned Completion TLPs we can look at the
626 		 * transmit log header for the original request and the original
627 		 * address, however this doesn't seem to be working.  HW BUG.
628 		 */
629 	}
630 
631 done:
632 	px_pcie_log(dip, regs);
633 
634 	/* Return No Error here and let the pcie misc module analyse it */
635 	return (PX_NO_ERROR);
636 }
637 
#if defined(DEBUG)
/*
 * DEBUG only: emit one debug message with the captured PCIe root complex
 * error state - the CE, UE and primary UE registers followed by the four
 * words of the transmit and of the receive header logs.
 */
static void
px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs)
{
	DBG(DBG_ERR_INTR, dip,
	    "A PCIe RC error has occured\n"
	    "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n"
	    "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n"
	    "\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n",
	    regs->ce_reg,
	    regs->ue_reg,
	    regs->primary_ue,
	    regs->tx_hdr1, regs->tx_hdr2, regs->tx_hdr3, regs->tx_hdr4,
	    regs->rx_hdr1, regs->rx_hdr2, regs->rx_hdr3, regs->rx_hdr4);
}
#endif	/* DEBUG */
651 
652 /*
653  * look through poisoned TLP cases and suggest panic/no panic depend on
654  * handle lookup.
655  */
656 static int
657 px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
658 {
659 	pf_pcie_adv_err_regs_t adv_reg;
660 	pcie_req_id_t	bdf;
661 	uint64_t	addr;
662 	uint32_t	trans_type;
663 	int		tlp_sts, tlp_cmd;
664 	int		lookup = PF_HDL_NOTFOUND;
665 
666 	if (regs->primary_ue != PCIE_AER_UCE_PTLP)
667 		return (PX_PANIC);
668 
669 	if (!regs->rx_hdr1)
670 		goto done;
671 
672 	adv_reg.pcie_ue_hdr[0] = regs->rx_hdr1;
673 	adv_reg.pcie_ue_hdr[1] = regs->rx_hdr2;
674 	adv_reg.pcie_ue_hdr[2] = regs->rx_hdr3;
675 	adv_reg.pcie_ue_hdr[3] = regs->rx_hdr4;
676 
677 	tlp_sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
678 	tlp_cmd = ((pcie_tlp_hdr_t *)(adv_reg.pcie_ue_hdr))->type;
679 
680 	if (tlp_sts == DDI_FAILURE)
681 		goto done;
682 
683 	bdf = adv_reg.pcie_ue_tgt_bdf;
684 	addr = adv_reg.pcie_ue_tgt_addr;
685 	trans_type = adv_reg.pcie_ue_tgt_trans;
686 
687 	switch (tlp_cmd) {
688 	case PCIE_TLP_TYPE_CPL:
689 	case PCIE_TLP_TYPE_CPLLK:
690 		/*
691 		 * Usually a PTLP is a CPL with data.  Grab the completer BDF
692 		 * from the RX TLP, and the original address from the TX TLP.
693 		 */
694 		if (regs->tx_hdr1) {
695 			adv_reg.pcie_ue_hdr[0] = regs->tx_hdr1;
696 			adv_reg.pcie_ue_hdr[1] = regs->tx_hdr2;
697 			adv_reg.pcie_ue_hdr[2] = regs->tx_hdr3;
698 			adv_reg.pcie_ue_hdr[3] = regs->tx_hdr4;
699 
700 			lookup = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
701 			if (lookup != DDI_SUCCESS)
702 				break;
703 			addr = adv_reg.pcie_ue_tgt_addr;
704 			trans_type = adv_reg.pcie_ue_tgt_trans;
705 		} /* FALLTHRU */
706 	case PCIE_TLP_TYPE_IO:
707 	case PCIE_TLP_TYPE_MEM:
708 	case PCIE_TLP_TYPE_MEMLK:
709 		lookup = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr,
710 		    bdf);
711 		break;
712 	default:
713 		lookup = PF_HDL_NOTFOUND;
714 	}
715 done:
716 	return (lookup == PF_HDL_FOUND ? PX_NO_PANIC : PX_PANIC);
717 }
718 
719 /*
720  * px_get_pdf automatically allocates a RC pf_data_t and returns a pointer to
721  * it.  This function should be used when an error requires a fabric scan.
722  */
723 pf_data_t *
724 px_get_pfd(px_t *px_p) {
725 	int		idx = px_p->px_pfd_idx++;
726 	pf_data_t	*pfd_p = &px_p->px_pfd_arr[idx];
727 
728 	/* Clear Old Data */
729 	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
730 	PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
731 	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
732 	PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
733 	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = NULL;
734 	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
735 	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = 0;
736 	PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
737 	PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
738 	PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
739 
740 	pfd_p->pe_next = NULL;
741 
742 	if (idx > 0) {
743 		px_p->px_pfd_arr[idx - 1].pe_next = pfd_p;
744 		pfd_p->pe_prev = &px_p->px_pfd_arr[idx - 1];
745 	} else {
746 		pfd_p->pe_prev = NULL;
747 	}
748 
749 	pfd_p->pe_severity_flags = 0;
750 	pfd_p->pe_orig_severity_flags = 0;
751 	pfd_p->pe_valid = B_TRUE;
752 
753 	return (pfd_p);
754 }
755 
756 /*
757  * This function appends a pf_data structure to the error q which is used later
758  * during PCIe fabric scan.  It signifies:
759  * o errs rcvd in RC, that may have been propagated to/from the fabric
760  * o the fabric scan code should scan the device path of fault bdf/addr
761  *
762  * scan_bdf: The bdf that caused the fault, which may have error bits set.
763  * scan_addr: The PIO addr that caused the fault, such as failed PIO, but not
764  *	       failed DMAs.
765  * s_status: Secondary Status equivalent to why the fault occured.
766  *	     (ie S-TA/MA, R-TA)
767  * Either the scan bdf or addr may be NULL, but not both.
768  */
769 pf_data_t *
770 px_rp_en_q(px_t *px_p, pcie_req_id_t scan_bdf, uint32_t scan_addr,
771     uint16_t s_status)
772 {
773 	pf_data_t	*pfd_p;
774 
775 	if (!PCIE_CHECK_VALID_BDF(scan_bdf) && !scan_addr)
776 		return (NULL);
777 
778 	pfd_p = px_get_pfd(px_p);
779 
780 	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = scan_bdf;
781 	PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)scan_addr;
782 	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
783 
784 	return (pfd_p);
785 }
786 
787 
788 /*
789  * Find and Mark CFG Handles as failed associated with the given BDF. We should
790  * always know the BDF for CFG accesses, since it is encoded in the address of
791  * the TLP.  Since there can be multiple cfg handles, mark them all as failed.
792  */
793 /* ARGSUSED */
794 int
795 px_err_cfg_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
796     const void *arg2)
797 {
798 	int			status = DDI_FM_FATAL;
799 	uint32_t		addr = *(uint32_t *)arg1;
800 	uint16_t		bdf = *(uint16_t *)arg2;
801 	pcie_bus_t		*bus_p;
802 
803 	DBG(DBG_ERR_INTR, dip, "Check CFG Hdl: dip 0x%p addr 0x%x bdf=0x%x\n",
804 	    dip, addr, bdf);
805 
806 	bus_p = PCIE_DIP2BUS(dip);
807 
808 	/*
809 	 * Because CFG and IO Acc Handlers are on the same cache list and both
810 	 * types of hdls gets called for both types of errors.  For this checker
811 	 * only mark the device as "Non-Fatal" if the addr == NULL and bdf !=
812 	 * NULL.
813 	 */
814 	status = (!addr && (PCIE_CHECK_VALID_BDF(bdf) &&
815 	    (bus_p->bus_bdf == bdf))) ? DDI_FM_NONFATAL : DDI_FM_FATAL;
816 
817 	return (status);
818 }
819 
820 /*
821  * Find and Mark all ACC Handles associated with a give address and BDF as
822  * failed.  If the BDF != NULL, then check to see if the device has a ACC Handle
823  * associated with ADDR.  If the handle is not found, mark all the handles as
824  * failed.  If the BDF == NULL, mark the handle as failed if it is associated
825  * with ADDR.
826  */
827 int
828 px_err_pio_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
829     const void *arg2)
830 {
831 	dev_info_t		*px_dip;
832 	px_t			*px_p;
833 	pci_ranges_t		*ranges_p;
834 	int			range_len;
835 	ddi_acc_handle_t	ap = (ddi_acc_handle_t)handle;
836 	ddi_acc_hdl_t		*hp = impl_acc_hdl_get(ap);
837 	int			i, status = DDI_FM_FATAL;
838 	uint64_t		fault_addr = *(uint64_t *)arg1;
839 	uint16_t		bdf = *(uint16_t *)arg2;
840 	uint64_t		base_addr, range_addr;
841 	uint_t			size;
842 
843 	/*
844 	 * Find the correct px dip.  On system with a real Root Port, it's the
845 	 * node above the root port.  On systems without a real Root Port the px
846 	 * dip is the bus_rp_dip.
847 	 */
848 	px_dip = PCIE_DIP2BUS(dip)->bus_rp_dip;
849 
850 	if (!PCIE_IS_RC(PCIE_DIP2BUS(px_dip)))
851 		px_dip = ddi_get_parent(px_dip);
852 
853 	ASSERT(PCIE_IS_RC(PCIE_DIP2BUS(px_dip)));
854 	px_p = INST_TO_STATE(ddi_get_instance(px_dip));
855 
856 	DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
857 	    dip, fault_addr, bdf);
858 
859 	/* Normalize the base addr to the addr and strip off the HB info. */
860 	base_addr = (hp->ah_pfn << MMU_PAGESHIFT) + hp->ah_offset;
861 	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
862 	i = 0;
863 	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
864 		range_addr = px_in_addr_range(dip, ranges_p, base_addr);
865 		if (range_addr) {
866 			switch (ranges_p->child_high & PCI_ADDR_MASK) {
867 			case PCI_ADDR_IO:
868 			case PCI_ADDR_MEM64:
869 			case PCI_ADDR_MEM32:
870 				base_addr = base_addr - range_addr;
871 				break;
872 			}
873 			break;
874 		}
875 	}
876 
877 	/*
878 	 * Mark the handle as failed if the ADDR is mapped, or if we
879 	 * know the BDF and ADDR == 0.
880 	 */
881 	size = hp->ah_len;
882 	if (((fault_addr >= base_addr) && (fault_addr < (base_addr + size))) ||
883 	    ((fault_addr == NULL) && (PCIE_CHECK_VALID_BDF(bdf) &&
884 	    (bdf == PCIE_DIP2BUS(dip)->bus_bdf))))
885 		status = DDI_FM_NONFATAL;
886 
887 	return (status);
888 }
889 
890 /*
891  * Find and Mark all DNA Handles associated with a give address and BDF as
892  * failed.  If the BDF != NULL, then check to see if the device has a DMA Handle
893  * associated with ADDR.  If the handle is not found, mark all the handles as
894  * failed.  If the BDF == NULL, mark the handle as failed if it is associated
895  * with ADDR.
896  */
897 int
898 px_err_dma_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
899     const void *arg2)
900 {
901 	ddi_dma_impl_t		*pcie_dp;
902 	int			status = DDI_FM_FATAL;
903 	uint32_t		addr = *(uint32_t *)arg1;
904 	uint16_t		bdf = *(uint16_t *)arg2;
905 	uint32_t		base_addr;
906 	uint_t			size;
907 
908 	DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
909 	    dip, addr, bdf);
910 
911 	pcie_dp = (ddi_dma_impl_t *)handle;
912 	base_addr = (uint32_t)pcie_dp->dmai_mapping;
913 	size = pcie_dp->dmai_size;
914 
915 	/*
916 	 * Mark the handle as failed if the ADDR is mapped, or if we
917 	 * know the BDF and ADDR == 0.
918 	 */
919 	if (((addr >= base_addr) && (addr < (base_addr + size))) ||
920 	    ((addr == NULL) && PCIE_CHECK_VALID_BDF(bdf)))
921 		status = DDI_FM_NONFATAL;
922 
923 	return (status);
924 }
925 
926 int
927 px_fm_enter(px_t *px_p) {
928 	if (px_panicing || (px_p->px_fm_mutex_owner == curthread))
929 		return (DDI_FAILURE);
930 
931 	mutex_enter(&px_p->px_fm_mutex);
932 	/*
933 	 * In rare cases when trap occurs and in the middle of scanning the
934 	 * fabric, a PIO will fail in the scan fabric.  The CPU error handling
935 	 * code will correctly panic the system, while a mondo for the failed
936 	 * PIO may also show up.  Normally the mondo will try to grab the mutex
937 	 * and wait until the callback finishes.  But in this rare case,
938 	 * mutex_enter actually suceeds also continues to scan the fabric.
939 	 *
940 	 * This code below is designed specifically to check for this case.  If
941 	 * we successfully grab the px_fm_mutex, the px_fm_mutex_owner better be
942 	 * NULL.  If it isn't that means we are in the rare corner case.  Return
943 	 * DDI_FAILURE, this should prevent PX from doing anymore error
944 	 * handling.
945 	 */
946 	if (px_p->px_fm_mutex_owner) {
947 		return (DDI_FAILURE);
948 	}
949 
950 	px_p->px_fm_mutex_owner = curthread;
951 
952 	if (px_panicing) {
953 		px_fm_exit(px_p);
954 		return (DDI_FAILURE);
955 	}
956 
957 	/* Signal the PCIe error handling module error handling is starting */
958 	pf_eh_enter(PCIE_DIP2BUS(px_p->px_dip));
959 
960 	return (DDI_SUCCESS);
961 }
962 
963 static void
964 px_guest_panic(px_t *px_p)
965 {
966 	pf_data_t *root_pfd_p = PCIE_DIP2PFD(px_p->px_dip);
967 	pf_data_t *pfd_p;
968 	pcie_bus_t *bus_p, *root_bus_p;
969 	pcie_req_id_list_t *rl;
970 
971 	/*
972 	 * check if all devices under the root device are unassigned.
973 	 * this function should quickly return in non-IOV environment.
974 	 */
975 	root_bus_p = PCIE_PFD2BUS(root_pfd_p);
976 	if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
977 		return;
978 
979 	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
980 		bus_p = PCIE_PFD2BUS(pfd_p);
981 
982 		/* assume all affected devs were in the error Q */
983 		if (!PCIE_BUS2DOM(bus_p)->nfma_panic)
984 			continue;
985 
986 		if (PCIE_IS_BDG(bus_p)) {
987 			rl = PCIE_BDF_LIST_GET(bus_p);
988 			while (rl) {
989 				px_panic_domain(px_p, rl->bdf);
990 				rl = rl->next;
991 			}
992 		} else {
993 			px_panic_domain(px_p, bus_p->bus_bdf);
994 		}
995 		/* clear panic flag */
996 		PCIE_BUS2DOM(bus_p)->nfma_panic = B_FALSE;
997 	}
998 }
999 
1000 void
1001 px_fm_exit(px_t *px_p) {
1002 	px_p->px_fm_mutex_owner = NULL;
1003 	if (px_p->px_pfd_idx == 0) {
1004 		mutex_exit(&px_p->px_fm_mutex);
1005 		return;
1006 	}
1007 	/* panic the affected domains that are non-fma-capable */
1008 	px_guest_panic(px_p);
1009 	/* Signal the PCIe error handling module error handling is ending */
1010 	pf_eh_exit(PCIE_DIP2BUS(px_p->px_dip));
1011 	px_p->px_pfd_idx = 0;
1012 	mutex_exit(&px_p->px_fm_mutex);
1013 }
1014 
1015 /*
1016  * Panic if the err tunable is set and that we are not already in the middle
1017  * of panic'ing.
1018  *
1019  * rc_err = Error severity of PX specific errors
1020  * msg = Where the error was detected
1021  * fabric_err = Error severity of PCIe Fabric errors
1022  * isTest = Test if error severity causes panic
1023  */
1024 #define	MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1)
1025 void
1026 px_err_panic(int rc_err, int msg, int fabric_err, boolean_t isTest)
1027 {
1028 	char fm_msg[96] = "";
1029 	int ferr = PX_NO_ERROR;
1030 
1031 	if (panicstr) {
1032 		px_panicing = B_TRUE;
1033 		return;
1034 	}
1035 
1036 	if (!(rc_err & px_die))
1037 		goto fabric;
1038 	if (msg & PX_RC)
1039 		(void) strncat(fm_msg, px_panic_rc_msg, MSZ);
1040 	if (msg & PX_RP)
1041 		(void) strncat(fm_msg, px_panic_rp_msg, MSZ);
1042 	if (msg & PX_HB)
1043 		(void) strncat(fm_msg, px_panic_hb_msg, MSZ);
1044 
1045 fabric:
1046 	if (fabric_err & PF_ERR_FATAL_FLAGS)
1047 		ferr = PX_PANIC;
1048 	else if (fabric_err & ~(PF_ERR_FATAL_FLAGS | PF_ERR_NO_ERROR))
1049 		ferr = PX_NO_PANIC;
1050 
1051 	if (ferr & px_die) {
1052 		if (strlen(fm_msg)) {
1053 			(void) strncat(fm_msg, " and", MSZ);
1054 		}
1055 		(void) strncat(fm_msg, px_panic_fab_msg, MSZ);
1056 	}
1057 
1058 	if (strlen(fm_msg)) {
1059 		px_panicing = B_TRUE;
1060 		if (!isTest)
1061 			fm_panic("Fatal error has occured in:%s.(0x%x)(0x%x)",
1062 			    fm_msg, rc_err, fabric_err);
1063 	}
1064 }
1065