xref: /titanic_51/usr/src/uts/sun4/io/px/px_fm.c (revision 83fcdc8cfa9b16b358b13c5dd920d71bbaf4a8b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX Fault Management Architecture
31  */
32 #include <sys/types.h>
33 #include <sys/sunndi.h>
34 #include <sys/sunddi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/membar.h>
38 #include "px_obj.h"
39 
40 typedef struct px_fabric_cfgspace {
41 	/* Error information */
42 	msgcode_t	msg_code;
43 	pcie_req_id_t	rid;
44 
45 	/* Config space header and device type */
46 	uint8_t		hdr_type;
47 	uint16_t	dev_type;
48 
49 	/* Register pointers */
50 	uint16_t	cap_off;
51 	uint16_t	aer_off;
52 
53 	/* PCI register values */
54 	uint32_t	sts_reg;
55 	uint32_t	sts_sreg;
56 
57 	/* PCIE register values */
58 	uint32_t	dev_sts_reg;
59 	uint32_t	aer_ce_reg;
60 	uint32_t	aer_ue_reg;
61 	uint32_t	aer_sev_reg;
62 	uint32_t	aer_ue_sreg;
63 	uint32_t	aer_sev_sreg;
64 
65 	/* PCIE Header Log Registers */
66 	uint32_t	aer_h1;
67 	uint32_t	aer_h2;
68 	uint32_t	aer_h3;
69 	uint32_t	aer_h4;
70 	uint32_t	aer_sh1;
71 	uint32_t	aer_sh2;
72 	uint32_t	aer_sh3;
73 	uint32_t	aer_sh4;
74 } px_fabric_cfgspace_t;
75 
76 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid);
77 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid);
78 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs);
79 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs);
80 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs);
81 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs);
82 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs);
83 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs);
84 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code,
85     pcie_req_id_t rid, ddi_fm_error_t *derr);
86 
87 /*
88  * Initialize px FMA support
89  */
90 int
91 px_fm_attach(px_t *px_p)
92 {
93 	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
94 		DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
95 
96 	/*
97 	 * check parents' capability
98 	 */
99 	ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
100 
101 	/*
102 	 * parents need to be ereport and error handling capable
103 	 */
104 	ASSERT(px_p->px_fm_cap &&
105 	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
106 
107 	/*
108 	 * register error callback in parent
109 	 */
110 	ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
111 
112 	return (DDI_SUCCESS);
113 }
114 
115 /*
116  * Deregister FMA
117  */
118 void
119 px_fm_detach(px_t *px_p)
120 {
121 	ddi_fm_handler_unregister(px_p->px_dip);
122 	ddi_fm_fini(px_p->px_dip);
123 }
124 
125 /*
126  * Function used to setup access functions depending on level of desired
127  * protection.
128  */
129 void
130 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip)
131 {
132 	uchar_t fflag;
133 	ddi_acc_hdl_t *hp;
134 	ddi_acc_impl_t *ap;
135 
136 	hp = mp->map_handlep;
137 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
138 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
139 
140 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
141 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
142 		switch (fflag) {
143 		case DDI_FLAGERR_ACC:
144 			ap->ahi_get8 = i_ddi_prot_get8;
145 			ap->ahi_get16 = i_ddi_prot_get16;
146 			ap->ahi_get32 = i_ddi_prot_get32;
147 			ap->ahi_get64 = i_ddi_prot_get64;
148 			ap->ahi_put8 = i_ddi_prot_put8;
149 			ap->ahi_put16 = i_ddi_prot_put16;
150 			ap->ahi_put32 = i_ddi_prot_put32;
151 			ap->ahi_put64 = i_ddi_prot_put64;
152 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
153 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
154 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
155 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
156 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
157 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
158 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
159 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
160 			break;
161 		case DDI_CAUTIOUS_ACC :
162 			ap->ahi_get8 = i_ddi_caut_get8;
163 			ap->ahi_get16 = i_ddi_caut_get16;
164 			ap->ahi_get32 = i_ddi_caut_get32;
165 			ap->ahi_get64 = i_ddi_caut_get64;
166 			ap->ahi_put8 = i_ddi_caut_put8;
167 			ap->ahi_put16 = i_ddi_caut_put16;
168 			ap->ahi_put32 = i_ddi_caut_put32;
169 			ap->ahi_put64 = i_ddi_caut_put64;
170 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
171 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
172 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
173 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
174 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
175 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
176 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
177 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
178 			break;
179 		default:
180 			break;
181 		}
182 	} else if (mp->map_op == DDI_MO_UNMAP) {
183 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
184 	}
185 }
186 
187 /*
188  * Function called after a dma fault occurred to find out whether the
189  * fault address is associated with a driver that is able to handle faults
190  * and recover from faults. The driver has to set DDI_DMA_FLAGERR and
191  * cache dma handles in order to make this checking effective to help
192  * recovery from dma faults.
193  */
194 /* ARGSUSED */
195 static int
196 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr,
197     const void *not_used)
198 {
199 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
200 	pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
201 	pfn_t comp_pfn;
202 	int page;
203 
204 	/*
205 	 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not
206 	 * been effectively initialized during attach.
207 	 */
208 	ASSERT(mp);
209 
210 	for (page = 0; page < mp->dmai_ndvmapages; page++) {
211 		comp_pfn = PX_GET_MP_PFN(mp, page);
212 		if (fault_pfn == comp_pfn)
213 			return (DDI_FM_NONFATAL);
214 	}
215 
216 	return (DDI_FM_UNKNOWN);
217 }
218 
219 /*
220  * Function used to check if a given access handle owns the failing address.
221  * Called by ndi_fmc_error, when we detect a PIO error.
222  */
223 /* ARGSUSED */
224 static int
225 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr,
226     const void *not_used)
227 {
228 	pfn_t pfn, fault_pfn;
229 	ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle);
230 
231 	/*
232 	 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not
233 	 * been effectively initialized during attach.
234 	 */
235 	ASSERT(hp);
236 
237 	pfn = hp->ah_pfn;
238 	fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
239 	if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum))
240 		return (DDI_FM_NONFATAL);
241 
242 	return (DDI_FM_UNKNOWN);
243 }
244 
245 /*
246  * Function used by PCI error handlers to check if captured address is stored
247  * in the DMA or ACC handle caches.
248  */
249 int
250 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar)
251 {
252 	uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap;
253 	int	ret = DDI_FM_FATAL;
254 
255 	int (*f)() = type == DMA_HANDLE ?
256 	    (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) :
257 	    (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL);
258 
259 	if (f)
260 		ret = ndi_fmc_error(dip, NULL, type, f, fme_ena, afar);
261 
262 	return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret);
263 }
264 
265 /*
266  * Function used to initialize FMA for our children nodes. Called
267  * through pci busops when child node calls ddi_fm_init.
268  */
269 /*ARGSUSED*/
270 int
271 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
272     ddi_iblock_cookie_t *ibc_p)
273 {
274 	px_t *px_p = DIP_TO_STATE(dip);
275 
276 	ASSERT(ibc_p != NULL);
277 	*ibc_p = px_p->px_fm_ibc;
278 
279 	return (px_p->px_fm_cap);
280 }
281 
282 /*
283  * lock access for exclusive PCIe access
284  */
285 void
286 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
287 {
288 	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
289 
290 	/*
291 	 * Exclusive access has been used for cautious put/get,
292 	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
293 	 * similar protection as what on_trap() does, and which calls
294 	 * membar  #Sync to flush out all cpu deferred errors
295 	 * prior to get/put operation, so here we're not calling
296 	 * membar  #Sync - a difference from what's in pci_bus_enter().
297 	 */
298 	mutex_enter(&pec_p->pec_pokefault_mutex);
299 	pec_p->pec_acc_hdl = handle;
300 }
301 
302 /*
303  * unlock access for exclusive PCIe access
304  */
305 /* ARGSUSED */
306 void
307 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
308 {
309 	px_t		*px_p = DIP_TO_STATE(dip);
310 	px_pec_t	*pec_p = px_p->px_pec_p;
311 
312 	pec_p->pec_acc_hdl = NULL;
313 	mutex_exit(&pec_p->pec_pokefault_mutex);
314 }
315 
316 
317 /*
318  * PCI error callback which is registered with our parent to call
319  * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
320  * and PCI BERR/TO/UE
321  *
322  * Dispatch on all known leaves of this fire device because we cannot tell
323  * which side the error came from.
324  */
325 /*ARGSUSED*/
326 int
327 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
328 {
329 	px_t	*px_p = (px_t *)impl_data;
330 	px_cb_t	*cb_p = px_p->px_cb_p;
331 	int	err = PX_OK;
332 	int	fatal = 0;
333 	int	nonfatal = 0;
334 	int	unknown = 0;
335 	int	ret = DDI_FM_OK;
336 	int	i;
337 
338 	mutex_enter(&cb_p->xbc_fm_mutex);
339 
340 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
341 		px_p = cb_p->xbc_px_list[i];
342 		if (px_p != NULL)
343 			err |= px_err_handle(px_p, derr, PX_TRAP_CALL,
344 			    (i == 0));
345 	}
346 
347 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
348 		px_p = cb_p->xbc_px_list[i];
349 		if (px_p != NULL) {
350 			ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr);
351 			switch (ret) {
352 			case DDI_FM_FATAL:
353 				fatal++;
354 				break;
355 			case DDI_FM_NONFATAL:
356 				nonfatal++;
357 				break;
358 			case DDI_FM_UNKNOWN:
359 				unknown++;
360 				break;
361 			default:
362 				break;
363 			}
364 		}
365 	}
366 	mutex_exit(&cb_p->xbc_fm_mutex);
367 
368 	ret = (fatal != 0) ? DDI_FM_FATAL :
369 	    ((nonfatal != 0) ? DDI_FM_NONFATAL :
370 	    (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK)));
371 
372 	/* fire fatal error overrides device error */
373 	if (err & (PX_FATAL_GOS | PX_FATAL_SW))
374 		ret = DDI_FM_FATAL;
375 	/* if fire encounts no error, then take whatever device error */
376 	else if ((err != PX_OK) && (ret != DDI_FM_FATAL))
377 		ret = DDI_FM_NONFATAL;
378 
379 	return (ret);
380 }
381 
382 static uint16_t
383 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid)
384 {
385 	uint32_t	hdr, hdr_next_ptr, hdr_cap_id;
386 	uint16_t	offset = PCIE_EXT_CAP;
387 	int		deadcount = 0;
388 
389 	/* Find the Advanced Error Register */
390 	hdr = px_fab_get(px_p, rid, offset);
391 	hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) &
392 	    PCIE_EXT_CAP_NEXT_PTR_MASK;
393 	hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) &
394 	    PCIE_EXT_CAP_ID_MASK;
395 
396 	while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) &&
397 	    (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) {
398 		offset = hdr_next_ptr;
399 		hdr = px_fab_get(px_p, rid, offset);
400 		hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) &
401 		    PCIE_EXT_CAP_NEXT_PTR_MASK;
402 		hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) &
403 		    PCIE_EXT_CAP_ID_MASK;
404 
405 		if (deadcount++ > 100)
406 			break;
407 	}
408 
409 	if (hdr_cap_id == PCIE_EXT_CAP_ID_AER)
410 		return (offset);
411 
412 	return (0);
413 }
414 
415 static uint16_t
416 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid)
417 {
418 	uint32_t	hdr, hdr_next_ptr, hdr_cap_id;
419 	uint16_t	offset = PCI_CONF_STAT;
420 	int		deadcount = 0;
421 
422 	hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16;
423 	if (!(hdr & PCI_STAT_CAP)) {
424 		/* This is not a PCIE device */
425 		return (0);
426 	}
427 
428 	hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR);
429 	hdr_next_ptr = hdr & 0xFF;
430 	hdr_cap_id = 0;
431 
432 	while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) &&
433 	    (hdr_cap_id != PCI_CAP_ID_PCI_E)) {
434 		offset = hdr_next_ptr;
435 
436 		if (hdr_next_ptr < 0x40) {
437 			break;
438 		}
439 
440 		hdr = px_fab_get(px_p, rid, hdr_next_ptr);
441 		hdr_next_ptr = (hdr >> 8) & 0xFF;
442 		hdr_cap_id = hdr & 0xFF;
443 
444 		if (deadcount++ > 100)
445 			break;
446 	}
447 
448 	if (hdr_cap_id == PCI_CAP_ID_PCI_E)
449 		return (offset);
450 
451 	return (0);
452 }
453 
454 /*
455  * This function checks the primary status registers.
456  * Take the PCI status register and translate it to PCIe equivalent.
457  */
458 static int
459 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) {
460 	uint16_t	sts_reg = cs->sts_reg >> 16;
461 	uint16_t	pci_status;
462 	uint32_t	pcie_status;
463 	int		ret = PX_NONFATAL;
464 
465 	/* Parity Err == Send/Recv Poisoned TLP */
466 	pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR;
467 	pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC;
468 	if (sts_reg & pci_status)
469 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
470 		    px_fabric_die_ue, px_fabric_die_ue_gos);
471 
472 	/* Target Abort == Completer Abort */
473 	pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB;
474 	pcie_status = PCIE_AER_UCE_CA;
475 	if (sts_reg & pci_status)
476 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
477 		    px_fabric_die_ue, px_fabric_die_ue_gos);
478 
479 	/* Master Abort == Unsupport Request */
480 	pci_status = PCI_STAT_R_MAST_AB;
481 	pcie_status = PCIE_AER_UCE_UR;
482 	if (sts_reg & pci_status)
483 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
484 		    px_fabric_die_ue, px_fabric_die_ue_gos);
485 
486 	/* System Error == Uncorrectable Error */
487 	pci_status = PCI_STAT_S_SYSERR;
488 	pcie_status = -1;
489 	if (sts_reg & pci_status)
490 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
491 		    px_fabric_die_ue, px_fabric_die_ue_gos);
492 
493 	return (ret);
494 }
495 
496 /*
497  * This function checks the secondary status registers.
498  * Switches and Bridges have a different behavior.
499  */
500 static int
501 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) {
502 	uint16_t	sts_reg = cs->sts_sreg >> 16;
503 	int		ret = PX_NONFATAL;
504 
505 	if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) {
506 		/*
507 		 * This is a PCIE-PCI bridge, but only check the severity
508 		 * if this device doesn't support AERs.
509 		 */
510 		if (!cs->aer_off)
511 			ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts,
512 			    px_fabric_die_bdg_sts_gos);
513 	} else {
514 		/* This is most likely a PCIE switch */
515 		ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts,
516 		    px_fabric_die_sw_sts_gos);
517 	}
518 
519 	return (ret);
520 }
521 
522 /*
523  * This function checks and clears the primary AER.
524  */
525 static int
526 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) {
527 	uint32_t	chk_reg, chk_reg_gos, off_reg, reg;
528 	int		ret = PX_NONFATAL;
529 
530 	/* Determine severity and clear the AER */
531 	switch (cs->msg_code) {
532 	case PCIE_MSG_CODE_ERR_COR:
533 		off_reg = PCIE_AER_CE_STS;
534 		chk_reg = px_fabric_die_ce;
535 		chk_reg_gos = px_fabric_die_ce_gos;
536 		reg = cs->aer_ce_reg;
537 		break;
538 	case PCIE_MSG_CODE_ERR_NONFATAL:
539 		off_reg = PCIE_AER_UCE_STS;
540 		chk_reg = px_fabric_die_ue;
541 		chk_reg_gos = px_fabric_die_ue_gos;
542 		reg = cs->aer_ue_reg & ~(cs->aer_sev_reg);
543 		break;
544 	case PCIE_MSG_CODE_ERR_FATAL:
545 		off_reg = PCIE_AER_UCE_STS;
546 		chk_reg = px_fabric_die_ue;
547 		chk_reg_gos = px_fabric_die_ue_gos;
548 		reg = cs->aer_ue_reg & cs->aer_sev_reg;
549 		break;
550 	default:
551 		/* Major error force a panic */
552 		return (PX_FATAL_GOS);
553 	}
554 	px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg);
555 	ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos);
556 
557 	return (ret);
558 }
559 
560 /*
561  * This function checks and clears the secondary AER.
562  */
563 static int
564 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) {
565 	uint32_t	chk_reg, chk_reg_gos, off_reg, reg;
566 	uint32_t	sev;
567 	int		ret = PX_NONFATAL;
568 
569 	/* Determine severity and clear the AER */
570 	switch (cs->msg_code) {
571 	case PCIE_MSG_CODE_ERR_COR:
572 		/* Ignore Correctable Errors */
573 		sev = 0;
574 		break;
575 	case PCIE_MSG_CODE_ERR_NONFATAL:
576 		sev = ~(cs->aer_sev_sreg);
577 		break;
578 	case PCIE_MSG_CODE_ERR_FATAL:
579 		sev = cs->aer_sev_sreg;
580 		break;
581 	default:
582 		/* Major error force a panic */
583 		return (DDI_FM_FATAL);
584 	}
585 	off_reg = PCIE_AER_SUCE_STS;
586 	chk_reg = px_fabric_die_sue;
587 	chk_reg_gos = px_fabric_die_sue_gos;
588 	reg = cs->aer_ue_sreg & sev;
589 	px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg);
590 	ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos);
591 
592 	return (ret);
593 }
594 
595 static int
596 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs)
597 {
598 	pcie_req_id_t	rid = cs->rid;
599 	uint16_t	cap_off = cs->cap_off;
600 	uint16_t	aer_off = cs->aer_off;
601 	uint8_t		hdr_type = cs->hdr_type;
602 	uint16_t	dev_type = cs->dev_type;
603 	int		ret = PX_NONFATAL;
604 
605 	if (hdr_type == PCI_HEADER_PPB) {
606 		ret |= px_fabric_handle_ssts(cs);
607 	}
608 
609 	if (!aer_off) {
610 		ret |= px_fabric_handle_psts(cs);
611 	}
612 
613 	if (aer_off) {
614 		ret |= px_fabric_handle_paer(px_p, cs);
615 	}
616 
617 	if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) {
618 		ret |= px_fabric_handle_saer(px_p, cs);
619 	}
620 
621 	/* Clear the standard PCIe error registers */
622 	px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg);
623 
624 	/* Clear the legacy error registers */
625 	px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg);
626 
627 	/* Clear the legacy secondary error registers */
628 	if (hdr_type == PCI_HEADER_PPB) {
629 		px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW,
630 		    cs->sts_sreg);
631 	}
632 
633 	return (ret);
634 }
635 
636 static void
637 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs)
638 {
639 	uint16_t	cap_off, aer_off;
640 	pcie_req_id_t	rid = cs->rid;
641 
642 	/* Gather Basic Device Information */
643 	cs->hdr_type = (px_fab_get(px_p, rid,
644 			    PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF;
645 
646 	cs->cap_off = px_fabric_get_pciecap(px_p, rid);
647 	cap_off = cs->cap_off;
648 	if (!cap_off)
649 		return;
650 
651 	cs->aer_off = px_fabric_get_aer(px_p, rid);
652 	aer_off = cs->aer_off;
653 
654 	cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16;
655 	cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK;
656 
657 	/* Get the Primary Sts Reg */
658 	cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM);
659 
660 	/* If it is a bridge/switch get the Secondary Sts Reg */
661 	if (cs->hdr_type == PCI_HEADER_PPB)
662 		cs->sts_sreg = px_fab_get(px_p, rid,
663 		    PCI_BCNF_IO_BASE_LOW);
664 
665 	/* Get the PCIe Dev Sts Reg */
666 	cs->dev_sts_reg = px_fab_get(px_p, rid,
667 	    cap_off + PCIE_DEVCTL);
668 
669 	if (!aer_off)
670 		return;
671 
672 	/* Get the AER register information */
673 	cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS);
674 	cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS);
675 	cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV);
676 	cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0);
677 	cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4);
678 	cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8);
679 	cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC);
680 
681 	if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)
682 		return;
683 
684 	/* If this is a bridge check secondary aer */
685 	cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS);
686 	cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV);
687 	cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0);
688 	cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4);
689 	cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8);
690 	cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC);
691 }
692 
693 /*
694  * If a fabric intr occurs, query and clear the error registers on that device.
695  * Based on the error found return DDI_FM_OK or DDI_FM_FATAL.
696  */
697 static uint_t
698 px_fabric_check(px_t *px_p, msgcode_t msg_code,
699     pcie_req_id_t rid, ddi_fm_error_t *derr)
700 {
701 	dev_info_t	*dip = px_p->px_dip;
702 	char		buf[FM_MAX_CLASS];
703 	px_fabric_cfgspace_t cs;
704 	int		ret;
705 
706 	/* clear cs */
707 	bzero(&cs, sizeof (px_fabric_cfgspace_t));
708 
709 	cs.msg_code = msg_code;
710 	cs.rid = rid;
711 
712 	px_fabric_fill_cs(px_p, &cs);
713 	if (cs.cap_off)
714 		ret = px_fabric_handle(px_p, &cs);
715 	else
716 		ret = PX_FATAL_GOS;
717 
718 	(void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS);
719 	ddi_fm_ereport_post(dip, buf, derr->fme_ena,
720 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
721 	    PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code,
722 	    PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid,
723 	    "cap_off", DATA_TYPE_UINT16, cs.cap_off,
724 	    "aer_off", DATA_TYPE_UINT16, cs.aer_off,
725 	    "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16,
726 	    "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16,
727 	    "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16,
728 	    "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg,
729 	    "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg,
730 	    "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg,
731 	    "aer_h1", DATA_TYPE_UINT32, cs.aer_h1,
732 	    "aer_h2", DATA_TYPE_UINT32, cs.aer_h2,
733 	    "aer_h3", DATA_TYPE_UINT32, cs.aer_h3,
734 	    "aer_h4", DATA_TYPE_UINT32, cs.aer_h4,
735 	    "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg,
736 	    "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg,
737 	    "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1,
738 	    "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2,
739 	    "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3,
740 	    "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4,
741 	    "severity", DATA_TYPE_UINT32, ret,
742 	    NULL);
743 
744 	/* Check for protected access */
745 	switch (derr->fme_flag) {
746 	case DDI_FM_ERR_EXPECTED:
747 	case DDI_FM_ERR_PEEK:
748 	case DDI_FM_ERR_POKE:
749 		ret &= PX_FATAL_GOS;
750 		break;
751 	}
752 
753 
754 	if (px_fabric_die &&
755 	    (ret & (PX_FATAL_GOS | PX_FATAL_SW)))
756 			ret = DDI_FM_FATAL;
757 
758 	return (ret);
759 }
760 
761 /*
762  * px_err_fabric_intr:
763  * Interrupt handler for PCIE fabric block.
764  * o lock
765  * o create derr
766  * o px_err_handle(leaf, with jbc)
767  * o send ereport(fire fmri, derr, payload = BDF)
768  * o dispatch (leaf)
769  * o unlock
770  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
771  */
772 /* ARGSUSED */
773 uint_t
774 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code,
775     pcie_req_id_t rid)
776 {
777 	dev_info_t	*rpdip = px_p->px_dip;
778 	px_cb_t		*cb_p = px_p->px_cb_p;
779 	int		err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK;
780 	ddi_fm_error_t	derr;
781 
782 	mutex_enter(&cb_p->xbc_fm_mutex);
783 
784 	/* Create the derr */
785 	bzero(&derr, sizeof (ddi_fm_error_t));
786 	derr.fme_version = DDI_FME_VERSION;
787 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
788 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
789 
790 	/* send ereport/handle/clear fire registers */
791 	err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE);
792 
793 	/* Check and clear the fabric error */
794 	fab_err = px_fabric_check(px_p, msg_code, rid, &derr);
795 
796 	/* Check all child devices for errors */
797 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
798 
799 	mutex_exit(&cb_p->xbc_fm_mutex);
800 
801 	/*
802 	 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset,
803 	 * therefore it does not cause panic.
804 	 */
805 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) ||
806 	    (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL))
807 		PX_FM_PANIC("Fatal PCIe Fabric Error has occurred\n");
808 
809 	return (DDI_INTR_CLAIMED);
810 }
811 
812 /*
813  * px_err_safeacc_check:
814  * Check to see if a peek/poke and cautious access is currently being
815  * done on a particular leaf.
816  *
817  * Safe access reads induced fire errors will be handled by cpu trap handler
818  * which will call px_fm_callback() which calls this function. In that
819  * case, the derr fields will be set by trap handler with the correct values.
820  *
821  * Safe access writes induced errors will be handled by px interrupt
822  * handlers, this function will fill in the derr fields.
823  *
824  * If a cpu trap does occur, it will quiesce all other interrupts allowing
825  * the cpu trap error handling to finish before Fire receives an interrupt.
826  *
827  * If fire does indeed have an error when a cpu trap occurs as a result of
828  * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
829  * In which case derr will be initialized as "UNEXPECTED" by the interrupt
830  * handler and this function will need to find if this error occured in the
831  * middle of a safe access operation.
832  *
833  * @param px_p		leaf in which to check access
834  * @param derr		fm err data structure to be updated
835  */
836 void
837 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
838 {
839 	px_pec_t 	*pec_p = px_p->px_pec_p;
840 	px_cb_t		*cb_p = px_p->px_cb_p;
841 	int		acctype = pec_p->pec_safeacc_type;
842 
843 	ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex));
844 
845 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
846 		return;
847 	}
848 
849 	/* safe access checking */
850 	switch (acctype) {
851 	case DDI_FM_ERR_EXPECTED:
852 		/*
853 		 * cautious access protection, protected from all err.
854 		 */
855 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
856 		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
857 		    DDI_FME_VERSION);
858 		derr->fme_flag = acctype;
859 		derr->fme_acc_handle = pec_p->pec_acc_hdl;
860 		break;
861 	case DDI_FM_ERR_POKE:
862 		/*
863 		 * ddi_poke protection, check nexus and children for
864 		 * expected errors.
865 		 */
866 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
867 		membar_sync();
868 		derr->fme_flag = acctype;
869 		break;
870 	case DDI_FM_ERR_PEEK:
871 		derr->fme_flag = acctype;
872 		break;
873 	}
874 }
875