xref: /illumos-gate/usr/src/uts/sun4/io/px/px_fm.c (revision 54d82594cac34899a52710db0b8235a171e83e31)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX Fault Management Architecture
31  */
32 #include <sys/types.h>
33 #include <sys/sunndi.h>
34 #include <sys/sunddi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/membar.h>
38 #include "px_obj.h"
39 
/*
 * Snapshot of the config space registers of the fabric device that sent
 * an error message.  px_fabric_check() zeroes the structure and sets
 * msg_code and rid; px_fabric_fill_cs() reads the remaining fields from
 * the device; px_fabric_handle() and its helpers use them to decide the
 * severity and to clear the logged errors.
 */
typedef struct px_fabric_cfgspace {
	/* Error information */
	msgcode_t	msg_code;	/* error message code received */
	pcie_req_id_t	rid;		/* requester id of the device */

	/* Config space header and device type */
	uint8_t		hdr_type;
	uint16_t	dev_type;	/* masked PCIe device type */

	/* Register pointers */
	uint16_t	cap_off;	/* PCIe capability offset, 0 if none */
	uint16_t	aer_off;	/* AER capability offset, 0 if none */

	/* PCI register values */
	uint32_t	sts_reg;	/* dword at PCI_CONF_COMM */
	uint32_t	sts_sreg;	/* dword at PCI_BCNF_IO_BASE_LOW */

	/* PCIE register values */
	uint32_t	dev_sts_reg;	/* dword at cap_off + PCIE_DEVCTL */
	uint32_t	aer_ce_reg;	/* AER correctable error status */
	uint32_t	aer_ue_reg;	/* AER uncorrectable error status */
	uint32_t	aer_sev_reg;	/* AER uncorrectable error severity */
	uint32_t	aer_ue_sreg;	/* secondary uncorrectable status */
	uint32_t	aer_sev_sreg;	/* secondary uncorrectable severity */

	/* PCIE Header Log Registers */
	uint32_t	aer_h1;
	uint32_t	aer_h2;
	uint32_t	aer_h3;
	uint32_t	aer_h4;
	/* Secondary header log, only read for PCIe to PCI bridges */
	uint32_t	aer_sh1;
	uint32_t	aer_sh2;
	uint32_t	aer_sh3;
	uint32_t	aer_sh4;
} px_fabric_cfgspace_t;
75 
/*
 * Forward declarations of the file-local fabric error helpers:
 * capability lookup (get_aer, get_pciecap), per-register severity
 * evaluation (handle_psts, handle_ssts, handle_paer, handle_saer),
 * and the fill/handle/check sequence driven by px_err_fabric_intr().
 */
static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid);
static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid);
static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs);
static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs);
static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs);
static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs);
static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs);
static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs);
static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code,
    pcie_req_id_t rid, ddi_fm_error_t *derr);
86 
87 /*
88  * Initialize px FMA support
89  */
90 int
91 px_fm_attach(px_t *px_p)
92 {
93 	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
94 		DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
95 
96 	/*
97 	 * check parents' capability
98 	 */
99 	ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
100 
101 	/*
102 	 * parents need to be ereport and error handling capable
103 	 */
104 	ASSERT(px_p->px_fm_cap &&
105 	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
106 
107 	/*
108 	 * register error callback in parent
109 	 */
110 	ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
111 
112 	return (DDI_SUCCESS);
113 }
114 
115 /*
116  * Deregister FMA
117  */
118 void
119 px_fm_detach(px_t *px_p)
120 {
121 	ddi_fm_handler_unregister(px_p->px_dip);
122 	ddi_fm_fini(px_p->px_dip);
123 }
124 
125 /*
126  * Function used to setup access functions depending on level of desired
127  * protection.
128  */
129 void
130 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip)
131 {
132 	uchar_t fflag;
133 	ddi_acc_hdl_t *hp;
134 	ddi_acc_impl_t *ap;
135 
136 	hp = mp->map_handlep;
137 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
138 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
139 
140 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
141 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
142 		switch (fflag) {
143 		case DDI_FLAGERR_ACC:
144 			ap->ahi_get8 = i_ddi_prot_get8;
145 			ap->ahi_get16 = i_ddi_prot_get16;
146 			ap->ahi_get32 = i_ddi_prot_get32;
147 			ap->ahi_get64 = i_ddi_prot_get64;
148 			ap->ahi_put8 = i_ddi_prot_put8;
149 			ap->ahi_put16 = i_ddi_prot_put16;
150 			ap->ahi_put32 = i_ddi_prot_put32;
151 			ap->ahi_put64 = i_ddi_prot_put64;
152 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
153 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
154 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
155 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
156 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
157 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
158 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
159 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
160 			break;
161 		case DDI_CAUTIOUS_ACC :
162 			ap->ahi_get8 = i_ddi_caut_get8;
163 			ap->ahi_get16 = i_ddi_caut_get16;
164 			ap->ahi_get32 = i_ddi_caut_get32;
165 			ap->ahi_get64 = i_ddi_caut_get64;
166 			ap->ahi_put8 = i_ddi_caut_put8;
167 			ap->ahi_put16 = i_ddi_caut_put16;
168 			ap->ahi_put32 = i_ddi_caut_put32;
169 			ap->ahi_put64 = i_ddi_caut_put64;
170 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
171 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
172 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
173 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
174 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
175 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
176 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
177 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
178 			break;
179 		default:
180 			break;
181 		}
182 	} else if (mp->map_op == DDI_MO_UNMAP) {
183 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
184 	}
185 }
186 
187 /*
188  * Function called after a dma fault occurred to find out whether the
189  * fault address is associated with a driver that is able to handle faults
190  * and recover from faults. The driver has to set DDI_DMA_FLAGERR and
191  * cache dma handles in order to make this checking effective to help
192  * recovery from dma faults.
193  */
194 /* ARGSUSED */
195 static int
196 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr,
197     const void *not_used)
198 {
199 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
200 	pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
201 	pfn_t comp_pfn;
202 	int page;
203 
204 	/*
205 	 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not
206 	 * been effectively initialized during attach.
207 	 */
208 	ASSERT(mp);
209 
210 	for (page = 0; page < mp->dmai_ndvmapages; page++) {
211 		comp_pfn = PX_GET_MP_PFN(mp, page);
212 		if (fault_pfn == comp_pfn)
213 			return (DDI_FM_NONFATAL);
214 	}
215 
216 	return (DDI_FM_UNKNOWN);
217 }
218 
219 /*
220  * Function used to check if a given access handle owns the failing address.
221  * Called by ndi_fmc_error, when we detect a PIO error.
222  */
223 /* ARGSUSED */
224 static int
225 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr,
226     const void *not_used)
227 {
228 	pfn_t pfn, fault_pfn;
229 	ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle);
230 
231 	/*
232 	 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not
233 	 * been effectively initialized during attach.
234 	 */
235 	ASSERT(hp);
236 
237 	pfn = hp->ah_pfn;
238 	fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
239 	if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum))
240 		return (DDI_FM_NONFATAL);
241 
242 	return (DDI_FM_UNKNOWN);
243 }
244 
245 /*
246  * Function used by PCI error handlers to check if captured address is stored
247  * in the DMA or ACC handle caches.
248  */
249 int
250 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar)
251 {
252 	uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap;
253 	int (*f)() = type == DMA_HANDLE ?
254 	    (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) :
255 	    (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL);
256 
257 	return (f ? ndi_fmc_error(dip, NULL, type, f, fme_ena, afar) :
258 	    DDI_FM_UNKNOWN);
259 }
260 
261 /*
262  * Function used to initialize FMA for our children nodes. Called
263  * through pci busops when child node calls ddi_fm_init.
264  */
265 /*ARGSUSED*/
266 int
267 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
268     ddi_iblock_cookie_t *ibc_p)
269 {
270 	px_t *px_p = DIP_TO_STATE(dip);
271 
272 	ASSERT(ibc_p != NULL);
273 	*ibc_p = px_p->px_fm_ibc;
274 
275 	return (px_p->px_fm_cap);
276 }
277 
278 /*
279  * lock access for exclusive PCIe access
280  */
281 void
282 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
283 {
284 	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
285 
286 	/*
287 	 * Exclusive access has been used for cautious put/get,
288 	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
289 	 * similar protection as what on_trap() does, and which calls
290 	 * membar  #Sync to flush out all cpu deferred errors
291 	 * prior to get/put operation, so here we're not calling
292 	 * membar  #Sync - a difference from what's in pci_bus_enter().
293 	 */
294 	mutex_enter(&pec_p->pec_pokefault_mutex);
295 	pec_p->pec_acc_hdl = handle;
296 }
297 
298 /*
299  * unlock access for exclusive PCIe access
300  */
301 /* ARGSUSED */
302 void
303 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
304 {
305 	px_t		*px_p = DIP_TO_STATE(dip);
306 	px_pec_t	*pec_p = px_p->px_pec_p;
307 
308 	pec_p->pec_acc_hdl = NULL;
309 	mutex_exit(&pec_p->pec_pokefault_mutex);
310 }
311 
312 
313 /*
314  * PCI error callback which is registered with our parent to call
315  * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
316  * and PCI BERR/TO/UE
317  *
318  * Dispatch on all known leaves of this fire device because we cannot tell
319  * which side the error came from.
320  */
321 /*ARGSUSED*/
322 int
323 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
324 {
325 	px_t	*px_p = (px_t *)impl_data;
326 	px_cb_t	*cb_p = px_p->px_cb_p;
327 	int	err = PX_OK;
328 	int	fatal = 0;
329 	int	nonfatal = 0;
330 	int	unknown = 0;
331 	int	ret = DDI_FM_OK;
332 	int	i;
333 
334 	mutex_enter(&cb_p->xbc_fm_mutex);
335 
336 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
337 		px_p = cb_p->xbc_px_list[i];
338 		if (px_p != NULL)
339 			err |= px_err_handle(px_p, derr, PX_TRAP_CALL,
340 			    (i == 0));
341 	}
342 
343 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
344 		px_p = cb_p->xbc_px_list[i];
345 		if (px_p != NULL) {
346 			ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr);
347 			switch (ret) {
348 			case DDI_FM_FATAL:
349 				fatal++;
350 				break;
351 			case DDI_FM_NONFATAL:
352 				nonfatal++;
353 				break;
354 			case DDI_FM_UNKNOWN:
355 				unknown++;
356 				break;
357 			default:
358 				break;
359 			}
360 		}
361 	}
362 	mutex_exit(&cb_p->xbc_fm_mutex);
363 
364 	ret = (fatal != 0) ? DDI_FM_FATAL :
365 	    ((nonfatal != 0) ? DDI_FM_NONFATAL :
366 	    (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK)));
367 
368 	/* fire fatal error overrides device error */
369 	if (err & (PX_FATAL_GOS | PX_FATAL_SW))
370 		ret = DDI_FM_FATAL;
371 	/* if fire encounts no error, then take whatever device error */
372 	else if ((err != PX_OK) && (ret != DDI_FM_FATAL))
373 		ret = DDI_FM_NONFATAL;
374 
375 	return (ret);
376 }
377 
378 static uint16_t
379 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid)
380 {
381 	uint32_t	hdr, hdr_next_ptr, hdr_cap_id;
382 	uint16_t	offset = PCIE_EXT_CAP;
383 	int		deadcount = 0;
384 
385 	/* Find the Advanced Error Register */
386 	hdr = px_fab_get(px_p, rid, offset);
387 	hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) &
388 	    PCIE_EXT_CAP_NEXT_PTR_MASK;
389 	hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) &
390 	    PCIE_EXT_CAP_ID_MASK;
391 
392 	while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) &&
393 	    (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) {
394 		offset = hdr_next_ptr;
395 		hdr = px_fab_get(px_p, rid, offset);
396 		hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) &
397 		    PCIE_EXT_CAP_NEXT_PTR_MASK;
398 		hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) &
399 		    PCIE_EXT_CAP_ID_MASK;
400 
401 		if (deadcount++ > 100)
402 			break;
403 	}
404 
405 	if (hdr_cap_id == PCIE_EXT_CAP_ID_AER)
406 		return (offset);
407 
408 	return (0);
409 }
410 
411 static uint16_t
412 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid)
413 {
414 	uint32_t	hdr, hdr_next_ptr, hdr_cap_id;
415 	uint16_t	offset = PCI_CONF_STAT;
416 	int		deadcount = 0;
417 
418 	hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16;
419 	if (!(hdr & PCI_STAT_CAP)) {
420 		/* This is not a PCIE device */
421 		return (0);
422 	}
423 
424 	hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR);
425 	hdr_next_ptr = hdr & 0xFF;
426 	hdr_cap_id = 0;
427 
428 	while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) &&
429 	    (hdr_cap_id != PCI_CAP_ID_PCI_E)) {
430 		offset = hdr_next_ptr;
431 
432 		if (hdr_next_ptr < 0x40) {
433 			break;
434 		}
435 
436 		hdr = px_fab_get(px_p, rid, hdr_next_ptr);
437 		hdr_next_ptr = (hdr >> 8) & 0xFF;
438 		hdr_cap_id = hdr & 0xFF;
439 
440 		if (deadcount++ > 100)
441 			break;
442 	}
443 
444 	if (hdr_cap_id == PCI_CAP_ID_PCI_E)
445 		return (offset);
446 
447 	return (0);
448 }
449 
450 /*
451  * This function checks the primary status registers.
452  * Take the PCI status register and translate it to PCIe equivalent.
453  */
454 static int
455 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) {
456 	uint16_t	sts_reg = cs->sts_reg >> 16;
457 	uint16_t	pci_status;
458 	uint32_t	pcie_status;
459 	int		ret = PX_NONFATAL;
460 
461 	/* Parity Err == Send/Recv Poisoned TLP */
462 	pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR;
463 	pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC;
464 	if (sts_reg & pci_status)
465 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
466 		    px_fabric_die_ue, px_fabric_die_ue_gos);
467 
468 	/* Target Abort == Completer Abort */
469 	pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB;
470 	pcie_status = PCIE_AER_UCE_CA;
471 	if (sts_reg & pci_status)
472 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
473 		    px_fabric_die_ue, px_fabric_die_ue_gos);
474 
475 	/* Master Abort == Unsupport Request */
476 	pci_status = PCI_STAT_R_MAST_AB;
477 	pcie_status = PCIE_AER_UCE_UR;
478 	if (sts_reg & pci_status)
479 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
480 		    px_fabric_die_ue, px_fabric_die_ue_gos);
481 
482 	/* System Error == Uncorrectable Error */
483 	pci_status = PCI_STAT_S_SYSERR;
484 	pcie_status = -1;
485 	if (sts_reg & pci_status)
486 		ret |= PX_FABRIC_ERR_SEV(pcie_status,
487 		    px_fabric_die_ue, px_fabric_die_ue_gos);
488 
489 	return (ret);
490 }
491 
492 /*
493  * This function checks the secondary status registers.
494  * Switches and Bridges have a different behavior.
495  */
496 static int
497 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) {
498 	uint16_t	sts_reg = cs->sts_sreg >> 16;
499 	int		ret = PX_NONFATAL;
500 
501 	if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) {
502 		/*
503 		 * This is a PCIE-PCI bridge, but only check the severity
504 		 * if this device doesn't support AERs.
505 		 */
506 		if (!cs->aer_off)
507 			ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts,
508 			    px_fabric_die_bdg_sts_gos);
509 	} else {
510 		/* This is most likely a PCIE switch */
511 		ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts,
512 		    px_fabric_die_sw_sts_gos);
513 	}
514 
515 	return (ret);
516 }
517 
518 /*
519  * This function checks and clears the primary AER.
520  */
521 static int
522 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) {
523 	uint32_t	chk_reg, chk_reg_gos, off_reg, reg;
524 	int		ret = PX_NONFATAL;
525 
526 	/* Determine severity and clear the AER */
527 	switch (cs->msg_code) {
528 	case PCIE_MSG_CODE_ERR_COR:
529 		off_reg = PCIE_AER_CE_STS;
530 		chk_reg = px_fabric_die_ce;
531 		chk_reg_gos = px_fabric_die_ce_gos;
532 		reg = cs->aer_ce_reg;
533 		break;
534 	case PCIE_MSG_CODE_ERR_NONFATAL:
535 		off_reg = PCIE_AER_UCE_STS;
536 		chk_reg = px_fabric_die_ue;
537 		chk_reg_gos = px_fabric_die_ue_gos;
538 		reg = cs->aer_ue_reg & ~(cs->aer_sev_reg);
539 		break;
540 	case PCIE_MSG_CODE_ERR_FATAL:
541 		off_reg = PCIE_AER_UCE_STS;
542 		chk_reg = px_fabric_die_ue;
543 		chk_reg_gos = px_fabric_die_ue_gos;
544 		reg = cs->aer_ue_reg & cs->aer_sev_reg;
545 		break;
546 	default:
547 		/* Major error force a panic */
548 		return (PX_FATAL_GOS);
549 	}
550 	px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg);
551 	ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos);
552 
553 	return (ret);
554 }
555 
556 /*
557  * This function checks and clears the secondary AER.
558  */
559 static int
560 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) {
561 	uint32_t	chk_reg, chk_reg_gos, off_reg, reg;
562 	uint32_t	sev;
563 	int		ret = PX_NONFATAL;
564 
565 	/* Determine severity and clear the AER */
566 	switch (cs->msg_code) {
567 	case PCIE_MSG_CODE_ERR_COR:
568 		/* Ignore Correctable Errors */
569 		sev = 0;
570 		break;
571 	case PCIE_MSG_CODE_ERR_NONFATAL:
572 		sev = ~(cs->aer_sev_sreg);
573 		break;
574 	case PCIE_MSG_CODE_ERR_FATAL:
575 		sev = cs->aer_sev_sreg;
576 		break;
577 	default:
578 		/* Major error force a panic */
579 		return (DDI_FM_FATAL);
580 	}
581 	off_reg = PCIE_AER_SUCE_STS;
582 	chk_reg = px_fabric_die_sue;
583 	chk_reg_gos = px_fabric_die_sue_gos;
584 	reg = cs->aer_ue_sreg & sev;
585 	px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg);
586 	ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos);
587 
588 	return (ret);
589 }
590 
591 static int
592 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs)
593 {
594 	pcie_req_id_t	rid = cs->rid;
595 	uint16_t	cap_off = cs->cap_off;
596 	uint16_t	aer_off = cs->aer_off;
597 	uint8_t		hdr_type = cs->hdr_type;
598 	uint16_t	dev_type = cs->dev_type;
599 	int		ret = PX_NONFATAL;
600 
601 	if (hdr_type == PCI_HEADER_PPB) {
602 		ret |= px_fabric_handle_ssts(cs);
603 	}
604 
605 	if (!aer_off) {
606 		ret |= px_fabric_handle_psts(cs);
607 	}
608 
609 	if (aer_off) {
610 		ret |= px_fabric_handle_paer(px_p, cs);
611 	}
612 
613 	if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) {
614 		ret |= px_fabric_handle_saer(px_p, cs);
615 	}
616 
617 	/* Clear the standard PCIe error registers */
618 	px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg);
619 
620 	/* Clear the legacy error registers */
621 	px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg);
622 
623 	/* Clear the legacy secondary error registers */
624 	if (hdr_type == PCI_HEADER_PPB) {
625 		px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW,
626 		    cs->sts_sreg);
627 	}
628 
629 	return (ret);
630 }
631 
632 static void
633 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs)
634 {
635 	uint16_t	cap_off, aer_off;
636 	pcie_req_id_t	rid = cs->rid;
637 
638 	/* Gather Basic Device Information */
639 	cs->hdr_type = (px_fab_get(px_p, rid,
640 			    PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF;
641 
642 	cs->cap_off = px_fabric_get_pciecap(px_p, rid);
643 	cap_off = cs->cap_off;
644 	if (!cap_off)
645 		return;
646 
647 	cs->aer_off = px_fabric_get_aer(px_p, rid);
648 	aer_off = cs->aer_off;
649 
650 	cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16;
651 	cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK;
652 
653 	/* Get the Primary Sts Reg */
654 	cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM);
655 
656 	/* If it is a bridge/switch get the Secondary Sts Reg */
657 	if (cs->hdr_type == PCI_HEADER_PPB)
658 		cs->sts_sreg = px_fab_get(px_p, rid,
659 		    PCI_BCNF_IO_BASE_LOW);
660 
661 	/* Get the PCIe Dev Sts Reg */
662 	cs->dev_sts_reg = px_fab_get(px_p, rid,
663 	    cap_off + PCIE_DEVCTL);
664 
665 	if (!aer_off)
666 		return;
667 
668 	/* Get the AER register information */
669 	cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS);
670 	cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS);
671 	cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV);
672 	cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0);
673 	cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4);
674 	cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8);
675 	cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC);
676 
677 	if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)
678 		return;
679 
680 	/* If this is a bridge check secondary aer */
681 	cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS);
682 	cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV);
683 	cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0);
684 	cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4);
685 	cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8);
686 	cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC);
687 }
688 
689 /*
690  * If a fabric intr occurs, query and clear the error registers on that device.
691  * Based on the error found return DDI_FM_OK or DDI_FM_FATAL.
692  */
693 static uint_t
694 px_fabric_check(px_t *px_p, msgcode_t msg_code,
695     pcie_req_id_t rid, ddi_fm_error_t *derr)
696 {
697 	dev_info_t	*dip = px_p->px_dip;
698 	char		buf[FM_MAX_CLASS];
699 	px_fabric_cfgspace_t cs;
700 	int		ret;
701 
702 	/* clear cs */
703 	bzero(&cs, sizeof (px_fabric_cfgspace_t));
704 
705 	cs.msg_code = msg_code;
706 	cs.rid = rid;
707 
708 	px_fabric_fill_cs(px_p, &cs);
709 	if (cs.cap_off)
710 		ret = px_fabric_handle(px_p, &cs);
711 	else
712 		ret = PX_FATAL_GOS;
713 
714 	(void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS);
715 	ddi_fm_ereport_post(dip, buf, derr->fme_ena,
716 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
717 	    PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code,
718 	    PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid,
719 	    "cap_off", DATA_TYPE_UINT16, cs.cap_off,
720 	    "aer_off", DATA_TYPE_UINT16, cs.aer_off,
721 	    "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16,
722 	    "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16,
723 	    "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16,
724 	    "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg,
725 	    "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg,
726 	    "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg,
727 	    "aer_h1", DATA_TYPE_UINT32, cs.aer_h1,
728 	    "aer_h2", DATA_TYPE_UINT32, cs.aer_h2,
729 	    "aer_h3", DATA_TYPE_UINT32, cs.aer_h3,
730 	    "aer_h4", DATA_TYPE_UINT32, cs.aer_h4,
731 	    "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg,
732 	    "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg,
733 	    "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1,
734 	    "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2,
735 	    "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3,
736 	    "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4,
737 	    "severity", DATA_TYPE_UINT32, ret,
738 	    NULL);
739 
740 	/* Check for protected access */
741 	switch (derr->fme_flag) {
742 	case DDI_FM_ERR_EXPECTED:
743 	case DDI_FM_ERR_PEEK:
744 	case DDI_FM_ERR_POKE:
745 		ret &= PX_FATAL_GOS;
746 		break;
747 	}
748 
749 
750 	if (px_fabric_die &&
751 	    (ret & (PX_FATAL_GOS | PX_FATAL_SW)))
752 			ret = DDI_FM_FATAL;
753 
754 	return (ret);
755 }
756 
757 /*
758  * px_err_fabric_intr:
759  * Interrupt handler for PCIE fabric block.
760  * o lock
761  * o create derr
762  * o px_err_handle(leaf, with jbc)
763  * o send ereport(fire fmri, derr, payload = BDF)
764  * o dispatch (leaf)
765  * o unlock
766  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
767  */
768 /* ARGSUSED */
769 uint_t
770 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code,
771     pcie_req_id_t rid)
772 {
773 	dev_info_t	*rpdip = px_p->px_dip;
774 	px_cb_t		*cb_p = px_p->px_cb_p;
775 	int		err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK;
776 	ddi_fm_error_t	derr;
777 
778 	mutex_enter(&cb_p->xbc_fm_mutex);
779 
780 	/* Create the derr */
781 	bzero(&derr, sizeof (ddi_fm_error_t));
782 	derr.fme_version = DDI_FME_VERSION;
783 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
784 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
785 
786 	/* send ereport/handle/clear fire registers */
787 	err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE);
788 
789 	/* Check and clear the fabric error */
790 	fab_err = px_fabric_check(px_p, msg_code, rid, &derr);
791 
792 	/* Check all child devices for errors */
793 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
794 
795 	mutex_exit(&cb_p->xbc_fm_mutex);
796 
797 	/*
798 	 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset,
799 	 * therefore it does not cause panic.
800 	 */
801 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) ||
802 	    (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL))
803 		fm_panic("Fatal PCIe Fabric Error has occurred\n");
804 
805 	return (DDI_INTR_CLAIMED);
806 }
807 
808 /*
809  * px_err_safeacc_check:
810  * Check to see if a peek/poke and cautious access is currently being
811  * done on a particular leaf.
812  *
813  * Safe access reads induced fire errors will be handled by cpu trap handler
814  * which will call px_fm_callback() which calls this function. In that
815  * case, the derr fields will be set by trap handler with the correct values.
816  *
817  * Safe access writes induced errors will be handled by px interrupt
818  * handlers, this function will fill in the derr fields.
819  *
820  * If a cpu trap does occur, it will quiesce all other interrupts allowing
821  * the cpu trap error handling to finish before Fire receives an interrupt.
822  *
823  * If fire does indeed have an error when a cpu trap occurs as a result of
824  * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
825  * In which case derr will be initialized as "UNEXPECTED" by the interrupt
826  * handler and this function will need to find if this error occured in the
827  * middle of a safe access operation.
828  *
829  * @param px_p		leaf in which to check access
830  * @param derr		fm err data structure to be updated
831  */
832 void
833 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
834 {
835 	px_pec_t 	*pec_p = px_p->px_pec_p;
836 	px_cb_t		*cb_p = px_p->px_cb_p;
837 	int		acctype = pec_p->pec_safeacc_type;
838 
839 	ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex));
840 
841 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
842 		return;
843 	}
844 
845 	/* safe access checking */
846 	switch (acctype) {
847 	case DDI_FM_ERR_EXPECTED:
848 		/*
849 		 * cautious access protection, protected from all err.
850 		 */
851 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
852 		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
853 		    DDI_FME_VERSION);
854 		derr->fme_flag = acctype;
855 		derr->fme_acc_handle = pec_p->pec_acc_hdl;
856 		break;
857 	case DDI_FM_ERR_POKE:
858 		/*
859 		 * ddi_poke protection, check nexus and children for
860 		 * expected errors.
861 		 */
862 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
863 		membar_sync();
864 		derr->fme_flag = acctype;
865 		break;
866 	case DDI_FM_ERR_PEEK:
867 		derr->fme_flag = acctype;
868 		break;
869 	}
870 }
871