xref: /titanic_44/usr/src/uts/sun4u/io/px/px_err.c (revision ef69670ded4ed2349f664bb59f0d513cc0364906)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sun4u Fire Error Handling
30  */
31 
32 #include <sys/types.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/pcie.h>
38 #include <sys/pcie_impl.h>
39 #include "px_obj.h"
40 #include <px_regs.h>
41 #include <px_csr.h>
42 #include <sys/membar.h>
43 #include <sys/machcpuvar.h>
44 #include <sys/platform_module.h>
45 #include "pcie_pwr.h"
46 #include "px_lib4u.h"
47 #include "px_err.h"
48 #include "px_err_impl.h"
49 #include "oberon_regs.h"
50 
51 uint64_t px_tlu_ue_intr_mask	= PX_ERR_EN_ALL;
52 uint64_t px_tlu_ue_log_mask	= PX_ERR_EN_ALL;
53 uint64_t px_tlu_ue_count_mask	= PX_ERR_EN_ALL;
54 
55 uint64_t px_tlu_ce_intr_mask	= PX_ERR_MASK_NONE;
56 uint64_t px_tlu_ce_log_mask	= PX_ERR_MASK_NONE;
57 uint64_t px_tlu_ce_count_mask	= PX_ERR_MASK_NONE;
58 
59 /*
60  * Do not enable Link Interrupts
61  */
62 uint64_t px_tlu_oe_intr_mask	= PX_ERR_EN_ALL & ~0x80000000800;
63 uint64_t px_tlu_oe_log_mask	= PX_ERR_EN_ALL & ~0x80000000800;
64 uint64_t px_tlu_oe_count_mask	= PX_ERR_EN_ALL;
65 
66 uint64_t px_mmu_intr_mask	= PX_ERR_EN_ALL;
67 uint64_t px_mmu_log_mask	= PX_ERR_EN_ALL;
68 uint64_t px_mmu_count_mask	= PX_ERR_EN_ALL;
69 
70 uint64_t px_imu_intr_mask	= PX_ERR_EN_ALL;
71 uint64_t px_imu_log_mask	= PX_ERR_EN_ALL;
72 uint64_t px_imu_count_mask	= PX_ERR_EN_ALL;
73 
74 /*
75  * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
76  * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
77  */
78 uint64_t px_ilu_intr_mask	= (((uint64_t)0x10 << 32) | 0x10);
79 uint64_t px_ilu_log_mask	= (((uint64_t)0x10 << 32) | 0x10);
80 uint64_t px_ilu_count_mask	= PX_ERR_EN_ALL;
81 
82 uint64_t px_ubc_intr_mask	= PX_ERR_EN_ALL;
83 uint64_t px_ubc_log_mask		= PX_ERR_EN_ALL;
84 uint64_t px_ubc_count_mask	= PX_ERR_EN_ALL;
85 
86 uint64_t px_jbc_intr_mask	= PX_ERR_EN_ALL;
87 uint64_t px_jbc_log_mask		= PX_ERR_EN_ALL;
88 uint64_t px_jbc_count_mask	= PX_ERR_EN_ALL;
89 
90 /*
91  * LPU Intr Registers are reverse encoding from the registers above.
92  * 1 = disable
93  * 0 = enable
94  *
95  * Log and Count are however still the same.
96  */
97 uint64_t px_lpul_intr_mask	= LPU_INTR_DISABLE;
98 uint64_t px_lpul_log_mask	= PX_ERR_EN_ALL;
99 uint64_t px_lpul_count_mask	= PX_ERR_EN_ALL;
100 
101 uint64_t px_lpup_intr_mask	= LPU_INTR_DISABLE;
102 uint64_t px_lpup_log_mask	= PX_ERR_EN_ALL;
103 uint64_t px_lpup_count_mask	= PX_ERR_EN_ALL;
104 
105 uint64_t px_lpur_intr_mask	= LPU_INTR_DISABLE;
106 uint64_t px_lpur_log_mask	= PX_ERR_EN_ALL;
107 uint64_t px_lpur_count_mask	= PX_ERR_EN_ALL;
108 
109 uint64_t px_lpux_intr_mask	= LPU_INTR_DISABLE;
110 uint64_t px_lpux_log_mask	= PX_ERR_EN_ALL;
111 uint64_t px_lpux_count_mask	= PX_ERR_EN_ALL;
112 
113 uint64_t px_lpus_intr_mask	= LPU_INTR_DISABLE;
114 uint64_t px_lpus_log_mask	= PX_ERR_EN_ALL;
115 uint64_t px_lpus_count_mask	= PX_ERR_EN_ALL;
116 
117 uint64_t px_lpug_intr_mask	= LPU_INTR_DISABLE;
118 uint64_t px_lpug_log_mask	= PX_ERR_EN_ALL;
119 uint64_t px_lpug_count_mask	= PX_ERR_EN_ALL;
120 
121 /*
122  * JBC error bit table
123  */
124 #define	JBC_BIT_DESC(bit, hdl, erpt) \
125 	JBC_INTERRUPT_STATUS_ ## bit ## _P, \
126 	0, \
127 	PX_ERR_BIT_HANDLE(hdl), \
128 	PX_ERPT_SEND(erpt), \
129 	PX_ERR_JBC_CLASS(bit) }, \
130 	{ JBC_INTERRUPT_STATUS_ ## bit ## _S, \
131 	0, \
132 	PX_ERR_BIT_HANDLE(hdl), \
133 	PX_ERPT_SEND(erpt), \
134 	PX_ERR_JBC_CLASS(bit)
135 px_err_bit_desc_t px_err_jbc_tbl[] = {
136 	/* JBC FATAL - see io erpt doc, section 1.1 */
137 	{ JBC_BIT_DESC(MB_PEA,	fatal_hw,	jbc_fatal) },
138 	{ JBC_BIT_DESC(CPE,	fatal_hw,	jbc_fatal) },
139 	{ JBC_BIT_DESC(APE,	fatal_hw,	jbc_fatal) },
140 	{ JBC_BIT_DESC(PIO_CPE,	fatal_hw,	jbc_fatal) },
141 	{ JBC_BIT_DESC(JTCEEW,	fatal_hw,	jbc_fatal) },
142 	{ JBC_BIT_DESC(JTCEEI,	fatal_hw,	jbc_fatal) },
143 	{ JBC_BIT_DESC(JTCEER,	fatal_hw,	jbc_fatal) },
144 
145 	/* JBC MERGE - see io erpt doc, section 1.2 */
146 	{ JBC_BIT_DESC(MB_PER,	jbc_merge,	jbc_merge) },
147 	{ JBC_BIT_DESC(MB_PEW,	jbc_merge,	jbc_merge) },
148 
149 	/* JBC Jbusint IN - see io erpt doc, section 1.3 */
150 	{ JBC_BIT_DESC(UE_ASYN,	fatal_gos,	jbc_in) },
151 	{ JBC_BIT_DESC(CE_ASYN,	non_fatal,	jbc_in) },
152 	{ JBC_BIT_DESC(JTE,	fatal_gos,	jbc_in) },
153 	{ JBC_BIT_DESC(JBE,	jbc_jbusint_in,	jbc_in) },
154 	{ JBC_BIT_DESC(JUE,	jbc_jbusint_in,	jbc_in) },
155 	{ JBC_BIT_DESC(ICISE,	fatal_gos,	jbc_in) },
156 	{ JBC_BIT_DESC(WR_DPE,	jbc_jbusint_in,	jbc_in) },
157 	{ JBC_BIT_DESC(RD_DPE,	jbc_jbusint_in,	jbc_in) },
158 	{ JBC_BIT_DESC(ILL_BMW,	jbc_jbusint_in,	jbc_in) },
159 	{ JBC_BIT_DESC(ILL_BMR,	jbc_jbusint_in,	jbc_in) },
160 	{ JBC_BIT_DESC(BJC,	jbc_jbusint_in,	jbc_in) },
161 
162 	/* JBC Jbusint Out - see io erpt doc, section 1.4 */
163 	{ JBC_BIT_DESC(IJP,	fatal_gos,	jbc_out) },
164 
165 	/*
166 	 * JBC Dmcint ODCD - see io erpt doc, section 1.5
167 	 *
168 	 * Error bits which can be set via a bad PCItool access go through
169 	 * jbc_safe_acc instead.
170 	 */
171 	{ JBC_BIT_DESC(PIO_UNMAP_RD,	jbc_safe_acc,		jbc_odcd) },
172 	{ JBC_BIT_DESC(ILL_ACC_RD,	jbc_safe_acc,		jbc_odcd) },
173 	{ JBC_BIT_DESC(PIO_UNMAP,	jbc_safe_acc,		jbc_odcd) },
174 	{ JBC_BIT_DESC(PIO_DPE,		jbc_dmcint_odcd,	jbc_odcd) },
175 	{ JBC_BIT_DESC(PIO_CPE,		non_fatal,		jbc_odcd) },
176 	{ JBC_BIT_DESC(ILL_ACC,		jbc_safe_acc,		jbc_odcd) },
177 
178 	/* JBC Dmcint IDC - see io erpt doc, section 1.6 */
179 	{ JBC_BIT_DESC(UNSOL_RD,	non_fatal,	jbc_idc) },
180 	{ JBC_BIT_DESC(UNSOL_INTR,	non_fatal,	jbc_idc) },
181 
182 	/* JBC CSR - see io erpt doc, section 1.7 */
183 	{ JBC_BIT_DESC(EBUS_TO,	jbc_csr,	jbc_csr) }
184 };
185 
186 #define	px_err_jbc_keys \
187 	(sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t))
188 
189 /*
190  * UBC error bit table
191  */
192 #define	UBC_BIT_DESC(bit, hdl, erpt) \
193 	UBC_INTERRUPT_STATUS_ ## bit ## _P, \
194 	0, \
195 	PX_ERR_BIT_HANDLE(hdl), \
196 	PX_ERPT_SEND(erpt), \
197 	PX_ERR_UBC_CLASS(bit) }, \
198 	{ UBC_INTERRUPT_STATUS_ ## bit ## _S, \
199 	0, \
200 	PX_ERR_BIT_HANDLE(hdl), \
201 	PX_ERPT_SEND(erpt), \
202 	PX_ERR_UBC_CLASS(bit)
203 px_err_bit_desc_t px_err_ubc_tbl[] = {
204 	/* UBC FATAL  */
205 	{ UBC_BIT_DESC(DMARDUEA,	non_fatal,	ubc_fatal) },
206 	{ UBC_BIT_DESC(DMAWTUEA,	fatal_sw,	ubc_fatal) },
207 	{ UBC_BIT_DESC(MEMRDAXA,	fatal_sw,	ubc_fatal) },
208 	{ UBC_BIT_DESC(MEMWTAXA,	fatal_sw,	ubc_fatal) },
209 	{ UBC_BIT_DESC(DMARDUEB,	non_fatal,	ubc_fatal) },
210 	{ UBC_BIT_DESC(DMAWTUEB,	fatal_sw,	ubc_fatal) },
211 	{ UBC_BIT_DESC(MEMRDAXB,	fatal_sw,	ubc_fatal) },
212 	{ UBC_BIT_DESC(MEMWTAXB,	fatal_sw,	ubc_fatal) },
213 	{ UBC_BIT_DESC(PIOWTUE,		fatal_sw,	ubc_fatal) },
214 	{ UBC_BIT_DESC(PIOWBEUE,	fatal_sw,	ubc_fatal) },
215 	{ UBC_BIT_DESC(PIORBEUE,	fatal_sw,	ubc_fatal) }
216 };
217 
218 #define	px_err_ubc_keys \
219 	(sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t))
220 
221 
222 char *ubc_class_eid_qualifier[] = {
223 	"-mem",
224 	"-channel",
225 	"-cpu",
226 	"-path"
227 };
228 
229 
230 /*
231  * DMC error bit tables
232  */
233 #define	IMU_BIT_DESC(bit, hdl, erpt) \
234 	IMU_INTERRUPT_STATUS_ ## bit ## _P, \
235 	0, \
236 	PX_ERR_BIT_HANDLE(hdl), \
237 	PX_ERPT_SEND(erpt), \
238 	PX_ERR_DMC_CLASS(bit) }, \
239 	{ IMU_INTERRUPT_STATUS_ ## bit ## _S, \
240 	0, \
241 	PX_ERR_BIT_HANDLE(hdl), \
242 	PX_ERPT_SEND(erpt), \
243 	PX_ERR_DMC_CLASS(bit)
244 px_err_bit_desc_t px_err_imu_tbl[] = {
245 	/* DMC IMU RDS - see io erpt doc, section 2.1 */
246 	{ IMU_BIT_DESC(MSI_MAL_ERR,		non_fatal,	imu_rds) },
247 	{ IMU_BIT_DESC(MSI_PAR_ERR,		fatal_stuck,	imu_rds) },
248 	{ IMU_BIT_DESC(PMEACK_MES_NOT_EN,	imu_rbne,	imu_rds) },
249 	{ IMU_BIT_DESC(PMPME_MES_NOT_EN,	imu_pme,	imu_rds) },
250 	{ IMU_BIT_DESC(FATAL_MES_NOT_EN,	imu_rbne,	imu_rds) },
251 	{ IMU_BIT_DESC(NONFATAL_MES_NOT_EN,	imu_rbne,	imu_rds) },
252 	{ IMU_BIT_DESC(COR_MES_NOT_EN,		imu_rbne,	imu_rds) },
253 	{ IMU_BIT_DESC(MSI_NOT_EN,		imu_rbne,	imu_rds) },
254 
255 	/* DMC IMU SCS - see io erpt doc, section 2.2 */
256 	{ IMU_BIT_DESC(EQ_NOT_EN,		imu_rbne,	imu_rds) },
257 
258 	/* DMC IMU - see io erpt doc, section 2.3 */
259 	{ IMU_BIT_DESC(EQ_OVER,			imu_eq_ovfl,	imu) }
260 };
261 
262 #define	px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t))
263 
264 /* mmu errors */
265 #define	MMU_BIT_DESC(bit, hdl, erpt) \
266 	MMU_INTERRUPT_STATUS_ ## bit ## _P, \
267 	0, \
268 	PX_ERR_BIT_HANDLE(hdl), \
269 	PX_ERPT_SEND(erpt), \
270 	PX_ERR_DMC_CLASS(bit) }, \
271 	{ MMU_INTERRUPT_STATUS_ ## bit ## _S, \
272 	0, \
273 	PX_ERR_BIT_HANDLE(hdl), \
274 	PX_ERPT_SEND(erpt), \
275 	PX_ERR_DMC_CLASS(bit)
276 px_err_bit_desc_t px_err_mmu_tbl[] = {
277 	/* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */
278 	{ MMU_BIT_DESC(BYP_ERR,		mmu_rbne,	mmu_tfar_tfsr) },
279 	{ MMU_BIT_DESC(BYP_OOR,		mmu_tfa,	mmu_tfar_tfsr) },
280 	{ MMU_BIT_DESC(TRN_ERR,		mmu_rbne,	mmu_tfar_tfsr) },
281 	{ MMU_BIT_DESC(TRN_OOR,		mmu_tfa,	mmu_tfar_tfsr) },
282 	{ MMU_BIT_DESC(TTE_INV,		mmu_tfa,	mmu_tfar_tfsr) },
283 	{ MMU_BIT_DESC(TTE_PRT,		mmu_tfa,	mmu_tfar_tfsr) },
284 	{ MMU_BIT_DESC(TTC_DPE,		mmu_tfa,	mmu_tfar_tfsr) },
285 	{ MMU_BIT_DESC(TBW_DME,		mmu_tblwlk,	mmu_tfar_tfsr) },
286 	{ MMU_BIT_DESC(TBW_UDE,		mmu_tblwlk,	mmu_tfar_tfsr) },
287 	{ MMU_BIT_DESC(TBW_ERR,		mmu_tblwlk,	mmu_tfar_tfsr) },
288 	{ MMU_BIT_DESC(TBW_DPE,		mmu_tblwlk,	mmu_tfar_tfsr) },
289 
290 	/* DMC MMU - see io erpt doc, section 2.5 */
291 	{ MMU_BIT_DESC(TTC_CAE,		non_fatal,	mmu) }
292 };
293 #define	px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t))
294 
295 
296 /*
297  * PEC error bit tables
298  */
299 #define	ILU_BIT_DESC(bit, hdl, erpt) \
300 	ILU_INTERRUPT_STATUS_ ## bit ## _P, \
301 	0, \
302 	PX_ERR_BIT_HANDLE(hdl), \
303 	PX_ERPT_SEND(erpt), \
304 	PX_ERR_PEC_CLASS(bit) }, \
305 	{ ILU_INTERRUPT_STATUS_ ## bit ## _S, \
306 	0, \
307 	PX_ERR_BIT_HANDLE(hdl), \
308 	PX_ERPT_SEND(erpt), \
309 	PX_ERR_PEC_CLASS(bit)
310 px_err_bit_desc_t px_err_ilu_tbl[] = {
311 	/* PEC ILU none - see io erpt doc, section 3.1 */
312 	{ ILU_BIT_DESC(IHB_PE,		fatal_gos,	pec_ilu) }
313 };
314 #define	px_err_ilu_keys \
315 	(sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t))
316 
317 /*
318  * PEC UE errors implementation is incomplete pending PCIE generic
319  * fabric rules.  Must handle both PRIMARY and SECONDARY errors.
320  */
321 /* pec ue errors */
322 #define	TLU_UC_BIT_DESC(bit, hdl, erpt) \
323 	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
324 	0, \
325 	PX_ERR_BIT_HANDLE(hdl), \
326 	PX_ERPT_SEND(erpt), \
327 	PX_ERR_PEC_CLASS(bit) }, \
328 	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
329 	0, \
330 	PX_ERR_BIT_HANDLE(hdl), \
331 	PX_ERPT_SEND(erpt), \
332 	PX_ERR_PEC_CLASS(bit)
333 #define	TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \
334 	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
335 	0, \
336 	PX_ERR_BIT_HANDLE(hdl), \
337 	PX_ERPT_SEND(erpt), \
338 	PX_ERR_PEC_OB_CLASS(bit) }, \
339 	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
340 	0, \
341 	PX_ERR_BIT_HANDLE(hdl), \
342 	PX_ERPT_SEND(erpt), \
343 	PX_ERR_PEC_CLASS(bit)
344 px_err_bit_desc_t px_err_tlu_ue_tbl[] = {
345 	/* PCI-E Receive Uncorrectable Errors - see io erpt doc, section 3.2 */
346 	{ TLU_UC_BIT_DESC(UR,		pciex_ue,	pciex_rx_ue) },
347 	{ TLU_UC_BIT_DESC(UC,		pciex_ue,	pciex_rx_ue) },
348 
349 	/* PCI-E Transmit Uncorrectable Errors - see io erpt doc, section 3.3 */
350 	{ TLU_UC_OB_BIT_DESC(ECRC,	pciex_ue,	pciex_rx_ue) },
351 	{ TLU_UC_BIT_DESC(CTO,		pciex_ue,	pciex_tx_ue) },
352 	{ TLU_UC_BIT_DESC(ROF,		pciex_ue,	pciex_tx_ue) },
353 
354 	/* PCI-E Rx/Tx Uncorrectable Errors - see io erpt doc, section 3.4 */
355 	{ TLU_UC_BIT_DESC(MFP,		pciex_ue,	pciex_rx_tx_ue) },
356 	{ TLU_UC_BIT_DESC(PP,		pciex_ue,	pciex_rx_tx_ue) },
357 
358 	/* Other PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */
359 	{ TLU_UC_BIT_DESC(FCP,		pciex_ue,	pciex_ue) },
360 	{ TLU_UC_BIT_DESC(DLP,		pciex_ue,	pciex_ue) },
361 	{ TLU_UC_BIT_DESC(TE,		pciex_ue,	pciex_ue) },
362 
363 	/* Not used */
364 	{ TLU_UC_BIT_DESC(CA,		pciex_ue,	do_not) }
365 };
366 #define	px_err_tlu_ue_keys \
367 	(sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t))
368 
369 
370 /*
371  * PEC CE errors implementation is incomplete pending PCIE generic
372  * fabric rules.
373  */
374 /* pec ce errors */
375 #define	TLU_CE_BIT_DESC(bit, hdl, erpt) \
376 	TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
377 	0, \
378 	PX_ERR_BIT_HANDLE(hdl), \
379 	PX_ERPT_SEND(erpt), \
380 	PX_ERR_PEC_CLASS(bit) }, \
381 	{ TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
382 	0, \
383 	PX_ERR_BIT_HANDLE(hdl), \
384 	PX_ERPT_SEND(erpt), \
385 	PX_ERR_PEC_CLASS(bit)
386 px_err_bit_desc_t px_err_tlu_ce_tbl[] = {
387 	/* PCI-E Correctable Errors - see io erpt doc, section 3.6 */
388 	{ TLU_CE_BIT_DESC(RTO,		pciex_ce,	pciex_ce) },
389 	{ TLU_CE_BIT_DESC(RNR,		pciex_ce,	pciex_ce) },
390 	{ TLU_CE_BIT_DESC(BDP,		pciex_ce,	pciex_ce) },
391 	{ TLU_CE_BIT_DESC(BTP,		pciex_ce,	pciex_ce) },
392 	{ TLU_CE_BIT_DESC(RE,		pciex_ce,	pciex_ce) }
393 };
394 #define	px_err_tlu_ce_keys \
395 	(sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t))
396 
397 
398 /* pec oe errors */
399 #define	TLU_OE_BIT_DESC(bit, hdl, erpt) \
400 	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
401 	0, \
402 	PX_ERR_BIT_HANDLE(hdl), \
403 	PX_ERPT_SEND(erpt), \
404 	PX_ERR_PEC_CLASS(bit) }, \
405 	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
406 	0, \
407 	PX_ERR_BIT_HANDLE(hdl), \
408 	PX_ERPT_SEND(erpt), \
409 	PX_ERR_PEC_CLASS(bit)
410 #define	TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \
411 	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
412 	0, \
413 	PX_ERR_BIT_HANDLE(hdl), \
414 	PX_ERPT_SEND(erpt), \
415 	PX_ERR_PEC_OB_CLASS(bit) }, \
416 	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
417 	0, \
418 	PX_ERR_BIT_HANDLE(hdl), \
419 	PX_ERPT_SEND(erpt), \
420 	PX_ERR_PEC_OB_CLASS(bit)
421 px_err_bit_desc_t px_err_tlu_oe_tbl[] = {
422 	/*
423 	 * TLU Other Event Status (receive only) - see io erpt doc, section 3.7
424 	 */
425 	{ TLU_OE_BIT_DESC(MRC,		fatal_hw,	pciex_rx_oe) },
426 
427 	/* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */
428 	{ TLU_OE_BIT_DESC(WUC,		non_fatal,	pciex_rx_tx_oe) },
429 	{ TLU_OE_BIT_DESC(RUC,		non_fatal,	pciex_rx_tx_oe) },
430 	{ TLU_OE_BIT_DESC(CRS,		non_fatal,	pciex_rx_tx_oe) },
431 
432 	/* TLU Other Event - see io erpt doc, section 3.9 */
433 	{ TLU_OE_BIT_DESC(IIP,		fatal_gos,	pciex_oe) },
434 	{ TLU_OE_BIT_DESC(EDP,		fatal_gos,	pciex_oe) },
435 	{ TLU_OE_BIT_DESC(EHP,		fatal_gos,	pciex_oe) },
436 	{ TLU_OE_OB_BIT_DESC(TLUEITMO,	fatal_gos,	pciex_oe) },
437 	{ TLU_OE_BIT_DESC(LIN,		non_fatal,	pciex_oe) },
438 	{ TLU_OE_BIT_DESC(LRS,		non_fatal,	pciex_oe) },
439 	{ TLU_OE_BIT_DESC(LDN,		tlu_ldn,	pciex_oe) },
440 	{ TLU_OE_BIT_DESC(LUP,		tlu_lup,	pciex_oe) },
441 	{ TLU_OE_BIT_DESC(ERU,		fatal_gos,	pciex_oe) },
442 	{ TLU_OE_BIT_DESC(ERO,		fatal_gos,	pciex_oe) },
443 	{ TLU_OE_BIT_DESC(EMP,		fatal_gos,	pciex_oe) },
444 	{ TLU_OE_BIT_DESC(EPE,		fatal_gos,	pciex_oe) },
445 	{ TLU_OE_BIT_DESC(ERP,		fatal_gos,	pciex_oe) },
446 	{ TLU_OE_BIT_DESC(EIP,		fatal_gos,	pciex_oe) }
447 };
448 
449 #define	px_err_tlu_oe_keys \
450 	(sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t))
451 
452 
/*
 * All the following tables below are for LPU Interrupts.  These interrupts
 * are *NOT* error interrupts, but event status interrupts.
 *
 * These events are probably of most interest to:
 * o Hotplug
 * o Power Management
 * o etc...
 *
 * There are also a few events that would be interesting for FMA.
 * Again, none of the registers below state that an error has occurred
 * or that data has been lost.  If anything, they give status that an
 * error is *about* to occur.  Examples:
 * o INT_SKP_ERR - indicates the clock between fire and child is too far
 *		   off and is most likely unable to compensate
 * o INT_TX_PAR_ERR - A parity error occurred in ONE lane.  This is
 *		      HW recoverable, but will likely end up as a future
 *		      fabric error as well.
 *
 * For now, we don't care about any of these errors; they should be ignored,
 * but cleared.
 */
475 
476 /* LPU Link Interrupt Table */
477 #define	LPUL_BIT_DESC(bit, hdl, erpt) \
478 	LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
479 	0, \
480 	NULL, \
481 	NULL, \
482 	""
483 px_err_bit_desc_t px_err_lpul_tbl[] = {
484 	{ LPUL_BIT_DESC(LINK_ERR_ACT,	NULL,		NULL) }
485 };
486 #define	px_err_lpul_keys \
487 	(sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t))
488 
489 /* LPU Physical Interrupt Table */
490 #define	LPUP_BIT_DESC(bit, hdl, erpt) \
491 	LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
492 	0, \
493 	NULL, \
494 	NULL, \
495 	""
496 px_err_bit_desc_t px_err_lpup_tbl[] = {
497 	{ LPUP_BIT_DESC(PHY_LAYER_ERR,	NULL,		NULL) }
498 };
499 #define	px_err_lpup_keys \
500 	(sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t))
501 
502 /* LPU Receive Interrupt Table */
503 #define	LPUR_BIT_DESC(bit, hdl, erpt) \
504 	LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
505 	0, \
506 	NULL, \
507 	NULL, \
508 	""
509 px_err_bit_desc_t px_err_lpur_tbl[] = {
510 	{ LPUR_BIT_DESC(RCV_PHY,	NULL,		NULL) }
511 };
512 #define	px_err_lpur_keys \
513 	(sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t))
514 
515 /* LPU Transmit Interrupt Table */
516 #define	LPUX_BIT_DESC(bit, hdl, erpt) \
517 	LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
518 	0, \
519 	NULL, \
520 	NULL, \
521 	""
522 px_err_bit_desc_t px_err_lpux_tbl[] = {
523 	{ LPUX_BIT_DESC(UNMSK,		NULL,		NULL) }
524 };
525 #define	px_err_lpux_keys \
526 	(sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t))
527 
528 /* LPU LTSSM Interrupt Table */
529 #define	LPUS_BIT_DESC(bit, hdl, erpt) \
530 	LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \
531 	0, \
532 	NULL, \
533 	NULL, \
534 	""
535 px_err_bit_desc_t px_err_lpus_tbl[] = {
536 	{ LPUS_BIT_DESC(ANY,		NULL,		NULL) }
537 };
538 #define	px_err_lpus_keys \
539 	(sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t))
540 
541 /* LPU Gigablaze Glue Interrupt Table */
542 #define	LPUG_BIT_DESC(bit, hdl, erpt) \
543 	LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \
544 	0, \
545 	NULL, \
546 	NULL, \
547 	""
548 px_err_bit_desc_t px_err_lpug_tbl[] = {
549 	{ LPUG_BIT_DESC(GLOBL_UNMSK,	NULL,		NULL) }
550 };
551 #define	px_err_lpug_keys \
552 	(sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t))
553 
554 
555 /* Mask and Tables */
556 #define	MnT6X(pre) \
557 	&px_ ## pre ## _intr_mask, \
558 	&px_ ## pre ## _log_mask, \
559 	&px_ ## pre ## _count_mask, \
560 	px_err_ ## pre ## _tbl, \
561 	px_err_ ## pre ## _keys, \
562 	PX_REG_XBC, \
563 	0
564 
565 #define	MnT6(pre) \
566 	&px_ ## pre ## _intr_mask, \
567 	&px_ ## pre ## _log_mask, \
568 	&px_ ## pre ## _count_mask, \
569 	px_err_ ## pre ## _tbl, \
570 	px_err_ ## pre ## _keys, \
571 	PX_REG_CSR, \
572 	0
573 
574 /* LPU Registers Addresses */
575 #define	LR4(pre) \
576 	NULL, \
577 	LPU_ ## pre ## _INTERRUPT_MASK, \
578 	LPU_ ## pre ## _INTERRUPT_AND_STATUS, \
579 	LPU_ ## pre ## _INTERRUPT_AND_STATUS
580 
581 /* LPU Registers Addresses with Irregularities */
582 #define	LR4_FIXME(pre) \
583 	NULL, \
584 	LPU_ ## pre ## _INTERRUPT_MASK, \
585 	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \
586 	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS
587 
588 /* TLU Registers Addresses */
589 #define	TR4(pre) \
590 	TLU_ ## pre ## _LOG_ENABLE, \
591 	TLU_ ## pre ## _INTERRUPT_ENABLE, \
592 	TLU_ ## pre ## _INTERRUPT_STATUS, \
593 	TLU_ ## pre ## _STATUS_CLEAR
594 
595 /* Registers Addresses for JBC, UBC, MMU, IMU and ILU */
596 #define	R4(pre) \
597 	pre ## _ERROR_LOG_ENABLE, \
598 	pre ## _INTERRUPT_ENABLE, \
599 	pre ## _INTERRUPT_STATUS, \
600 	pre ## _ERROR_STATUS_CLEAR
601 
602 /* Bits in chip_mask, set according to type. */
603 #define	CHP_O	BITMASK(PX_CHIP_OBERON)
604 #define	CHP_F	BITMASK(PX_CHIP_FIRE)
605 #define	CHP_FO	(CHP_F | CHP_O)
606 
607 /*
608  * Register error handling tables.
609  * The ID Field (first field) is identified by an enum px_err_id_t.
610  * It is located in px_err.h
611  */
612 static const
613 px_err_reg_desc_t px_err_reg_tbl[] = {
614 	{ CHP_F,  MnT6X(jbc),	R4(JBC),		  "JBC Error"},
615 	{ CHP_O,  MnT6X(ubc),	R4(UBC),		  "UBC Error"},
616 	{ CHP_FO, MnT6(mmu),	R4(MMU),		  "MMU Error"},
617 	{ CHP_FO, MnT6(imu),	R4(IMU),		  "IMU Error"},
618 	{ CHP_FO, MnT6(tlu_ue),	TR4(UNCORRECTABLE_ERROR), "TLU UE"},
619 	{ CHP_FO, MnT6(tlu_ce),	TR4(CORRECTABLE_ERROR),	  "TLU CE"},
620 	{ CHP_FO, MnT6(tlu_oe),	TR4(OTHER_EVENT),	  "TLU OE"},
621 	{ CHP_FO, MnT6(ilu),	R4(ILU),		  "ILU Error"},
622 	{ CHP_F,  MnT6(lpul),	LR4(LINK_LAYER),	  "LPU Link Layer"},
623 	{ CHP_F,  MnT6(lpup),	LR4_FIXME(PHY),		  "LPU Phy Layer"},
624 	{ CHP_F,  MnT6(lpur),	LR4(RECEIVE_PHY),	  "LPU RX Phy Layer"},
625 	{ CHP_F,  MnT6(lpux),	LR4(TRANSMIT_PHY),	  "LPU TX Phy Layer"},
626 	{ CHP_F,  MnT6(lpus),	LR4(LTSSM),		  "LPU LTSSM"},
627 	{ CHP_F,  MnT6(lpug),	LR4(GIGABLAZE_GLUE),	  "LPU GigaBlaze Glue"},
628 };
629 
630 #define	PX_ERR_REG_KEYS	(sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0]))
631 
632 typedef struct px_err_ss {
633 	uint64_t err_status[PX_ERR_REG_KEYS];
634 } px_err_ss_t;
635 
636 static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, boolean_t chk_cb);
637 static int  px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr,
638     px_err_ss_t *ss);
639 static int  px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
640     int err, int caller);
641 
642 /*
643  * px_err_cb_intr:
644  * Interrupt handler for the JBC/UBC block.
645  * o lock
646  * o create derr
647  * o px_err_handle(leaf1, with cb)
648  * o px_err_handle(leaf2, without cb)
649  * o dispatch (leaf1)
650  * o dispatch (leaf2)
651  * o unlock
652  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
653  */
654 uint_t
655 px_err_cb_intr(caddr_t arg)
656 {
657 	px_fault_t	*px_fault_p = (px_fault_t *)arg;
658 	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
659 	px_t		*px_p = DIP_TO_STATE(rpdip);
660 	int		err = PX_OK;
661 	int		ret = DDI_FM_OK;
662 	int		fatal = 0;
663 	ddi_fm_error_t	derr;
664 
665 	/* Create the derr */
666 	bzero(&derr, sizeof (ddi_fm_error_t));
667 	derr.fme_version = DDI_FME_VERSION;
668 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
669 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
670 
671 	mutex_enter(&px_p->px_fm_mutex);
672 
673 	err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE);
674 
675 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
676 	switch (ret) {
677 	case DDI_FM_FATAL:
678 		fatal++;
679 		break;
680 	case DDI_FM_NONFATAL:
681 	case DDI_FM_UNKNOWN:
682 	default:
683 		break;
684 	}
685 
686 	/* Set the intr state to idle for the leaf that received the mondo */
687 
688 	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
689 	    INTR_IDLE_STATE);
690 
691 	mutex_exit(&px_p->px_fm_mutex);
692 
693 	/*
694 	 * PX_FATAL_HW error is diagnosed after system recovered from
695 	 * HW initiated reset, therefore no furthur handling is required.
696 	 */
697 	if (fatal || err & (PX_FATAL_GOS | PX_FATAL_SW))
698 		PX_FM_PANIC("Fatal System Bus Error has occurred\n");
699 
700 	return (DDI_INTR_CLAIMED);
701 }
702 
703 
704 /*
705  * px_err_dmc_pec_intr:
706  * Interrupt handler for the DMC/PEC block.
707  * o lock
708  * o create derr
709  * o px_err_handle(leaf, with cb)
710  * o dispatch (leaf)
711  * o unlock
712  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
713  */
714 uint_t
715 px_err_dmc_pec_intr(caddr_t arg)
716 {
717 	px_fault_t	*px_fault_p = (px_fault_t *)arg;
718 	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
719 	px_t		*px_p = DIP_TO_STATE(rpdip);
720 	int		err = PX_OK;
721 	int		ret = DDI_FM_OK;
722 	ddi_fm_error_t	derr;
723 
724 	/* Create the derr */
725 	bzero(&derr, sizeof (ddi_fm_error_t));
726 	derr.fme_version = DDI_FME_VERSION;
727 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
728 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
729 
730 	mutex_enter(&px_p->px_fm_mutex);
731 
732 	/* send ereport/handle/clear fire registers */
733 	err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE);
734 
735 	/* Check all child devices for errors */
736 	if (!px_lib_is_in_drain_state(px_p)) {
737 		ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
738 	}
739 
740 	/* Set the interrupt state to idle */
741 	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
742 	    INTR_IDLE_STATE);
743 
744 	mutex_exit(&px_p->px_fm_mutex);
745 
746 	/*
747 	 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset,
748 	 * therefore it does not cause panic.
749 	 */
750 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL))
751 		PX_FM_PANIC("Fatal System Port Error has occurred\n");
752 
753 	return (DDI_INTR_CLAIMED);
754 }
755 
756 /*
757  * Proper csr_base is responsibility of the caller. (Called from px_lib_dev_init
758  * via px_err_reg_setup_all for pcie error registers;  called from
759  * px_cb_add_intr for jbc/ubc from px_cb_attach.)
760  *
761  * Note: reg_id is passed in instead of reg_desc since this function is called
762  * from px_lib4u.c, which doesn't know about the structure of the table.
763  */
764 void
765 px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base)
766 {
767 	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
768 	uint64_t 		intr_mask = *reg_desc_p->intr_mask_p;
769 	uint64_t 		log_mask = *reg_desc_p->log_mask_p;
770 
771 	/* Enable logs if it exists */
772 	if (reg_desc_p->log_addr != NULL)
773 		CSR_XS(csr_base, reg_desc_p->log_addr, log_mask);
774 
775 	/*
776 	 * For readability you in code you set 1 to enable an interrupt.
777 	 * But in Fire it's backwards.  You set 1 to *disable* an intr.
778 	 * Reverse the user tunable intr mask field.
779 	 *
780 	 * Disable All Errors
781 	 * Clear All Errors
782 	 * Enable Errors
783 	 */
784 	CSR_XS(csr_base, reg_desc_p->enable_addr, 0);
785 	CSR_XS(csr_base, reg_desc_p->clear_addr, -1);
786 	CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask);
787 	DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg,
788 	    CSR_XR(csr_base, reg_desc_p->enable_addr));
789 	DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg,
790 	    CSR_XR(csr_base, reg_desc_p->status_addr));
791 	DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg,
792 	    CSR_XR(csr_base, reg_desc_p->clear_addr));
793 	if (reg_desc_p->log_addr != NULL) {
794 		DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg,
795 		    CSR_XR(csr_base, reg_desc_p->log_addr));
796 	}
797 }
798 
799 void
800 px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base)
801 {
802 	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
803 	uint64_t		val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0;
804 
805 	if (reg_desc_p->log_addr != NULL)
806 		CSR_XS(csr_base, reg_desc_p->log_addr, val);
807 	CSR_XS(csr_base, reg_desc_p->enable_addr, val);
808 }
809 
810 /*
811  * Set up pcie error registers.
812  */
813 void
814 px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable)
815 {
816 	px_err_id_t		reg_id;
817 	const px_err_reg_desc_t	*reg_desc_p;
818 	void (*px_err_reg_func)(px_err_id_t, caddr_t);
819 
820 	/*
821 	 * JBC or XBC are enabled during adding of common block interrupts,
822 	 * not done here.
823 	 */
824 	px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable);
825 	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
826 		reg_desc_p = &px_err_reg_tbl[reg_id];
827 		if ((reg_desc_p->chip_mask & chip_mask) &&
828 		    (reg_desc_p->reg_bank == PX_REG_CSR))
829 			px_err_reg_func(reg_id, csr_base);
830 	}
831 }
832 
833 /*
834  * px_err_handle:
835  * Common function called by trap, mondo and fabric intr.
836  * o Snap shot current fire registers
837  * o check for safe access
838  * o send ereport and clear snap shot registers
839  * o check severity of snap shot registers
840  *
841  * @param px_p		leaf in which to check access
842  * @param derr		fm err data structure to be updated
843  * @param caller	PX_TRAP_CALL | PX_INTR_CALL
844  * @param chk_cb	whether to handle cb registers
845  * @return err		PX_OK | PX_NONFATAL |
846  *                      PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL
847  */
848 int
849 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller,
850     boolean_t chk_cb)
851 {
852 	px_err_ss_t		ss = {0};
853 	int			err = PX_OK;
854 
855 	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
856 
857 	/* snap shot the current fire registers */
858 	px_err_snapshot(px_p, &ss, chk_cb);
859 
860 	/* check for safe access */
861 	px_err_safeacc_check(px_p, derr);
862 
863 	/* send ereports/handle/clear registers */
864 	err = px_err_erpt_and_clr(px_p, derr, &ss);
865 
866 	/* check for error severity */
867 	err = px_err_check_severity(px_p, derr, err, caller);
868 
869 	/* Mark the On Trap Handle if an error occured */
870 	if (err != PX_OK) {
871 		px_pec_t	*pec_p = px_p->px_pec_p;
872 		on_trap_data_t	*otd = pec_p->pec_ontrap_data;
873 
874 		if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS))
875 			otd->ot_trap |= OT_DATA_ACCESS;
876 	}
877 
878 	return (err);
879 }
880 
881 /*
882  * Static function
883  */
884 
885 /*
886  * px_err_snapshot:
887  * Take a current snap shot of all the fire error registers.  This includes
888  * JBC/UBC, DMC, and PEC, unless chk_cb == false;
889  *
890  * @param px_p		leaf in which to take the snap shot.
891  * @param ss		pre-allocated memory to store the snap shot.
892  * @param chk_cb	boolean on whether to store jbc/ubc register.
893  */
894 static void
895 px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, boolean_t chk_cb)
896 {
897 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
898 	caddr_t	xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
899 	caddr_t	pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
900 	uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p);
901 	const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl;
902 	px_err_id_t reg_id;
903 
904 	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) {
905 		if (!(reg_desc_p->chip_mask & chip_mask))
906 			continue;
907 		ss_p->err_status[reg_id] =
908 		    (reg_desc_p->reg_bank == PX_REG_CSR) ?
909 		    CSR_XR(pec_csr_base, reg_desc_p->status_addr) :
910 		    (chk_cb ?
911 			CSR_XR(xbc_csr_base, reg_desc_p->status_addr) : 0);
912 	}
913 }
914 
915 /*
916  * px_err_erpt_and_clr:
917  * This function does the following thing to all the fire registers based
918  * on an earlier snap shot.
919  * o Send ereport
920  * o Handle the error
921  * o Clear the error
922  *
923  * @param px_p		leaf in which to take the snap shot.
924  * @param derr		fm err in which the ereport is to be based on
925  * @param ss_p		pre-allocated memory to store the snap shot.
926  */
927 static int
928 px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p)
929 {
930 	dev_info_t		*rpdip = px_p->px_dip;
931 	pxu_t			*pxu_p = (pxu_t *)px_p->px_plat_p;
932 	caddr_t			csr_base;
933 	const px_err_reg_desc_t	*err_reg_tbl;
934 	px_err_bit_desc_t	*err_bit_tbl;
935 	px_err_bit_desc_t	*err_bit_desc;
936 
937 	uint64_t		*log_mask, *count_mask;
938 	uint64_t		status_addr, clear_addr;
939 	uint64_t		ss_reg;
940 
941 	int			(*err_handler)();
942 	int			(*erpt_handler)();
943 	px_err_id_t		reg_id, key;
944 	int			err = PX_OK;
945 	int			biterr;
946 
947 	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
948 
949 	/* send erport/handle/clear JBC errors */
950 	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
951 		/* Get the correct register description table */
952 		err_reg_tbl = &px_err_reg_tbl[reg_id];
953 
954 		/* Only look at enabled groups. */
955 		if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p))))
956 			continue;
957 
958 		/* Get the correct CSR BASE */
959 		csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank];
960 
961 		/* Get pointers to masks and register addresses */
962 		log_mask = err_reg_tbl->log_mask_p;
963 		count_mask = err_reg_tbl->count_mask_p;
964 		status_addr = err_reg_tbl->status_addr;
965 		clear_addr = err_reg_tbl->clear_addr;
966 		ss_reg = ss_p->err_status[reg_id];
967 
968 		/* Get the register BIT description table */
969 		err_bit_tbl = err_reg_tbl->err_bit_tbl;
970 
971 		/* For each known bit in the register send erpt and handle */
972 		for (key = 0; key < err_reg_tbl->err_bit_keys; key++) {
973 			/* Get the bit description table for this register */
974 			err_bit_desc = &err_bit_tbl[key];
975 
976 			/*
977 			 * If the ss_reg is set for this bit,
978 			 * send ereport and handle
979 			 */
980 			if (BIT_TST(ss_reg, err_bit_desc->bit)) {
981 				/* Increment the counter if necessary */
982 				if (BIT_TST(*count_mask, err_bit_desc->bit)) {
983 					err_bit_desc->counter++;
984 				}
985 
986 				/* Error Handle for this bit */
987 				err_handler = err_bit_desc->err_handler;
988 				if (err_handler) {
989 					biterr = err_handler(rpdip,
990 					    csr_base,
991 					    derr,
992 					    err_reg_tbl,
993 					    err_bit_desc);
994 					err |= biterr;
995 				}
996 
997 				/* Send the ereport if it's an UNEXPECTED err */
998 				erpt_handler = err_bit_desc->erpt_handler;
999 				if ((derr->fme_flag == DDI_FM_ERR_UNEXPECTED) &&
1000 				    (biterr != PX_OK)) {
1001 					if (erpt_handler)
1002 						(void) erpt_handler(rpdip,
1003 						    csr_base,
1004 						    ss_reg,
1005 						    derr,
1006 						    err_bit_desc->bit,
1007 						    err_bit_desc->class_name);
1008 				}
1009 			}
1010 		}
1011 		/* Print register status */
1012 		if (ss_reg & *log_mask)
1013 			DBG(DBG_ERR_INTR, rpdip, "<%x>=%16llx %s\n",
1014 			    status_addr, ss_reg, err_reg_tbl->msg);
1015 
1016 		/* Clear the register and error */
1017 		CSR_XS(csr_base, clear_addr, ss_reg);
1018 	}
1019 
1020 	return (err);
1021 }
1022 
1023 /*
1024  * px_err_check_severity:
1025  * Check the severity of the fire error based on an earlier snapshot
1026  *
1027  * @param px_p		leaf in which to take the snap shot.
1028  * @param derr		fm err in which the ereport is to be based on
1029  * @param err		fire register error status
1030  * @param caller	PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL
1031  */
1032 static int
1033 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller)
1034 {
1035 	px_pec_t 	*pec_p = px_p->px_pec_p;
1036 	boolean_t	is_safeacc = B_FALSE;
1037 
1038 	/* nothing to do if called with no error */
1039 	if (err == PX_OK)
1040 		return (err);
1041 
1042 	/* Cautious access error handling  */
1043 	switch (derr->fme_flag) {
1044 	case DDI_FM_ERR_EXPECTED:
1045 		if (caller == PX_TRAP_CALL) {
1046 			/*
1047 			 * for ddi_caut_get treat all events as nonfatal
1048 			 * The trampoline will set err_ena = 0,
1049 			 * err_status = NONFATAL.
1050 			 */
1051 			derr->fme_status = DDI_FM_NONFATAL;
1052 			is_safeacc = B_TRUE;
1053 		} else {
1054 			/*
1055 			 * For ddi_caut_put treat all events as nonfatal. Here
1056 			 * we have the handle and can call ndi_fm_acc_err_set().
1057 			 */
1058 			derr->fme_status = DDI_FM_NONFATAL;
1059 			ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
1060 			is_safeacc = B_TRUE;
1061 		}
1062 		break;
1063 	case DDI_FM_ERR_PEEK:
1064 	case DDI_FM_ERR_POKE:
1065 		/*
1066 		 * For ddi_peek/poke treat all events as nonfatal.
1067 		 */
1068 		is_safeacc = B_TRUE;
1069 		break;
1070 	default:
1071 		is_safeacc = B_FALSE;
1072 	}
1073 
1074 	/*
1075 	 * The third argument "err" is passed in as error status from checking
1076 	 * Fire register, re-adjust error status from safe access.
1077 	 */
1078 	if (is_safeacc && !(err & PX_FATAL_GOS))
1079 		return (PX_NONFATAL);
1080 
1081 	return (err);
1082 }
1083 
1084 /* predefined convenience functions */
1085 /* ARGSUSED */
1086 int
1087 px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base,
1088 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1089 	px_err_bit_desc_t *err_bit_descr)
1090 {
1091 	return (PX_FATAL_HW);
1092 }
1093 
1094 /* ARGSUSED */
1095 int
1096 px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base,
1097 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1098 	px_err_bit_desc_t *err_bit_descr)
1099 {
1100 	return (PX_FATAL_GOS);
1101 }
1102 
1103 /* ARGSUSED */
1104 int
1105 px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base,
1106 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1107 	px_err_bit_desc_t *err_bit_descr)
1108 {
1109 	return (PX_STUCK_FATAL);
1110 }
1111 
1112 /* ARGSUSED */
1113 int
1114 px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base,
1115 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1116 	px_err_bit_desc_t *err_bit_descr)
1117 {
1118 	return (PX_FATAL_SW);
1119 }
1120 
1121 /* ARGSUSED */
1122 int
1123 px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base,
1124 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1125 	px_err_bit_desc_t *err_bit_descr)
1126 {
1127 	return (PX_NONFATAL);
1128 }
1129 
1130 /* ARGSUSED */
1131 int
1132 px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr,
1133 	px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr)
1134 {
1135 	return (PX_OK);
1136 }
1137 
1138 /* ARGSUSED */
1139 int
1140 px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr,
1141 	px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr)
1142 {
1143 	return (PX_ERR_UNKNOWN);
1144 }
1145 
1146 /* ARGSUSED */
1147 PX_ERPT_SEND_DEC(do_not)
1148 {
1149 	return (PX_OK);
1150 }
1151 
1152 /* UBC FATAL - see io erpt doc, section 1.1 */
1153 /* ARGSUSED */
1154 PX_ERPT_SEND_DEC(ubc_fatal)
1155 {
1156 	char		buf[FM_MAX_CLASS];
1157 	uint64_t	memory_ue_log, marked;
1158 	char		unum[FM_MAX_CLASS];
1159 	int		unum_length;
1160 	uint64_t	device_id = 0;
1161 	uint8_t		cpu_version = 0;
1162 	nvlist_t	*resource = NULL;
1163 
1164 	unum[0] = '\0';
1165 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1166 
1167 	memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG);
1168 	marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) &
1169 	    UBC_MEMORY_UE_LOG_MARKED_MASK;
1170 
1171 	if ((strstr(class_name, "ubc.piowtue") != NULL) ||
1172 	    (strstr(class_name, "ubc.piowbeue") != NULL) ||
1173 	    (strstr(class_name, "ubc.piorbeue") != NULL) ||
1174 	    (strstr(class_name, "ubc.dmarduea") != NULL) ||
1175 	    (strstr(class_name, "ubc.dmardueb") != NULL)) {
1176 		int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) &
1177 		    UBC_MEMORY_UE_LOG_EID_MASK;
1178 		(void) strncat(buf, ubc_class_eid_qualifier[eid],
1179 		    FM_MAX_CLASS);
1180 
1181 		if (eid == UBC_EID_MEM) {
1182 			uint64_t phys_addr = memory_ue_log &
1183 			    MMU_OBERON_PADDR_MASK;
1184 			uint64_t offset = (uint64_t)-1;
1185 
1186 			resource = fm_nvlist_create(NULL);
1187 			if (&plat_get_mem_unum) {
1188 				if ((plat_get_mem_unum(0,
1189 				    phys_addr, 0, B_TRUE, 0, unum,
1190 				    FM_MAX_CLASS, &unum_length)) != 0)
1191 					unum[0] = '\0';
1192 			}
1193 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
1194 					NULL, unum, NULL, offset);
1195 
1196 		} else if (eid == UBC_EID_CPU) {
1197 			int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK);
1198 			char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1199 
1200 			resource = fm_nvlist_create(NULL);
1201 			cpu_version = cpunodes[cpuid].version;
1202 			device_id = cpunodes[cpuid].device_id;
1203 			(void) snprintf(sbuf, sizeof (sbuf), "%lX",
1204 			    device_id);
1205 			(void) fm_fmri_cpu_set(resource,
1206 			    FM_CPU_SCHEME_VERSION, NULL, cpuid,
1207 			    &cpu_version, sbuf);
1208 		}
1209 	}
1210 
1211 	if (resource) {
1212 		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1213 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1214 		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
1215 		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
1216 		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
1217 		    OBERON_UBC_IE, DATA_TYPE_UINT64,
1218 		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
1219 		    OBERON_UBC_IS, DATA_TYPE_UINT64,
1220 		    CSR_XR(csr_base, UBC_INTERRUPT_STATUS),
1221 		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
1222 		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
1223 		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
1224 		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
1225 		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
1226 		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
1227 		    OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource,
1228 		    NULL);
1229 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1230 	} else {
1231 		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1232 		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1233 		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
1234 		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
1235 		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
1236 		    OBERON_UBC_IE, DATA_TYPE_UINT64,
1237 		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
1238 		    OBERON_UBC_IS, DATA_TYPE_UINT64,
1239 		    CSR_XR(csr_base, UBC_INTERRUPT_STATUS),
1240 		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
1241 		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
1242 		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
1243 		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
1244 		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
1245 		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
1246 		    NULL);
1247 	}
1248 
1249 	return (PX_OK);
1250 }
1251 
1252 /* JBC FATAL - see io erpt doc, section 1.1 */
1253 PX_ERPT_SEND_DEC(jbc_fatal)
1254 {
1255 	char		buf[FM_MAX_CLASS];
1256 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1257 
1258 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1259 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1260 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1261 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1262 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1263 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1264 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1265 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1266 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1267 	    ss_reg,
1268 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1269 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1270 	    FIRE_JBC_FEL1, DATA_TYPE_UINT64,
1271 	    CSR_XR(csr_base, FATAL_ERROR_LOG_1),
1272 	    FIRE_JBC_FEL2, DATA_TYPE_UINT64,
1273 	    CSR_XR(csr_base, FATAL_ERROR_LOG_2),
1274 	    NULL);
1275 
1276 	return (PX_OK);
1277 }
1278 
1279 /* JBC MERGE - see io erpt doc, section 1.2 */
1280 PX_ERPT_SEND_DEC(jbc_merge)
1281 {
1282 	char		buf[FM_MAX_CLASS];
1283 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1284 
1285 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1286 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1287 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1288 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1289 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1290 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1291 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1292 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1293 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1294 	    ss_reg,
1295 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1296 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1297 	    FIRE_JBC_MTEL, DATA_TYPE_UINT64,
1298 	    CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG),
1299 	    NULL);
1300 
1301 	return (PX_OK);
1302 }
1303 
1304 /*
1305  * JBC Merge buffer nonfatal errors:
1306  *    Merge buffer parity error (rd_buf): dma:read:M:nonfatal
1307  *    Merge buffer parity error (wr_buf): dma:write:M:nonfatal
1308  */
1309 /* ARGSUSED */
1310 int
1311 px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base,
1312 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1313 	px_err_bit_desc_t *err_bit_descr)
1314 {
1315 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1316 	uint64_t	paddr;
1317 	int		ret;
1318 
1319 	if (!pri)
1320 		return (PX_FATAL_GOS);
1321 
1322 	paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG);
1323 	paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
1324 
1325 	ret = px_handle_lookup(
1326 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr);
1327 
1328 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1329 }
1330 
1331 /* JBC Jbusint IN - see io erpt doc, section 1.3 */
1332 PX_ERPT_SEND_DEC(jbc_in)
1333 {
1334 	char		buf[FM_MAX_CLASS];
1335 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1336 
1337 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1338 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1339 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1340 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1341 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1342 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1343 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1344 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1345 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1346 	    ss_reg,
1347 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1348 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1349 	    FIRE_JBC_JITEL1, DATA_TYPE_UINT64,
1350 	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG),
1351 	    FIRE_JBC_JITEL2, DATA_TYPE_UINT64,
1352 	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2),
1353 	    NULL);
1354 
1355 	return (PX_OK);
1356 }
1357 
1358 /*
1359  * JBC Jbusint IN nonfatal errors: PA logged in Jbusint In Transaction Error
1360  * Log Reg[42:0].
1361  *     CE async fault error: nonfatal
1362  *     Jbus bus error: dma::nonfatal
1363  *     Jbus unmapped error: pio|dma:rdwr:M:nonfatal
1364  *     Write data parity error: pio/write:M:nonfatal
1365  *     Read data parity error: pio/read:M:nonfatal
1366  *     Illegal NCWR bytemask: pio:write:M:nonfatal
1367  *     Illegal NCRD bytemask: pio:write:M:nonfatal
1368  *     Invalid jbus transaction: nonfatal
1369  */
1370 /* ARGSUSED */
1371 int
1372 px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base,
1373 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1374 	px_err_bit_desc_t *err_bit_descr)
1375 {
1376 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1377 	uint64_t	paddr;
1378 	int		ret;
1379 
1380 	if (!pri)
1381 		return (PX_FATAL_GOS);
1382 
1383 	paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG);
1384 	paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
1385 
1386 	ret = px_handle_lookup(
1387 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr);
1388 
1389 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1390 }
1391 
1392 
1393 /* JBC Jbusint Out - see io erpt doc, section 1.4 */
1394 PX_ERPT_SEND_DEC(jbc_out)
1395 {
1396 	char		buf[FM_MAX_CLASS];
1397 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1398 
1399 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1400 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1401 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1402 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1403 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1404 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1405 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1406 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1407 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1408 	    ss_reg,
1409 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1410 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1411 	    FIRE_JBC_JOTEL1, DATA_TYPE_UINT64,
1412 	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG),
1413 	    FIRE_JBC_JOTEL2, DATA_TYPE_UINT64,
1414 	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2),
1415 	    NULL);
1416 
1417 	return (PX_OK);
1418 }
1419 
1420 /* JBC Dmcint ODCD - see io erpt doc, section 1.5 */
1421 PX_ERPT_SEND_DEC(jbc_odcd)
1422 {
1423 	char		buf[FM_MAX_CLASS];
1424 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1425 
1426 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1427 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1428 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1429 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1430 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1431 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1432 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1433 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1434 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1435 	    ss_reg,
1436 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1437 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1438 	    FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64,
1439 	    CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG),
1440 	    NULL);
1441 
1442 	return (PX_OK);
1443 }
1444 
1445 /*
1446  * JBC Dmcint ODCO nonfatal errer handling -
1447  *    PIO data parity error: pio:write:M:nonfatal
1448  */
1449 /* ARGSUSED */
1450 int
1451 px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base,
1452 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1453 	px_err_bit_desc_t *err_bit_descr)
1454 {
1455 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1456 	uint64_t	paddr;
1457 	int		ret;
1458 
1459 	if (!pri)
1460 		return (PX_FATAL_GOS);
1461 
1462 	paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG);
1463 	paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK;
1464 
1465 	ret = px_handle_lookup(
1466 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr);
1467 
1468 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1469 }
1470 
1471 /* Does address in DMCINT error log register match address of pcitool access? */
1472 static boolean_t
1473 px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base)
1474 {
1475 	px_t	*px_p = DIP_TO_STATE(rpdip);
1476 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1477 	caddr_t	pcitool_addr = pxu_p->pcitool_addr;
1478 	caddr_t errlog_addr =
1479 	    (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS);
1480 
1481 	return (pcitool_addr == errlog_addr);
1482 }
1483 
1484 /*
1485  * JBC Dmcint ODCD errer handling for errors which are forgivable during a safe
1486  * access.  (This will be most likely be a PCItool access.)  If not a safe
1487  * access context, treat like jbc_dmcint_odcd.
1488  *    Unmapped PIO read error: pio:read:M:nonfatal
1489  *    Unmapped PIO write error: pio:write:M:nonfatal
1490  *    Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal
1491  *    Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal
1492  */
1493 /* ARGSUSED */
1494 int
1495 px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base,
1496 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1497 	px_err_bit_desc_t *err_bit_descr)
1498 {
1499 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1500 
1501 	if (!pri)
1502 		return (PX_FATAL_GOS);
1503 	/*
1504 	 * Got an error which is forgivable during a PCItool access.
1505 	 *
1506 	 * Don't do handler check since the error may otherwise be unfairly
1507 	 * attributed to a device.  Just return.
1508 	 *
1509 	 * Note: There is a hole here in that a legitimate error can come in
1510 	 * while a PCItool access is in play and be forgiven.  This is possible
1511 	 * though not likely.
1512 	 */
1513 	if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) &&
1514 	    (px_jbc_pcitool_addr_match(rpdip, csr_base)))
1515 		return (PX_FATAL_SW);
1516 
1517 	return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr,
1518 	    err_reg_descr, err_bit_descr));
1519 }
1520 
1521 /* JBC Dmcint IDC - see io erpt doc, section 1.6 */
1522 PX_ERPT_SEND_DEC(jbc_idc)
1523 {
1524 	char		buf[FM_MAX_CLASS];
1525 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1526 
1527 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1528 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1529 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1530 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1531 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1532 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1533 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1534 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1535 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1536 	    ss_reg,
1537 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1538 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1539 	    FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64,
1540 	    CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG),
1541 	    NULL);
1542 
1543 	return (PX_OK);
1544 }
1545 
1546 /* JBC CSR - see io erpt doc, section 1.7 */
1547 PX_ERPT_SEND_DEC(jbc_csr)
1548 {
1549 	char		buf[FM_MAX_CLASS];
1550 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1551 
1552 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1553 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1554 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1555 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1556 	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
1557 	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1558 	    FIRE_JBC_IE, DATA_TYPE_UINT64,
1559 	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1560 	    FIRE_JBC_IS, DATA_TYPE_UINT64,
1561 	    ss_reg,
1562 	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
1563 	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1564 	    "jbc-error-reg", DATA_TYPE_UINT64,
1565 	    CSR_XR(csr_base, CSR_ERROR_LOG),
1566 	    NULL);
1567 
1568 	return (PX_OK);
1569 }
1570 
1571 /*
1572  * JBC CSR errer handling -
1573  * Ebus ready timeout error: pio:rdwr:M:nonfatal
1574  */
1575 /* ARGSUSED */
1576 int
1577 px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base,
1578 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1579 	px_err_bit_desc_t *err_bit_descr)
1580 {
1581 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1582 	uint64_t	paddr;
1583 	int		ret;
1584 
1585 	if (!pri)
1586 		return (PX_FATAL_GOS);
1587 
1588 	paddr = CSR_XR(csr_base, CSR_ERROR_LOG);
1589 	paddr &= CSR_ERROR_LOG_ADDRESS_MASK;
1590 
1591 	ret = px_handle_lookup(
1592 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr);
1593 
1594 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1595 }
1596 
1597 /* JBC Dmcint IDC - see io erpt doc, section 1.6 */
1598 
1599 /* DMC IMU RDS - see io erpt doc, section 2.1 */
1600 PX_ERPT_SEND_DEC(imu_rds)
1601 {
1602 	char		buf[FM_MAX_CLASS];
1603 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1604 
1605 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1606 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1607 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1608 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1609 	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
1610 	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1611 	    FIRE_IMU_IE, DATA_TYPE_UINT64,
1612 	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1613 	    FIRE_IMU_IS, DATA_TYPE_UINT64,
1614 	    ss_reg,
1615 	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
1616 	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1617 	    FIRE_IMU_RDS, DATA_TYPE_UINT64,
1618 	    CSR_XR(csr_base, IMU_RDS_ERROR_LOG),
1619 	    NULL);
1620 
1621 	return (PX_OK);
1622 }
1623 
1624 /* imu function to handle all Received but Not Enabled errors */
1625 /* ARGSUSED */
1626 int
1627 px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base,
1628 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1629 	px_err_bit_desc_t *err_bit_descr)
1630 {
1631 	uint64_t	imu_log_enable, imu_intr_enable;
1632 	int		mask = BITMASK(err_bit_descr->bit);
1633 	int		err = PX_NONFATAL;
1634 
1635 	imu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr);
1636 	imu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr);
1637 
1638 	/*
1639 	 * If matching bit is not set, meaning corresponding rbne not
1640 	 * enabled, then receiving it indicates some sort of malfunction
1641 	 * possibly in hardware.
1642 	 *
1643 	 * Other wise, software may have intentionally disabled certain
1644 	 * errors for a period of time within which the occuring of the
1645 	 * disabled errors become rbne, that is non fatal.
1646 	 */
1647 
1648 	if (!(imu_log_enable & imu_intr_enable & mask))
1649 		err = PX_FATAL_GOS;
1650 
1651 	return (err);
1652 }
1653 
1654 /*
1655  * No platforms uses PME. Any PME received is simply logged
1656  * for analysis.
1657  */
1658 /* ARGSUSED */
1659 int
1660 px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base,
1661 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1662 	px_err_bit_desc_t *err_bit_descr)
1663 {
1664 	px_t		*px_p = DIP_TO_STATE(rpdip);
1665 
1666 	px_p->px_pme_ignored++;
1667 	return (PX_NONFATAL);
1668 }
1669 
1670 /* handle EQ overflow */
1671 /* ARGSUSED */
1672 int
1673 px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base,
1674 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1675 	px_err_bit_desc_t *err_bit_descr)
1676 {
1677 	px_t			*px_p = DIP_TO_STATE(rpdip);
1678 	px_msiq_state_t 	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
1679 	msiqid_t		eqno;
1680 	pci_msiq_state_t	msiq_state;
1681 	int			err = PX_NONFATAL;
1682 	int			i;
1683 
1684 	eqno = msiq_state_p->msiq_1st_msiq_id;
1685 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
1686 		if (px_lib_msiq_getstate(rpdip, eqno, &msiq_state) ==
1687 			DDI_SUCCESS) {
1688 			if (msiq_state == PCI_MSIQ_STATE_ERROR) {
1689 				err = PX_FATAL_GOS;
1690 			}
1691 		}
1692 	}
1693 
1694 	return (err);
1695 }
1696 
1697 /* DMC IMU SCS - see io erpt doc, section 2.2 */
1698 PX_ERPT_SEND_DEC(imu_scs)
1699 {
1700 	char		buf[FM_MAX_CLASS];
1701 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1702 
1703 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1704 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1705 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1706 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1707 	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
1708 	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1709 	    FIRE_IMU_IE, DATA_TYPE_UINT64,
1710 	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1711 	    FIRE_IMU_IS, DATA_TYPE_UINT64,
1712 	    ss_reg,
1713 	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
1714 	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1715 	    FIRE_IMU_SCS, DATA_TYPE_UINT64,
1716 	    CSR_XR(csr_base, IMU_SCS_ERROR_LOG),
1717 	    NULL);
1718 
1719 	return (PX_OK);
1720 }
1721 
1722 /* DMC IMU - see io erpt doc, section 2.3 */
1723 PX_ERPT_SEND_DEC(imu)
1724 {
1725 	char		buf[FM_MAX_CLASS];
1726 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1727 
1728 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1729 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1730 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1731 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1732 	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
1733 	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1734 	    FIRE_IMU_IE, DATA_TYPE_UINT64,
1735 	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1736 	    FIRE_IMU_IS, DATA_TYPE_UINT64,
1737 	    ss_reg,
1738 	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
1739 	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1740 	    NULL);
1741 
1742 	return (PX_OK);
1743 }
1744 
1745 /* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */
1746 PX_ERPT_SEND_DEC(mmu_tfar_tfsr)
1747 {
1748 	char		buf[FM_MAX_CLASS];
1749 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1750 
1751 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1752 
1753 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1754 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1755 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1756 	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
1757 	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
1758 	    FIRE_MMU_IE, DATA_TYPE_UINT64,
1759 	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
1760 	    FIRE_MMU_IS, DATA_TYPE_UINT64,
1761 	    ss_reg,
1762 	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
1763 	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
1764 	    FIRE_MMU_TFAR, DATA_TYPE_UINT64,
1765 	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS),
1766 	    FIRE_MMU_TFSR, DATA_TYPE_UINT64,
1767 	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS),
1768 	    NULL);
1769 
1770 	return (PX_OK);
1771 }
1772 
1773 /* DMC MMU - see io erpt doc, section 2.5 */
1774 PX_ERPT_SEND_DEC(mmu)
1775 {
1776 	char		buf[FM_MAX_CLASS];
1777 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1778 
1779 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1780 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1781 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1782 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1783 	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
1784 	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
1785 	    FIRE_MMU_IE, DATA_TYPE_UINT64,
1786 	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
1787 	    FIRE_MMU_IS, DATA_TYPE_UINT64,
1788 	    ss_reg,
1789 	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
1790 	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
1791 	    NULL);
1792 
1793 	return (PX_OK);
1794 }
1795 
1796 /* imu function to handle all Received but Not Enabled errors */
1797 int
1798 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base,
1799 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1800 	px_err_bit_desc_t *err_bit_descr)
1801 {
1802 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1803 	uint64_t	mmu_log_enable, mmu_intr_enable;
1804 	uint64_t	mask = BITMASK(err_bit_descr->bit);
1805 	uint64_t	mmu_tfa, mmu_ctrl;
1806 	uint64_t	mmu_enable_bit = 0;
1807 	int		err = PX_NONFATAL;
1808 	int		ret;
1809 
1810 	mmu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr);
1811 	mmu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr);
1812 
1813 	mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS);
1814 	mmu_ctrl = CSR_XR(csr_base, MMU_CONTROL_AND_STATUS);
1815 
1816 	switch (err_bit_descr->bit) {
1817 	case MMU_INTERRUPT_STATUS_BYP_ERR_P:
1818 		mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_BE);
1819 		break;
1820 	case MMU_INTERRUPT_STATUS_TRN_ERR_P:
1821 		mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_TE);
1822 		break;
1823 	default:
1824 		mmu_enable_bit = 0;
1825 		break;
1826 	}
1827 
1828 	/*
1829 	 * If the interrupts are enabled and Translation/Bypass Enable bit
1830 	 * was set, then panic.  This error should not have occured.
1831 	 */
1832 	if (mmu_log_enable & mmu_intr_enable &
1833 	    (mmu_ctrl & mmu_enable_bit)) {
1834 		err = PX_FATAL_GOS;
1835 	} else {
1836 		if (!pri)
1837 			return (PX_FATAL_GOS);
1838 
1839 		ret = px_handle_lookup(
1840 			rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa);
1841 		err = (ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL;
1842 
1843 		/*
1844 		 * S/W bug - this error should always be enabled
1845 		 */
1846 
1847 		/* enable error & intr reporting for this bit */
1848 		CSR_XS(csr_base, MMU_ERROR_LOG_ENABLE, mmu_log_enable | mask);
1849 		CSR_XS(csr_base, MMU_INTERRUPT_ENABLE, mmu_intr_enable | mask);
1850 
1851 		/* enable translation access/bypass enable */
1852 		CSR_XS(csr_base, MMU_CONTROL_AND_STATUS,
1853 		    mmu_ctrl | mmu_enable_bit);
1854 	}
1855 
1856 	return (err);
1857 }
1858 
1859 /* Generic error handling functions that involve MMU Translation Fault Addr */
1860 /* ARGSUSED */
1861 int
1862 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base,
1863 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1864 	px_err_bit_desc_t *err_bit_descr)
1865 {
1866 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1867 	uint64_t	mmu_tfa;
1868 	uint_t		ret;
1869 
1870 	if (!pri)
1871 		return (PX_FATAL_GOS);
1872 
1873 	mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS);
1874 	ret = px_handle_lookup(
1875 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa);
1876 
1877 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1878 }
1879 
1880 /* MMU Table walk errors */
1881 /* ARGSUSED */
1882 int
1883 px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base,
1884 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1885 	px_err_bit_desc_t *err_bit_descr)
1886 {
1887 	boolean_t	pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1888 	uint64_t	mmu_tfa;
1889 	uint_t		ret;
1890 
1891 	if (!pri)
1892 		return (PX_FATAL_GOS);
1893 
1894 	mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS);
1895 	ret = px_handle_lookup(
1896 		rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa);
1897 
1898 	return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL);
1899 }
1900 
1901 /*
1902  * TLU LUP event - if caused by power management activity, then it is expected.
1903  * In all other cases, it is an error.
1904  */
1905 /* ARGSUSED */
1906 int
1907 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base,
1908 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1909 	px_err_bit_desc_t *err_bit_descr)
1910 {
1911 	px_t	*px_p = DIP_TO_STATE(rpdip);
1912 
1913 	/*
1914 	 * power management code is currently the only segment that sets
1915 	 * px_lup_pending to indicate its expectation for a healthy LUP
1916 	 * event.  For all other occasions, LUP event should be flaged as
1917 	 * error condition.
1918 	 */
1919 	return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ?
1920 	    PX_NONFATAL : PX_OK);
1921 }
1922 
1923 /*
1924  * TLU LDN event - if caused by power management activity, then it is expected.
1925  * In all other cases, it is an error.
1926  */
1927 /* ARGSUSED */
1928 int
1929 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base,
1930 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1931 	px_err_bit_desc_t *err_bit_descr)
1932 {
1933 	px_t    *px_p = DIP_TO_STATE(rpdip);
1934 	return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_OK : PX_NONFATAL);
1935 }
1936 
1937 /* PEC ILU none - see io erpt doc, section 3.1 */
1938 PX_ERPT_SEND_DEC(pec_ilu)
1939 {
1940 	char		buf[FM_MAX_CLASS];
1941 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1942 
1943 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1944 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1945 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1946 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1947 	    FIRE_ILU_ELE, DATA_TYPE_UINT64,
1948 	    CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE),
1949 	    FIRE_ILU_IE, DATA_TYPE_UINT64,
1950 	    CSR_XR(csr_base, ILU_INTERRUPT_ENABLE),
1951 	    FIRE_ILU_IS, DATA_TYPE_UINT64,
1952 	    ss_reg,
1953 	    FIRE_ILU_ESS, DATA_TYPE_UINT64,
1954 	    CSR_XR(csr_base, ILU_ERROR_STATUS_SET),
1955 	    NULL);
1956 
1957 	return (PX_OK);
1958 }
1959 
1960 /* PCIEX UE Errors */
1961 /* ARGSUSED */
1962 int
1963 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base,
1964 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1965 	px_err_bit_desc_t *err_bit_descr)
1966 {
1967 	uint32_t	mask = (uint32_t)BITMASK(err_bit_descr->bit);
1968 
1969 	return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ue_gos) ?
1970 	    PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ue,
1971 		px_fabric_die_rc_ue_gos));
1972 }
1973 
1974 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.2 */
1975 PX_ERPT_SEND_DEC(pciex_rx_ue)
1976 {
1977 	char		buf[FM_MAX_CLASS];
1978 	boolean_t	pri = PX_ERR_IS_PRI(bit);
1979 
1980 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1981 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1982 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1983 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1984 	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
1985 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
1986 	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
1987 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
1988 	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
1989 	    ss_reg,
1990 	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
1991 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
1992 	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
1993 	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
1994 	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
1995 	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
1996 	    NULL);
1997 
1998 	return (PX_OK);
1999 }
2000 
2001 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.3 */
2002 PX_ERPT_SEND_DEC(pciex_tx_ue)
2003 {
2004 	char		buf[FM_MAX_CLASS];
2005 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2006 
2007 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2008 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2009 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2010 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2011 	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
2012 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2013 	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
2014 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2015 	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
2016 	    ss_reg,
2017 	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
2018 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2019 	    FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
2020 	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
2021 	    FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
2022 	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
2023 	    NULL);
2024 
2025 	return (PX_OK);
2026 }
2027 
2028 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.4 */
2029 PX_ERPT_SEND_DEC(pciex_rx_tx_ue)
2030 {
2031 	char		buf[FM_MAX_CLASS];
2032 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2033 
2034 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2035 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2036 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2037 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2038 	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
2039 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2040 	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
2041 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2042 	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
2043 	    ss_reg,
2044 	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
2045 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2046 	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
2047 	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
2048 	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
2049 	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
2050 	    FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
2051 	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
2052 	    FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
2053 	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
2054 	    NULL);
2055 
2056 	return (PX_OK);
2057 }
2058 
2059 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */
2060 PX_ERPT_SEND_DEC(pciex_ue)
2061 {
2062 	char		buf[FM_MAX_CLASS];
2063 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2064 
2065 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2066 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2067 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2068 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2069 	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
2070 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2071 	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
2072 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2073 	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
2074 	    ss_reg,
2075 	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
2076 	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2077 	    NULL);
2078 
2079 	return (PX_OK);
2080 }
2081 
2082 /* PCIEX UE Errors */
2083 /* ARGSUSED */
2084 int
2085 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base,
2086 	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
2087 	px_err_bit_desc_t *err_bit_descr)
2088 {
2089 	uint32_t	mask = (uint32_t)BITMASK(err_bit_descr->bit);
2090 
2091 	return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ce_gos) ?
2092 	    PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ce,
2093 		px_fabric_die_rc_ce_gos));
2094 }
2095 
2096 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */
2097 PX_ERPT_SEND_DEC(pciex_ce)
2098 {
2099 	char		buf[FM_MAX_CLASS];
2100 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2101 
2102 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2103 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2104 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2105 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2106 	    FIRE_TLU_CELE, DATA_TYPE_UINT64,
2107 	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE),
2108 	    FIRE_TLU_CIE, DATA_TYPE_UINT64,
2109 	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE),
2110 	    FIRE_TLU_CIS, DATA_TYPE_UINT64,
2111 	    ss_reg,
2112 	    FIRE_TLU_CESS, DATA_TYPE_UINT64,
2113 	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET),
2114 	    NULL);
2115 
2116 	return (PX_OK);
2117 }
2118 
2119 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */
2120 PX_ERPT_SEND_DEC(pciex_rx_oe)
2121 {
2122 	char		buf[FM_MAX_CLASS];
2123 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2124 
2125 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2126 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2127 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2128 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2129 	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2130 	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2131 	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2132 	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2133 	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2134 	    ss_reg,
2135 	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2136 	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2137 	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
2138 	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG),
2139 	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
2140 	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG),
2141 	    NULL);
2142 
2143 	return (PX_OK);
2144 }
2145 
2146 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */
2147 PX_ERPT_SEND_DEC(pciex_rx_tx_oe)
2148 {
2149 	char		buf[FM_MAX_CLASS];
2150 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2151 
2152 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2153 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2154 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2155 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2156 	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2157 	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2158 	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2159 	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2160 	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2161 	    ss_reg,
2162 	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2163 	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2164 	    FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64,
2165 	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG),
2166 	    FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64,
2167 	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG),
2168 	    FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64,
2169 	    CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG),
2170 	    FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64,
2171 	    CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG),
2172 	    NULL);
2173 
2174 	return (PX_OK);
2175 }
2176 
2177 /* TLU Other Event - see io erpt doc, section 3.9 */
2178 PX_ERPT_SEND_DEC(pciex_oe)
2179 {
2180 	char		buf[FM_MAX_CLASS];
2181 	boolean_t	pri = PX_ERR_IS_PRI(bit);
2182 
2183 	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2184 	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2185 	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2186 	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2187 	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2188 	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2189 	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2190 	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2191 	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2192 	    ss_reg,
2193 	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2194 	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2195 	    NULL);
2196 
2197 	return (PX_OK);
2198 }
2199