1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * sun4u Fire Error Handling
29 */
30
31 #include <sys/types.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/pcie.h>
38 #include <sys/pcie_impl.h>
39 #include "px_obj.h"
40 #include <px_regs.h>
41 #include <px_csr.h>
42 #include <sys/membar.h>
43 #include <sys/machcpuvar.h>
44 #include <sys/platform_module.h>
45 #include "px_lib4u.h"
46 #include "px_err.h"
47 #include "px_err_impl.h"
48 #include "oberon_regs.h"
49
50 uint64_t px_tlu_ue_intr_mask = PX_ERR_EN_ALL;
51 uint64_t px_tlu_ue_log_mask = PX_ERR_EN_ALL;
52 uint64_t px_tlu_ue_count_mask = PX_ERR_EN_ALL;
53
54 uint64_t px_tlu_ce_intr_mask = PX_ERR_MASK_NONE;
55 uint64_t px_tlu_ce_log_mask = PX_ERR_MASK_NONE;
56 uint64_t px_tlu_ce_count_mask = PX_ERR_MASK_NONE;
57
58 /*
59 * Do not enable Link Interrupts
60 */
61 uint64_t px_tlu_oe_intr_mask = PX_ERR_EN_ALL & ~0x80000000800;
62 uint64_t px_tlu_oe_log_mask = PX_ERR_EN_ALL & ~0x80000000800;
63 uint64_t px_tlu_oe_count_mask = PX_ERR_EN_ALL;
64
65 uint64_t px_mmu_intr_mask = PX_ERR_EN_ALL;
66 uint64_t px_mmu_log_mask = PX_ERR_EN_ALL;
67 uint64_t px_mmu_count_mask = PX_ERR_EN_ALL;
68
69 uint64_t px_imu_intr_mask = PX_ERR_EN_ALL;
70 uint64_t px_imu_log_mask = PX_ERR_EN_ALL;
71 uint64_t px_imu_count_mask = PX_ERR_EN_ALL;
72
73 /*
74 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
75 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
76 */
77 uint64_t px_ilu_intr_mask = (((uint64_t)0x10 << 32) | 0x10);
78 uint64_t px_ilu_log_mask = (((uint64_t)0x10 << 32) | 0x10);
79 uint64_t px_ilu_count_mask = PX_ERR_EN_ALL;
80
81 uint64_t px_ubc_intr_mask = PX_ERR_EN_ALL;
82 uint64_t px_ubc_log_mask = PX_ERR_EN_ALL;
83 uint64_t px_ubc_count_mask = PX_ERR_EN_ALL;
84
85 uint64_t px_jbc_intr_mask = PX_ERR_EN_ALL;
86 uint64_t px_jbc_log_mask = PX_ERR_EN_ALL;
87 uint64_t px_jbc_count_mask = PX_ERR_EN_ALL;
88
89 /*
90 * LPU Intr Registers use the reverse encoding of the registers above:
91 * 1 = disable
92 * 0 = enable
93 *
94 * Log and Count masks, however, are still the same.
95 */
96 uint64_t px_lpul_intr_mask = LPU_INTR_DISABLE;
97 uint64_t px_lpul_log_mask = PX_ERR_EN_ALL;
98 uint64_t px_lpul_count_mask = PX_ERR_EN_ALL;
99
100 uint64_t px_lpup_intr_mask = LPU_INTR_DISABLE;
101 uint64_t px_lpup_log_mask = PX_ERR_EN_ALL;
102 uint64_t px_lpup_count_mask = PX_ERR_EN_ALL;
103
104 uint64_t px_lpur_intr_mask = LPU_INTR_DISABLE;
105 uint64_t px_lpur_log_mask = PX_ERR_EN_ALL;
106 uint64_t px_lpur_count_mask = PX_ERR_EN_ALL;
107
108 uint64_t px_lpux_intr_mask = LPU_INTR_DISABLE;
109 uint64_t px_lpux_log_mask = PX_ERR_EN_ALL;
110 uint64_t px_lpux_count_mask = PX_ERR_EN_ALL;
111
112 uint64_t px_lpus_intr_mask = LPU_INTR_DISABLE;
113 uint64_t px_lpus_log_mask = PX_ERR_EN_ALL;
114 uint64_t px_lpus_count_mask = PX_ERR_EN_ALL;
115
116 uint64_t px_lpug_intr_mask = LPU_INTR_DISABLE;
117 uint64_t px_lpug_log_mask = PX_ERR_EN_ALL;
118 uint64_t px_lpug_count_mask = PX_ERR_EN_ALL;
119
120 /*
121 * JBC error bit table
122 */
123 #define JBC_BIT_DESC(bit, hdl, erpt) \
124 JBC_INTERRUPT_STATUS_ ## bit ## _P, \
125 0, \
126 PX_ERR_BIT_HANDLE(hdl), \
127 PX_ERPT_SEND(erpt), \
128 PX_ERR_JBC_CLASS(bit) }, \
129 { JBC_INTERRUPT_STATUS_ ## bit ## _S, \
130 0, \
131 PX_ERR_BIT_HANDLE(hdl), \
132 PX_ERPT_SEND(erpt), \
133 PX_ERR_JBC_CLASS(bit)
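/*
 * For illustration only (a sketch of the mechanical expansion, not
 * generated code): each JBC_BIT_DESC() use below emits a pair of
 * px_err_bit_desc_t entries, one for the PRIMARY (_P) status bit and
 * one for the SECONDARY (_S) bit.  For example,
 * { JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) } expands roughly to:
 *
 *	{ JBC_INTERRUPT_STATUS_MB_PEA_P, 0,
 *	    PX_ERR_BIT_HANDLE(hw_reset), PX_ERPT_SEND(jbc_fatal),
 *	    PX_ERR_JBC_CLASS(MB_PEA) },
 *	{ JBC_INTERRUPT_STATUS_MB_PEA_S, 0,
 *	    PX_ERR_BIT_HANDLE(hw_reset), PX_ERPT_SEND(jbc_fatal),
 *	    PX_ERR_JBC_CLASS(MB_PEA) }
 *
 * PX_ERR_BIT_HANDLE() and PX_ERPT_SEND() presumably resolve (see
 * px_err_impl.h) to the px_err_*_handle() and PX_ERPT_SEND_DEC()
 * functions defined later in this file.
 */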
134 px_err_bit_desc_t px_err_jbc_tbl[] = {
135 /* JBC FATAL */
136 { JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) },
137 { JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) },
138 { JBC_BIT_DESC(APE, hw_reset, jbc_fatal) },
139 { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) },
140 { JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) },
141 { JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) },
142 { JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) },
143
144 /* JBC MERGE */
145 { JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) },
146 { JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) },
147
148 /* JBC Jbusint IN */
149 { JBC_BIT_DESC(UE_ASYN, panic, jbc_in) },
150 { JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) },
151 { JBC_BIT_DESC(JTE, panic, jbc_in) },
152 { JBC_BIT_DESC(JBE, panic, jbc_in) },
153 { JBC_BIT_DESC(JUE, panic, jbc_in) },
154 { JBC_BIT_DESC(ICISE, panic, jbc_in) },
155 { JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) },
156 { JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) },
157 { JBC_BIT_DESC(ILL_BMW, panic, jbc_in) },
158 { JBC_BIT_DESC(ILL_BMR, panic, jbc_in) },
159 { JBC_BIT_DESC(BJC, panic, jbc_in) },
160
161 /* JBC Jbusint Out */
162 { JBC_BIT_DESC(IJP, panic, jbc_out) },
163
164 /*
165 * JBC Dmcint ODCD
166 *
167 * Error bits which can be set via a bad PCItool access go through
168 * jbc_safe_acc instead.
169 */
170 { JBC_BIT_DESC(PIO_UNMAP_RD, jbc_safe_acc, jbc_odcd) },
171 { JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) },
172 { JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) },
173 { JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) },
174 { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) },
175 { JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) },
176
177 /* JBC Dmcint IDC */
178 { JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) },
179 { JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) },
180
181 /* JBC CSR */
182 { JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) }
183 };
184
185 #define px_err_jbc_keys \
186 (sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t))
187
188 /*
189 * UBC error bit table
190 */
191 #define UBC_BIT_DESC(bit, hdl, erpt) \
192 UBC_INTERRUPT_STATUS_ ## bit ## _P, \
193 0, \
194 PX_ERR_BIT_HANDLE(hdl), \
195 PX_ERPT_SEND(erpt), \
196 PX_ERR_UBC_CLASS(bit) }, \
197 { UBC_INTERRUPT_STATUS_ ## bit ## _S, \
198 0, \
199 PX_ERR_BIT_HANDLE(hdl), \
200 PX_ERPT_SEND(erpt), \
201 PX_ERR_UBC_CLASS(bit)
202 px_err_bit_desc_t px_err_ubc_tbl[] = {
203 /* UBC FATAL */
204 { UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) },
205 { UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) },
206 { UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) },
207 { UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) },
208 { UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) },
209 { UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) },
210 { UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) },
211 { UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) },
212 { UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) },
213 { UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) },
214 { UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) }
215 };
216
217 #define px_err_ubc_keys \
218 (sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t))
219
220
221 char *ubc_class_eid_qualifier[] = {
222 "-mem",
223 "-channel",
224 "-cpu",
225 "-path"
226 };
227
228
229 /*
230 * DMC error bit tables
231 */
232 #define IMU_BIT_DESC(bit, hdl, erpt) \
233 IMU_INTERRUPT_STATUS_ ## bit ## _P, \
234 0, \
235 PX_ERR_BIT_HANDLE(hdl), \
236 PX_ERPT_SEND(erpt), \
237 PX_ERR_DMC_CLASS(bit) }, \
238 { IMU_INTERRUPT_STATUS_ ## bit ## _S, \
239 0, \
240 PX_ERR_BIT_HANDLE(hdl), \
241 PX_ERPT_SEND(erpt), \
242 PX_ERR_DMC_CLASS(bit)
243 px_err_bit_desc_t px_err_imu_tbl[] = {
244 /* DMC IMU RDS */
245 { IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) },
246 { IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) },
247 { IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) },
248 { IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) },
249 { IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) },
250 { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) },
251 { IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) },
252 { IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) },
253
254 /* DMC IMU SCS */
255 { IMU_BIT_DESC(EQ_NOT_EN, panic, imu_scs) },
256
257 /* DMC IMU */
258 { IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) }
259 };
260
261 #define px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t))
262
263 /* mmu errors */
264 #define MMU_BIT_DESC(bit, hdl, erpt) \
265 MMU_INTERRUPT_STATUS_ ## bit ## _P, \
266 0, \
267 PX_ERR_BIT_HANDLE(hdl), \
268 PX_ERPT_SEND(erpt), \
269 PX_ERR_DMC_CLASS(bit) }, \
270 { MMU_INTERRUPT_STATUS_ ## bit ## _S, \
271 0, \
272 PX_ERR_BIT_HANDLE(hdl), \
273 PX_ERPT_SEND(erpt), \
274 PX_ERR_DMC_CLASS(bit)
275 px_err_bit_desc_t px_err_mmu_tbl[] = {
276 /* DMC MMU TFAR/TFSR */
277 { MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) },
278 { MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) },
279 { MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) },
280 { MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) },
281 { MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) },
282 { MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) },
283 { MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) },
284 { MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) },
285 { MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) },
286 { MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) },
287 { MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) },
288
289 /* DMC MMU */
290 { MMU_BIT_DESC(TTC_CAE, panic, mmu) }
291 };
292 #define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t))
293
294
295 /*
296 * PEC error bit tables
297 */
298 #define ILU_BIT_DESC(bit, hdl, erpt) \
299 ILU_INTERRUPT_STATUS_ ## bit ## _P, \
300 0, \
301 PX_ERR_BIT_HANDLE(hdl), \
302 PX_ERPT_SEND(erpt), \
303 PX_ERR_PEC_CLASS(bit) }, \
304 { ILU_INTERRUPT_STATUS_ ## bit ## _S, \
305 0, \
306 PX_ERR_BIT_HANDLE(hdl), \
307 PX_ERPT_SEND(erpt), \
308 PX_ERR_PEC_CLASS(bit)
309 px_err_bit_desc_t px_err_ilu_tbl[] = {
310 /* PEC ILU none */
311 { ILU_BIT_DESC(IHB_PE, panic, pec_ilu) }
312 };
313 #define px_err_ilu_keys \
314 (sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t))
315
316 /*
317 * The PEC UE error implementation is incomplete pending generic PCIe
318 * fabric rules.  It must handle both PRIMARY and SECONDARY errors.
319 */
320 /* pec ue errors */
321 #define TLU_UC_BIT_DESC(bit, hdl, erpt) \
322 TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
323 0, \
324 PX_ERR_BIT_HANDLE(hdl), \
325 PX_ERPT_SEND(erpt), \
326 PX_ERR_PEC_CLASS(bit) }, \
327 { TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
328 0, \
329 PX_ERR_BIT_HANDLE(hdl), \
330 PX_ERPT_SEND(erpt), \
331 PX_ERR_PEC_CLASS(bit)
332 #define TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \
333 TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
334 0, \
335 PX_ERR_BIT_HANDLE(hdl), \
336 PX_ERPT_SEND(erpt), \
337 PX_ERR_PEC_OB_CLASS(bit) }, \
338 { TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
339 0, \
340 PX_ERR_BIT_HANDLE(hdl), \
341 PX_ERPT_SEND(erpt), \
342 PX_ERR_PEC_OB_CLASS(bit)
343 px_err_bit_desc_t px_err_tlu_ue_tbl[] = {
344 /* PCI-E Receive Uncorrectable Errors */
345 { TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) },
346 { TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) },
347
348 /* PCI-E Transmit Uncorrectable Errors */
349 { TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) },
350 { TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) },
351 { TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) },
352
353 /* PCI-E Rx/Tx Uncorrectable Errors */
354 { TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) },
355 { TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) },
356
357 /* Other PCI-E Uncorrectable Errors */
358 { TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) },
359 { TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) },
360 { TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) },
361
362 /* Not used */
363 { TLU_UC_BIT_DESC(CA, pciex_ue, do_not) }
364 };
365 #define px_err_tlu_ue_keys \
366 (sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t))
367
368
369 /*
370 * The PEC CE error implementation is incomplete pending generic PCIe
371 * fabric rules.
372 */
373 /* pec ce errors */
374 #define TLU_CE_BIT_DESC(bit, hdl, erpt) \
375 TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
376 0, \
377 PX_ERR_BIT_HANDLE(hdl), \
378 PX_ERPT_SEND(erpt), \
379 PX_ERR_PEC_CLASS(bit) }, \
380 { TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
381 0, \
382 PX_ERR_BIT_HANDLE(hdl), \
383 PX_ERPT_SEND(erpt), \
384 PX_ERR_PEC_CLASS(bit)
385 px_err_bit_desc_t px_err_tlu_ce_tbl[] = {
386 /* PCI-E Correctable Errors */
387 { TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) },
388 { TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) },
389 { TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) },
390 { TLU_CE_BIT_DESC(BTP, pciex_ce, pciex_ce) },
391 { TLU_CE_BIT_DESC(RE, pciex_ce, pciex_ce) }
392 };
393 #define px_err_tlu_ce_keys \
394 (sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t))
395
396
397 /* pec oe errors */
398 #define TLU_OE_BIT_DESC(bit, hdl, erpt) \
399 TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
400 0, \
401 PX_ERR_BIT_HANDLE(hdl), \
402 PX_ERPT_SEND(erpt), \
403 PX_ERR_PEC_CLASS(bit) }, \
404 { TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
405 0, \
406 PX_ERR_BIT_HANDLE(hdl), \
407 PX_ERPT_SEND(erpt), \
408 PX_ERR_PEC_CLASS(bit)
409 #define TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \
410 TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
411 0, \
412 PX_ERR_BIT_HANDLE(hdl), \
413 PX_ERPT_SEND(erpt), \
414 PX_ERR_PEC_OB_CLASS(bit) }, \
415 { TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
416 0, \
417 PX_ERR_BIT_HANDLE(hdl), \
418 PX_ERPT_SEND(erpt), \
419 PX_ERR_PEC_OB_CLASS(bit)
420 px_err_bit_desc_t px_err_tlu_oe_tbl[] = {
421 /* TLU Other Event Status (receive only) */
422 { TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) },
423
424 /* TLU Other Event Status (rx + tx) */
425 { TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) },
426 { TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) },
427 { TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) },
428
429 /* TLU Other Event */
430 { TLU_OE_BIT_DESC(IIP, panic, pciex_oe) },
431 { TLU_OE_BIT_DESC(EDP, panic, pciex_oe) },
432 { TLU_OE_BIT_DESC(EHP, panic, pciex_oe) },
433 { TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) },
434 { TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) },
435 { TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) },
436 { TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) },
437 { TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) },
438 { TLU_OE_BIT_DESC(ERU, panic, pciex_oe) },
439 { TLU_OE_BIT_DESC(ERO, panic, pciex_oe) },
440 { TLU_OE_BIT_DESC(EMP, panic, pciex_oe) },
441 { TLU_OE_BIT_DESC(EPE, panic, pciex_oe) },
442 { TLU_OE_BIT_DESC(ERP, panic, pciex_oe) },
443 { TLU_OE_BIT_DESC(EIP, panic, pciex_oe) }
444 };
445
446 #define px_err_tlu_oe_keys \
447 (sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t))
448
449
450 /*
451 * All the tables below are for LPU Interrupts.  These interrupts
452 * are *NOT* error interrupts, but event status interrupts.
453 *
454 * These events are probably of most interest to:
455 * o Hotplug
456 * o Power Management
457 * o etc...
458 *
459 * There are also a few events that would be interesting for FMA.
460 * Again, none of the registers below state that an error has occurred
461 * or that data has been lost.  If anything, they give status that an
462 * error is *about* to occur.  Examples:
463 * o INT_SKP_ERR - indicates the clock between fire and child is too far
464 * off and is most likely unable to compensate
465 * o INT_TX_PAR_ERR - A parity error occurred in ONE lane.  This is
466 * HW recoverable, but will likely end up as a future
467 * fabric error as well.
468 *
469 * For now, we don't care about any of these errors; they should be
470 * ignored, but cleared.
471 */
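/*
 * Note: every entry in the LPU tables below carries NULL handler and
 * ereport functions, so px_err_erpt_and_clr() will at most bump the
 * per-bit counter (the LPU count masks above allow it) and then clear
 * the interrupt-and-status register; no ereport is generated.
 */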
472
473 /* LPU Link Interrupt Table */
474 #define LPUL_BIT_DESC(bit, hdl, erpt) \
475 LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
476 0, \
477 NULL, \
478 NULL, \
479 ""
480 px_err_bit_desc_t px_err_lpul_tbl[] = {
481 { LPUL_BIT_DESC(LINK_ERR_ACT, NULL, NULL) }
482 };
483 #define px_err_lpul_keys \
484 (sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t))
485
486 /* LPU Physical Interrupt Table */
487 #define LPUP_BIT_DESC(bit, hdl, erpt) \
488 LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
489 0, \
490 NULL, \
491 NULL, \
492 ""
493 px_err_bit_desc_t px_err_lpup_tbl[] = {
494 { LPUP_BIT_DESC(PHY_LAYER_ERR, NULL, NULL) }
495 };
496 #define px_err_lpup_keys \
497 (sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t))
498
499 /* LPU Receive Interrupt Table */
500 #define LPUR_BIT_DESC(bit, hdl, erpt) \
501 LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
502 0, \
503 NULL, \
504 NULL, \
505 ""
506 px_err_bit_desc_t px_err_lpur_tbl[] = {
507 { LPUR_BIT_DESC(RCV_PHY, NULL, NULL) }
508 };
509 #define px_err_lpur_keys \
510 (sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t))
511
512 /* LPU Transmit Interrupt Table */
513 #define LPUX_BIT_DESC(bit, hdl, erpt) \
514 LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
515 0, \
516 NULL, \
517 NULL, \
518 ""
519 px_err_bit_desc_t px_err_lpux_tbl[] = {
520 { LPUX_BIT_DESC(UNMSK, NULL, NULL) }
521 };
522 #define px_err_lpux_keys \
523 (sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t))
524
525 /* LPU LTSSM Interrupt Table */
526 #define LPUS_BIT_DESC(bit, hdl, erpt) \
527 LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \
528 0, \
529 NULL, \
530 NULL, \
531 ""
532 px_err_bit_desc_t px_err_lpus_tbl[] = {
533 { LPUS_BIT_DESC(ANY, NULL, NULL) }
534 };
535 #define px_err_lpus_keys \
536 (sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t))
537
538 /* LPU Gigablaze Glue Interrupt Table */
539 #define LPUG_BIT_DESC(bit, hdl, erpt) \
540 LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \
541 0, \
542 NULL, \
543 NULL, \
544 ""
545 px_err_bit_desc_t px_err_lpug_tbl[] = {
546 { LPUG_BIT_DESC(GLOBL_UNMSK, NULL, NULL) }
547 };
548 #define px_err_lpug_keys \
549 (sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t))
550
551
552 /* Mask and Tables */
553 #define MnT6X(pre) \
554 &px_ ## pre ## _intr_mask, \
555 &px_ ## pre ## _log_mask, \
556 &px_ ## pre ## _count_mask, \
557 px_err_ ## pre ## _tbl, \
558 px_err_ ## pre ## _keys, \
559 PX_REG_XBC, \
560 0
561
562 #define MnT6(pre) \
563 &px_ ## pre ## _intr_mask, \
564 &px_ ## pre ## _log_mask, \
565 &px_ ## pre ## _count_mask, \
566 px_err_ ## pre ## _tbl, \
567 px_err_ ## pre ## _keys, \
568 PX_REG_CSR, \
569 0
570
571 /* LPU Register Addresses */
572 #define LR4(pre) \
573 0, \
574 LPU_ ## pre ## _INTERRUPT_MASK, \
575 LPU_ ## pre ## _INTERRUPT_AND_STATUS, \
576 LPU_ ## pre ## _INTERRUPT_AND_STATUS
577
578 /* LPU Register Addresses with Irregularities */
579 #define LR4_FIXME(pre) \
580 0, \
581 LPU_ ## pre ## _INTERRUPT_MASK, \
582 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \
583 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS
584
585 /* TLU Register Addresses */
586 #define TR4(pre) \
587 TLU_ ## pre ## _LOG_ENABLE, \
588 TLU_ ## pre ## _INTERRUPT_ENABLE, \
589 TLU_ ## pre ## _INTERRUPT_STATUS, \
590 TLU_ ## pre ## _STATUS_CLEAR
591
592 /* Register Addresses for JBC, UBC, MMU, IMU and ILU */
593 #define R4(pre) \
594 pre ## _ERROR_LOG_ENABLE, \
595 pre ## _INTERRUPT_ENABLE, \
596 pre ## _INTERRUPT_STATUS, \
597 pre ## _ERROR_STATUS_CLEAR
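/*
 * Sketch of how one row of px_err_reg_tbl[] below is composed (the
 * actual struct layout is in px_err_impl.h; the initializer order here
 * is simply what the macros emit): MnT6()/MnT6X() supply the mask
 * pointers, bit table, key count, register bank (PX_REG_CSR or
 * PX_REG_XBC) and a trailing 0, while R4()/TR4()/LR4() supply the
 * log-enable, interrupt-enable, status and clear register offsets.
 * For example,
 *
 *	{ CHP_FO, MnT6(mmu), R4(MMU), "MMU Error" }
 *
 * is roughly
 *
 *	{ CHP_FO,
 *	    &px_mmu_intr_mask, &px_mmu_log_mask, &px_mmu_count_mask,
 *	    px_err_mmu_tbl, px_err_mmu_keys, PX_REG_CSR, 0,
 *	    MMU_ERROR_LOG_ENABLE, MMU_INTERRUPT_ENABLE,
 *	    MMU_INTERRUPT_STATUS, MMU_ERROR_STATUS_CLEAR, "MMU Error" }
 */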
598
599 /* Bits in chip_mask, set according to type. */
600 #define CHP_O BITMASK(PX_CHIP_OBERON)
601 #define CHP_F BITMASK(PX_CHIP_FIRE)
602 #define CHP_FO (CHP_F | CHP_O)
603
604 /*
605 * Register error handling tables.
606 * The ID Field (first field) is identified by an enum px_err_id_t.
607 * It is located in px_err.h
608 */
609 static const
610 px_err_reg_desc_t px_err_reg_tbl[] = {
611 { CHP_F, MnT6X(jbc), R4(JBC), "JBC Error"},
612 { CHP_O, MnT6X(ubc), R4(UBC), "UBC Error"},
613 { CHP_FO, MnT6(mmu), R4(MMU), "MMU Error"},
614 { CHP_FO, MnT6(imu), R4(IMU), "IMU Error"},
615 { CHP_FO, MnT6(tlu_ue), TR4(UNCORRECTABLE_ERROR), "TLU UE"},
616 { CHP_FO, MnT6(tlu_ce), TR4(CORRECTABLE_ERROR), "TLU CE"},
617 { CHP_FO, MnT6(tlu_oe), TR4(OTHER_EVENT), "TLU OE"},
618 { CHP_FO, MnT6(ilu), R4(ILU), "ILU Error"},
619 { CHP_F, MnT6(lpul), LR4(LINK_LAYER), "LPU Link Layer"},
620 { CHP_F, MnT6(lpup), LR4_FIXME(PHY), "LPU Phy Layer"},
621 { CHP_F, MnT6(lpur), LR4(RECEIVE_PHY), "LPU RX Phy Layer"},
622 { CHP_F, MnT6(lpux), LR4(TRANSMIT_PHY), "LPU TX Phy Layer"},
623 { CHP_F, MnT6(lpus), LR4(LTSSM), "LPU LTSSM"},
624 { CHP_F, MnT6(lpug), LR4(GIGABLAZE_GLUE), "LPU GigaBlaze Glue"},
625 };
626
627 #define PX_ERR_REG_KEYS (sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0]))
628
629 typedef struct px_err_ss {
630 uint64_t err_status[PX_ERR_REG_KEYS];
631 } px_err_ss_t;
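/*
 * err_status[] is indexed by the same reg_id used for px_err_reg_tbl[],
 * so each snapshot slot lines up with the register group it was read from.
 */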
632
633 static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block);
634 static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr,
635 px_err_ss_t *ss);
636 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
637 int err, int caller);
638
639 /*
640 * px_err_cb_intr:
641 * Interrupt handler for the JBC/UBC block.
642 * o lock
643 * o create derr
644 * o px_err_cmn_intr
645 * o unlock
646 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
647 */
648 uint_t
649 px_err_cb_intr(caddr_t arg)
650 {
651 px_fault_t *px_fault_p = (px_fault_t *)arg;
652 dev_info_t *rpdip = px_fault_p->px_fh_dip;
653 px_t *px_p = DIP_TO_STATE(rpdip);
654 int err;
655 ddi_fm_error_t derr;
656
657 /* Create the derr */
658 bzero(&derr, sizeof (ddi_fm_error_t));
659 derr.fme_version = DDI_FME_VERSION;
660 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
661 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
662
663 if (px_fm_enter(px_p) != DDI_SUCCESS)
664 goto done;
665
666 err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST);
667 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
668 INTR_IDLE_STATE);
669
670 px_err_panic(err, PX_HB, PX_NO_ERROR, B_TRUE);
671 px_fm_exit(px_p);
672 px_err_panic(err, PX_HB, PX_NO_ERROR, B_FALSE);
673
674 done:
675 return (DDI_INTR_CLAIMED);
676 }
677
678 /*
679 * px_err_dmc_pec_intr:
680 * Interrupt handler for the DMC/PEC block.
681 * o lock
682 * o create derr
683 * o px_err_cmn_intr (leaf, without cb)
684 * o pcie_scan_fabric (leaf)
685 * o unlock
686 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
687 */
688 uint_t
689 px_err_dmc_pec_intr(caddr_t arg)
690 {
691 px_fault_t *px_fault_p = (px_fault_t *)arg;
692 dev_info_t *rpdip = px_fault_p->px_fh_dip;
693 px_t *px_p = DIP_TO_STATE(rpdip);
694 int rc_err, fab_err;
695 ddi_fm_error_t derr;
696
697 /* Create the derr */
698 bzero(&derr, sizeof (ddi_fm_error_t));
699 derr.fme_version = DDI_FME_VERSION;
700 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
701 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
702
703 if (px_fm_enter(px_p) != DDI_SUCCESS)
704 goto done;
705
706 /* send ereport/handle/clear fire registers */
707 rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);
708
709 /* Check all child devices for errors */
710 fab_err = px_scan_fabric(px_p, rpdip, &derr);
711
712 /* Set the interrupt state to idle */
713 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
714 INTR_IDLE_STATE);
715
716 px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
717 px_fm_exit(px_p);
718 px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);
719
720 done:
721 return (DDI_INTR_CLAIMED);
722 }
723
724 /*
725 * Proper csr_base is the responsibility of the caller.  (Called from
726 * px_lib_dev_init via px_err_reg_setup_all for pcie error registers; called
727 * from px_cb_add_intr, itself called from px_cb_attach, for jbc/ubc.)
728 *
729 * Note: reg_id is passed in instead of reg_desc since this function is called
730 * from px_lib4u.c, which doesn't know about the structure of the table.
731 */
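/*
 * Minimal usage sketch (assumes the caller has already mapped the
 * matching register bank; PX_ERR_LPU_LINK, used in px_err_reg_disable()
 * below, is one px_err_id_t value; the full enum lives in px_err.h):
 *
 *	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
 *	px_err_reg_enable(PX_ERR_LPU_LINK, csr_base);
 *	...
 *	px_err_reg_disable(PX_ERR_LPU_LINK, csr_base);
 */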
732 void
733 px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base)
734 {
735 const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id];
736 uint64_t intr_mask = *reg_desc_p->intr_mask_p;
737 uint64_t log_mask = *reg_desc_p->log_mask_p;
738
739 /* Enable error logging if the log register exists */
740 if (reg_desc_p->log_addr != 0)
741 CSR_XS(csr_base, reg_desc_p->log_addr, log_mask);
742
743 /*
744 * For readability, in code you set 1 to enable an interrupt.
745 * But in Fire it's backwards. You set 1 to *disable* an intr.
746 * Reverse the user tunable intr mask field.
747 *
748 * Disable All Errors
749 * Clear All Errors
750 * Enable Errors
751 */
752 CSR_XS(csr_base, reg_desc_p->enable_addr, 0);
753 CSR_XS(csr_base, reg_desc_p->clear_addr, -1);
754 CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask);
755 DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg,
756 CSR_XR(csr_base, reg_desc_p->enable_addr));
757 DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg,
758 CSR_XR(csr_base, reg_desc_p->status_addr));
759 DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg,
760 CSR_XR(csr_base, reg_desc_p->clear_addr));
761 if (reg_desc_p->log_addr != 0) {
762 DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg,
763 CSR_XR(csr_base, reg_desc_p->log_addr));
764 }
765 }
766
767 void
768 px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base)
769 {
770 const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id];
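/*
 * LPU interrupt registers use the reverse encoding (1 = disable), so
 * write all ones to quiesce an LPU group and all zeros for the other
 * register groups.
 */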
771 uint64_t val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0;
772
773 if (reg_desc_p->log_addr != 0)
774 CSR_XS(csr_base, reg_desc_p->log_addr, val);
775 CSR_XS(csr_base, reg_desc_p->enable_addr, val);
776 }
777
778 /*
779 * Set up pcie error registers.
780 */
781 void
782 px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable)
783 {
784 px_err_id_t reg_id;
785 const px_err_reg_desc_t *reg_desc_p;
786 void (*px_err_reg_func)(px_err_id_t, caddr_t);
787
788 /*
789 * JBC or UBC (the XBC bank) registers are enabled when the common block
790 * interrupts are added; they are not handled here.
791 */
792 px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable);
793 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
794 reg_desc_p = &px_err_reg_tbl[reg_id];
795 if ((reg_desc_p->chip_mask & chip_mask) &&
796 (reg_desc_p->reg_bank == PX_REG_CSR))
797 px_err_reg_func(reg_id, csr_base);
798 }
799 }
800
801 /*
802 * px_err_cmn_intr:
803 * Common function called by trap, mondo and fabric intr.
804 * o Snap shot current fire registers
805 * o check for safe access
806 * o send ereport and clear snap shot registers
807 * o create and queue RC info for later use in fabric scan.
808 * o RUC/WUC, PTLP, MMU Errors(CA), UR
809 * o check severity of snap shot registers
810 *
811 * @param px_p leaf in which to check access
812 * @param derr fm err data structure to be updated
813 * @param caller PX_TRAP_CALL | PX_INTR_CALL
814 * @param block PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL
815 * @return err PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED
816 */
817 int
818 px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
819 {
820 px_err_ss_t ss = {0};
821 int err;
822
823 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
824
825 /* check for safe access */
826 px_err_safeacc_check(px_p, derr);
827
828 /* snap shot the current fire registers */
829 px_err_snapshot(px_p, &ss, block);
830
831 /* send ereports/handle/clear registers */
832 err = px_err_erpt_and_clr(px_p, derr, &ss);
833
834 /* check for error severity */
835 err = px_err_check_severity(px_p, derr, err, caller);
836
837 /* Mark the On Trap Handle if an error occurred */
838 if (err != PX_NO_ERROR) {
839 px_pec_t *pec_p = px_p->px_pec_p;
840 on_trap_data_t *otd = pec_p->pec_ontrap_data;
841
842 if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS))
843 otd->ot_trap |= OT_DATA_ACCESS;
844 }
845
846 return (err);
847 }
848
849 /*
850 * Static function
851 */
852
853 /*
854 * px_err_snapshot:
855 * Take a current snap shot of all the fire error registers.  This includes
856 * JBC/UBC, DMC, and PEC, depending on the block flag.
857 *
858 * @param px_p leaf in which to take the snap shot.
859 * @param ss pre-allocated memory to store the snap shot.
860 * @param block which blocks (PX_FM_BLOCK_HOST and/or PX_FM_BLOCK_PCIE) to snapshot.
861 */
862 static void
863 px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block)
864 {
865 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
866 caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
867 caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
868 caddr_t csr_base;
869 uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p);
870 const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl;
871 px_err_id_t reg_id;
872
873 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) {
874 if (!(reg_desc_p->chip_mask & chip_mask))
875 continue;
876
877 if ((block & PX_FM_BLOCK_HOST) &&
878 (reg_desc_p->reg_bank == PX_REG_XBC))
879 csr_base = xbc_csr_base;
880 else if ((block & PX_FM_BLOCK_PCIE) &&
881 (reg_desc_p->reg_bank == PX_REG_CSR))
882 csr_base = pec_csr_base;
883 else {
884 ss_p->err_status[reg_id] = 0;
885 continue;
886 }
887
888 ss_p->err_status[reg_id] = CSR_XR(csr_base,
889 reg_desc_p->status_addr);
890 }
891 }
892
893 /*
894 * px_err_erpt_and_clr:
895 * This function does the following things to all the fire registers based
896 * on an earlier snap shot.
897 * o Send ereport
898 * o Handle the error
899 * o Clear the error
900 *
901 * @param px_p leaf in which to take the snap shot.
902 * @param derr fm err in which the ereport is to be based on
903 * @param ss_p pre-allocated memory to store the snap shot.
904 */
905 static int
906 px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p)
907 {
908 dev_info_t *rpdip = px_p->px_dip;
909 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
910 caddr_t csr_base;
911 const px_err_reg_desc_t *err_reg_tbl;
912 px_err_bit_desc_t *err_bit_tbl;
913 px_err_bit_desc_t *err_bit_desc;
914
915 uint64_t *count_mask;
916 uint64_t clear_addr;
917 uint64_t ss_reg;
918
919 int (*err_handler)();
920 int (*erpt_handler)();
921 int reg_id, key;
922 int err = PX_NO_ERROR;
923 int biterr = 0;
924
925 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
926
927 /* send ereport/handle/clear errors for each register group */
928 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
929 /* Get the correct register description table */
930 err_reg_tbl = &px_err_reg_tbl[reg_id];
931
932 /* Only look at enabled groups. */
933 if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p))))
934 continue;
935
936 /* Get the correct CSR BASE */
937 csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank];
938
939 /* If there are no errors in this register, continue */
940 ss_reg = ss_p->err_status[reg_id];
941 if (!ss_reg)
942 continue;
943
944 /* Get pointers to masks and register addresses */
945 count_mask = err_reg_tbl->count_mask_p;
946 clear_addr = err_reg_tbl->clear_addr;
947
948 /* Get the register BIT description table */
949 err_bit_tbl = err_reg_tbl->err_bit_tbl;
950
951 /* For each known bit in the register send erpt and handle */
952 for (key = 0; key < err_reg_tbl->err_bit_keys; key++) {
953 /*
954 * If the ss_reg is set for this bit,
955 * send ereport and handle
956 */
957 err_bit_desc = &err_bit_tbl[key];
958 if (!BIT_TST(ss_reg, err_bit_desc->bit))
959 continue;
960
961 /* Increment the counter if necessary */
962 if (BIT_TST(*count_mask, err_bit_desc->bit)) {
963 err_bit_desc->counter++;
964 }
965
966 /* Error Handle for this bit */
967 err_handler = err_bit_desc->err_handler;
968 if (err_handler) {
969 biterr = err_handler(rpdip, csr_base, derr,
970 err_reg_tbl, err_bit_desc);
971 err |= biterr;
972 }
973
974 /*
975 * Send the ereport if it's an UNEXPECTED err.
976 * This is the only place where PX_EXPECTED is utilized.
977 */
978 erpt_handler = err_bit_desc->erpt_handler;
979 if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) ||
980 (biterr == PX_EXPECTED))
981 continue;
982
983 if (erpt_handler)
984 (void) erpt_handler(rpdip, csr_base, ss_reg,
985 derr, err_bit_desc->bit,
986 err_bit_desc->class_name);
987 }
988
989 /* Clear the register and error */
990 CSR_XS(csr_base, clear_addr, ss_reg);
991 }
992
993 return (err);
994 }
995
996 /*
997 * px_err_check_severity:
998 * Check the severity of the fire error based on an earlier snapshot
999 *
1000 * @param px_p leaf in which to take the snap shot.
1001 * @param derr fm err in which the ereport is to be based on
1002 * @param err fire register error status
1003 * @param caller PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL
1004 */
1005 static int
1006 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller)
1007 {
1008 px_pec_t *pec_p = px_p->px_pec_p;
1009 boolean_t is_safeacc = B_FALSE;
1010
1011 /*
1012 * Nothing to do if called with no error.
1013 * The err could have already been set to PX_NO_PANIC, which means the
1014 * system doesn't need to panic, but PEEK/POKE still failed.
1015 */
1016 if (err == PX_NO_ERROR)
1017 return (err);
1018
1019 /* Cautious access error handling */
1020 switch (derr->fme_flag) {
1021 case DDI_FM_ERR_EXPECTED:
1022 if (caller == PX_TRAP_CALL) {
1023 /*
1024 * for ddi_caut_get treat all events as nonfatal
1025 * The trampoline will set err_ena = 0,
1026 * err_status = NONFATAL.
1027 */
1028 derr->fme_status = DDI_FM_NONFATAL;
1029 is_safeacc = B_TRUE;
1030 } else {
1031 /*
1032 * For ddi_caut_put treat all events as nonfatal. Here
1033 * we have the handle and can call ndi_fm_acc_err_set().
1034 */
1035 derr->fme_status = DDI_FM_NONFATAL;
1036 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
1037 is_safeacc = B_TRUE;
1038 }
1039 break;
1040 case DDI_FM_ERR_PEEK:
1041 case DDI_FM_ERR_POKE:
1042 /*
1043 * For ddi_peek/poke treat all events as nonfatal.
1044 */
1045 is_safeacc = B_TRUE;
1046 break;
1047 default:
1048 is_safeacc = B_FALSE;
1049 }
1050
1051 /* re-adjust error status from safe access, forgive all errors */
1052 if (is_safeacc)
1053 return (PX_NO_PANIC);
1054
1055 return (err);
1056 }
1057
1058 /* predefined convenience functions */
1059 /* ARGSUSED */
1060 void
1061 px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr,
1062 px_err_bit_desc_t *err_bit_descr, char *msg)
1063 {
1064 DBG(DBG_ERR_INTR, rpdip,
1065 "Bit %d, %s, at %s(0x%x) has occured %d times with a severity "
1066 "of \"%s\"\n",
1067 err_bit_descr->bit, err_bit_descr->class_name,
1068 err_reg_descr->msg, err_reg_descr->status_addr,
1069 err_bit_descr->counter, msg);
1070 }
1071
1072 /* ARGSUSED */
1073 int
1074 px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base,
1075 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1076 px_err_bit_desc_t *err_bit_descr)
1077 {
1078 if (px_log & PX_HW_RESET) {
1079 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
1080 "HW RESET");
1081 }
1082
1083 return (PX_HW_RESET);
1084 }
1085
1086 /* ARGSUSED */
1087 int
1088 px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base,
1089 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1090 px_err_bit_desc_t *err_bit_descr)
1091 {
1092 if (px_log & PX_PANIC) {
1093 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC");
1094 }
1095
1096 return (PX_PANIC);
1097 }
1098
1099 /* ARGSUSED */
1100 int
1101 px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base,
1102 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1103 px_err_bit_desc_t *err_bit_descr)
1104 {
1105 if (px_log & PX_PROTECTED) {
1106 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
1107 "PROTECTED");
1108 }
1109
1110 return (PX_PROTECTED);
1111 }
1112
1113 /* ARGSUSED */
1114 int
1115 px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base,
1116 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1117 px_err_bit_desc_t *err_bit_descr)
1118 {
1119 if (px_log & PX_NO_PANIC) {
1120 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
1121 "NO PANIC");
1122 }
1123
1124 return (PX_NO_PANIC);
1125 }
1126
1127 /* ARGSUSED */
1128 int
1129 px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base,
1130 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1131 px_err_bit_desc_t *err_bit_descr)
1132 {
1133 if (px_log & PX_NO_ERROR) {
1134 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
1135 "NO ERROR");
1136 }
1137
1138 return (PX_NO_ERROR);
1139 }
1140
1141 /* ARGSUSED */
1142 PX_ERPT_SEND_DEC(do_not)
1143 {
1144 return (PX_NO_ERROR);
1145 }
1146
1147 /*
1148 * Search the px_cb_list_t embedded in the px_cb_t for the
1149 * px_t of the specified Leaf (leaf_id). Return its associated dip.
1150 */
1151 static dev_info_t *
1152 px_err_search_cb(px_cb_t *px_cb_p, uint_t leaf_id)
1153 {
1154 int i;
1155 px_cb_list_t *pxl_elemp;
1156
1157 for (i = px_cb_p->attachcnt, pxl_elemp = px_cb_p->pxl; i > 0;
1158 i--, pxl_elemp = pxl_elemp->next) {
1159 if ((((pxu_t *)pxl_elemp->pxp->px_plat_p)->portid &
1160 OBERON_PORT_ID_LEAF_MASK) == leaf_id) {
1161 return (pxl_elemp->pxp->px_dip);
1162 }
1163 }
1164 return (NULL);
1165 }
1166
1167 /* UBC FATAL - see io erpt doc, section 1.1 */
1168 /* ARGSUSED */
1169 PX_ERPT_SEND_DEC(ubc_fatal)
1170 {
1171 char buf[FM_MAX_CLASS];
1172 uint64_t memory_ue_log, marked;
1173 char unum[FM_MAX_CLASS];
1174 int unum_length;
1175 uint64_t device_id = 0;
1176 uint8_t cpu_version = 0;
1177 nvlist_t *resource = NULL;
1178 uint64_t ubc_intr_status;
1179 px_t *px_p;
1180 px_cb_t *px_cb_p;
1181 dev_info_t *actual_dip;
1182
1183 unum[0] = '\0';
1184 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1185
1186 memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG);
1187 marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) &
1188 UBC_MEMORY_UE_LOG_MARKED_MASK;
1189
1190 if ((strstr(class_name, "ubc.piowtue") != NULL) ||
1191 (strstr(class_name, "ubc.piowbeue") != NULL) ||
1192 (strstr(class_name, "ubc.piorbeue") != NULL) ||
1193 (strstr(class_name, "ubc.dmarduea") != NULL) ||
1194 (strstr(class_name, "ubc.dmardueb") != NULL)) {
1195 int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) &
1196 UBC_MEMORY_UE_LOG_EID_MASK;
1197 (void) strncat(buf, ubc_class_eid_qualifier[eid],
1198 FM_MAX_CLASS);
1199
1200 if (eid == UBC_EID_MEM) {
1201 uint64_t phys_addr = memory_ue_log &
1202 MMU_OBERON_PADDR_MASK;
1203 uint64_t offset = (uint64_t)-1;
1204
1205 resource = fm_nvlist_create(NULL);
1206 if (&plat_get_mem_unum) {
1207 if ((plat_get_mem_unum(0,
1208 phys_addr, 0, B_TRUE, 0, unum,
1209 FM_MAX_CLASS, &unum_length)) != 0)
1210 unum[0] = '\0';
1211 }
1212 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
1213 NULL, unum, NULL, offset);
1214
1215 } else if (eid == UBC_EID_CPU) {
1216 int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK);
1217 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1218
1219 resource = fm_nvlist_create(NULL);
1220 cpu_version = cpunodes[cpuid].version;
1221 device_id = cpunodes[cpuid].device_id;
1222 (void) snprintf(sbuf, sizeof (sbuf), "%lX",
1223 device_id);
1224 (void) fm_fmri_cpu_set(resource,
1225 FM_CPU_SCHEME_VERSION, NULL, cpuid,
1226 &cpu_version, sbuf);
1227 }
1228 }
1229
1230 /*
1231 * For most of the errors represented in the UBC Interrupt Status
1232 * register, one can compute the dip of the actual Leaf that was
1233 * involved in the error. To do this, find the px_cb_t structure
1234 * that is shared between a pair of Leaves (eg, LeafA and LeafB).
1235 *
1236 * If any of the error bits for LeafA are set in the hardware
1237 * register, search the list of px_t's rooted in the px_cb_t for
1238 * the one corresponding to LeafA. If error bits for LeafB are set,
1239 * search the list for LeafB's px_t. The px_t references its
1240 * associated dip.
1241 */
1242 px_p = DIP_TO_STATE(rpdip);
1243 px_cb_p = ((pxu_t *)px_p->px_plat_p)->px_cb_p;
1244
1245 /* read hardware register */
1246 ubc_intr_status = CSR_XR(csr_base, UBC_INTERRUPT_STATUS);
1247
1248 if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFA) != 0) {
1249 /* then Leaf A is involved in the error */
1250 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_A);
1251 ASSERT(actual_dip != NULL);
1252 rpdip = actual_dip;
1253 } else if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFB) != 0) {
1254 /* then Leaf B is involved in the error */
1255 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_B);
1256 ASSERT(actual_dip != NULL);
1257 rpdip = actual_dip;
1258 } /* else error cannot be associated with a Leaf */
1259
1260 if (resource) {
1261 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1262 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1263 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
1264 OBERON_UBC_ELE, DATA_TYPE_UINT64,
1265 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
1266 OBERON_UBC_IE, DATA_TYPE_UINT64,
1267 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
1268 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
1269 OBERON_UBC_ESS, DATA_TYPE_UINT64,
1270 CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
1271 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
1272 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
1273 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
1274 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
1275 OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource,
1276 NULL);
1277 fm_nvlist_destroy(resource, FM_NVA_FREE);
1278 } else {
1279 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1280 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1281 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
1282 OBERON_UBC_ELE, DATA_TYPE_UINT64,
1283 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
1284 OBERON_UBC_IE, DATA_TYPE_UINT64,
1285 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
1286 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
1287 OBERON_UBC_ESS, DATA_TYPE_UINT64,
1288 CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
1289 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
1290 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
1291 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
1292 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
1293 NULL);
1294 }
1295
1296 return (PX_NO_PANIC);
1297 }
1298
1299 /* JBC FATAL */
1300 PX_ERPT_SEND_DEC(jbc_fatal)
1301 {
1302 char buf[FM_MAX_CLASS];
1303 boolean_t pri = PX_ERR_IS_PRI(bit);
1304
1305 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1306 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1307 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1308 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1309 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1310 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1311 FIRE_JBC_IE, DATA_TYPE_UINT64,
1312 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1313 FIRE_JBC_IS, DATA_TYPE_UINT64,
1314 ss_reg,
1315 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1316 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1317 FIRE_JBC_FEL1, DATA_TYPE_UINT64,
1318 CSR_XR(csr_base, FATAL_ERROR_LOG_1),
1319 FIRE_JBC_FEL2, DATA_TYPE_UINT64,
1320 CSR_XR(csr_base, FATAL_ERROR_LOG_2),
1321 NULL);
1322
1323 return (PX_NO_PANIC);
1324 }
1325
1326 /* JBC MERGE */
1327 PX_ERPT_SEND_DEC(jbc_merge)
1328 {
1329 char buf[FM_MAX_CLASS];
1330 boolean_t pri = PX_ERR_IS_PRI(bit);
1331
1332 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1333 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1334 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1335 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1336 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1337 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1338 FIRE_JBC_IE, DATA_TYPE_UINT64,
1339 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1340 FIRE_JBC_IS, DATA_TYPE_UINT64,
1341 ss_reg,
1342 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1343 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1344 FIRE_JBC_MTEL, DATA_TYPE_UINT64,
1345 CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG),
1346 NULL);
1347
1348 return (PX_NO_PANIC);
1349 }
1350
1351 /*
1352 * JBC Merge buffer retryable errors:
1353 * Merge buffer parity error (rd_buf): PIO or DMA
1354 * Merge buffer parity error (wr_buf): PIO or DMA
1355 */
1356 /* ARGSUSED */
1357 int
1358 px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base,
1359 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1360 px_err_bit_desc_t *err_bit_descr)
1361 {
1362 /*
1363 * Holder function to attempt error recovery. When the features
1364 * are in place, look up the address of the transaction in:
1365 *
1366 * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG);
1367 * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
1368 *
1369 * If the error is a secondary error, there is no log information;
1370 * just panic, as it is unknown which address has been affected.
1371 *
1372 * Remember the address is pre-translation, so it might be hard to look
1373 * up the appropriate driver based on the PA.
1374 */
1375 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
1376 err_bit_descr));
1377 }
1378
1379 /* JBC Jbusint IN */
1380 PX_ERPT_SEND_DEC(jbc_in)
1381 {
1382 char buf[FM_MAX_CLASS];
1383 boolean_t pri = PX_ERR_IS_PRI(bit);
1384
1385 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1386 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1387 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1388 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1389 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1390 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1391 FIRE_JBC_IE, DATA_TYPE_UINT64,
1392 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1393 FIRE_JBC_IS, DATA_TYPE_UINT64,
1394 ss_reg,
1395 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1396 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1397 FIRE_JBC_JITEL1, DATA_TYPE_UINT64,
1398 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG),
1399 FIRE_JBC_JITEL2, DATA_TYPE_UINT64,
1400 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2),
1401 NULL);
1402
1403 return (PX_NO_PANIC);
1404 }
1405
1406 /*
1407 * JBC Jbusint IN retryable errors
1408 * Log Reg[42:0].
1409 * Write Data Parity Error: PIO Writes
1410 * Read Data Parity Error: DMA Reads
1411 */
1412 int
1413 px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base,
1414 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1415 px_err_bit_desc_t *err_bit_descr)
1416 {
1417 /*
1418 * Holder function to attempt error recovery. When the features
1419 * are in place, look up the address of the transaction in:
1420 *
1421 * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG);
1422 * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
1423 *
1424 * If the error is a secondary error, there is no log information;
1425 * just panic, as it is unknown which address has been affected.
1426 *
1427 * Remember the address is pre-translation, so it might be hard to look
1428 * up the appropriate driver based on the PA.
1429 */
1430 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
1431 err_bit_descr));
1432 }
1433
1434
1435 /* JBC Jbusint Out */
1436 PX_ERPT_SEND_DEC(jbc_out)
1437 {
1438 char buf[FM_MAX_CLASS];
1439 boolean_t pri = PX_ERR_IS_PRI(bit);
1440
1441 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1442 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1443 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1444 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1445 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1446 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1447 FIRE_JBC_IE, DATA_TYPE_UINT64,
1448 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1449 FIRE_JBC_IS, DATA_TYPE_UINT64,
1450 ss_reg,
1451 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1452 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1453 FIRE_JBC_JOTEL1, DATA_TYPE_UINT64,
1454 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG),
1455 FIRE_JBC_JOTEL2, DATA_TYPE_UINT64,
1456 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2),
1457 NULL);
1458
1459 return (PX_NO_PANIC);
1460 }
1461
1462 /* JBC Dmcint ODCD */
1463 PX_ERPT_SEND_DEC(jbc_odcd)
1464 {
1465 char buf[FM_MAX_CLASS];
1466 boolean_t pri = PX_ERR_IS_PRI(bit);
1467
1468 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1469 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1470 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1471 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1472 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1473 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1474 FIRE_JBC_IE, DATA_TYPE_UINT64,
1475 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1476 FIRE_JBC_IS, DATA_TYPE_UINT64,
1477 ss_reg,
1478 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1479 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1480 FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64,
1481 CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG),
1482 NULL);
1483
1484 return (PX_NO_PANIC);
1485 }
1486
1487 /*
1488 * JBC Dmcint ODCD nonfatal error handling -
1489 * PIO data parity error: PIO
1490 */
1491 /* ARGSUSED */
1492 int
1493 px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base,
1494 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1495 px_err_bit_desc_t *err_bit_descr)
1496 {
1497 /*
1498 * Holder function to attempt error recovery. When the features
1499 * are in place, look up the address of the transaction in:
1500 *
1501 * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG);
1502 * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK;
1503 *
1504 * If the error is a secondary error, there is no log information;
1505 * just panic, as it is unknown which address has been affected.
1506 *
1507 * Remember the address is pre-translation, so it might be hard to look
1508 * up the appropriate driver based on the PA.
1509 */
1510 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
1511 err_bit_descr));
1512 }
1513
1514 /* Does address in DMCINT error log register match address of pcitool access? */
1515 static boolean_t
1516 px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base)
1517 {
1518 px_t *px_p = DIP_TO_STATE(rpdip);
1519 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
1520 caddr_t pcitool_addr = pxu_p->pcitool_addr;
1521 caddr_t errlog_addr =
1522 (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS);
1523
1524 return (pcitool_addr == errlog_addr);
1525 }
1526
1527 /*
1528 * JBC Dmcint ODCD error handling for errors which are forgivable during a safe
1529 * access. (This will be most likely be a PCItool access.) If not a safe
1530 * access context, treat like jbc_dmcint_odcd.
1531 * Unmapped PIO read error: pio:read:M:nonfatal
1532 * Unmapped PIO write error: pio:write:M:nonfatal
1533 * Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal
1534 * Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal
1535 */
1536 /* ARGSUSED */
1537 int
1538 px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base,
1539 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1540 px_err_bit_desc_t *err_bit_descr)
1541 {
1542 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit);
1543
1544 if (!pri)
1545 return (px_err_panic_handle(rpdip, csr_base, derr,
1546 err_reg_descr, err_bit_descr));
1547 /*
1548 * Got an error which is forgivable during a PCItool access.
1549 *
1550 * Don't do handler check since the error may otherwise be unfairly
1551 * attributed to a device. Just return.
1552 *
1553 * Note: There is a hole here in that a legitimate error can come in
1554 * while a PCItool access is in play and be forgiven. This is possible
1555 * though not likely.
1556 */
1557 if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) &&
1558 (px_jbc_pcitool_addr_match(rpdip, csr_base)))
1559 return (px_err_protected_handle(rpdip, csr_base, derr,
1560 err_reg_descr, err_bit_descr));
1561
1562 return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr,
1563 err_reg_descr, err_bit_descr));
1564 }
1565
1566 /* JBC Dmcint IDC */
1567 PX_ERPT_SEND_DEC(jbc_idc)
1568 {
1569 char buf[FM_MAX_CLASS];
1570 boolean_t pri = PX_ERR_IS_PRI(bit);
1571
1572 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1573 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1574 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1575 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1576 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1577 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1578 FIRE_JBC_IE, DATA_TYPE_UINT64,
1579 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1580 FIRE_JBC_IS, DATA_TYPE_UINT64,
1581 ss_reg,
1582 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1583 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1584 FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64,
1585 CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG),
1586 NULL);
1587
1588 return (PX_NO_PANIC);
1589 }
1590
1591 /* JBC CSR */
1592 PX_ERPT_SEND_DEC(jbc_csr)
1593 {
1594 char buf[FM_MAX_CLASS];
1595 boolean_t pri = PX_ERR_IS_PRI(bit);
1596
1597 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1598 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1599 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1600 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1601 FIRE_JBC_ELE, DATA_TYPE_UINT64,
1602 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
1603 FIRE_JBC_IE, DATA_TYPE_UINT64,
1604 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
1605 FIRE_JBC_IS, DATA_TYPE_UINT64,
1606 ss_reg,
1607 FIRE_JBC_ESS, DATA_TYPE_UINT64,
1608 CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
1609 "jbc-error-reg", DATA_TYPE_UINT64,
1610 CSR_XR(csr_base, CSR_ERROR_LOG),
1611 NULL);
1612
1613 return (PX_NO_PANIC);
1614 }
1615
1616 /* DMC IMU RDS */
1617 PX_ERPT_SEND_DEC(imu_rds)
1618 {
1619 char buf[FM_MAX_CLASS];
1620 boolean_t pri = PX_ERR_IS_PRI(bit);
1621
1622 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1623 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1624 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1625 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1626 FIRE_IMU_ELE, DATA_TYPE_UINT64,
1627 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1628 FIRE_IMU_IE, DATA_TYPE_UINT64,
1629 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1630 FIRE_IMU_IS, DATA_TYPE_UINT64,
1631 ss_reg,
1632 FIRE_IMU_ESS, DATA_TYPE_UINT64,
1633 CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1634 FIRE_IMU_RDS, DATA_TYPE_UINT64,
1635 CSR_XR(csr_base, IMU_RDS_ERROR_LOG),
1636 NULL);
1637
1638 return (PX_NO_PANIC);
1639 }
1640
1641 /* handle EQ overflow */
1642 /* ARGSUSED */
1643 int
1644 px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base,
1645 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1646 px_err_bit_desc_t *err_bit_descr)
1647 {
1648 px_t *px_p = DIP_TO_STATE(rpdip);
1649 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
1650 int err = px_err_check_eq(rpdip);
1651
1652 if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) {
1653 return (px_err_panic_handle(rpdip, csr_base, derr,
1654 err_reg_descr, err_bit_descr));
1655 } else {
1656 return (px_err_no_panic_handle(rpdip, csr_base, derr,
1657 err_reg_descr, err_bit_descr));
1658 }
1659 }
1660
1661 /* DMC IMU SCS */
1662 PX_ERPT_SEND_DEC(imu_scs)
1663 {
1664 char buf[FM_MAX_CLASS];
1665 boolean_t pri = PX_ERR_IS_PRI(bit);
1666
1667 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1668 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1669 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1670 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1671 FIRE_IMU_ELE, DATA_TYPE_UINT64,
1672 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1673 FIRE_IMU_IE, DATA_TYPE_UINT64,
1674 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1675 FIRE_IMU_IS, DATA_TYPE_UINT64,
1676 ss_reg,
1677 FIRE_IMU_ESS, DATA_TYPE_UINT64,
1678 CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1679 FIRE_IMU_SCS, DATA_TYPE_UINT64,
1680 CSR_XR(csr_base, IMU_SCS_ERROR_LOG),
1681 NULL);
1682
1683 return (PX_NO_PANIC);
1684 }
1685
1686 /* DMC IMU */
1687 PX_ERPT_SEND_DEC(imu)
1688 {
1689 char buf[FM_MAX_CLASS];
1690 boolean_t pri = PX_ERR_IS_PRI(bit);
1691
1692 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1693 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1694 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1695 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1696 FIRE_IMU_ELE, DATA_TYPE_UINT64,
1697 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
1698 FIRE_IMU_IE, DATA_TYPE_UINT64,
1699 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
1700 FIRE_IMU_IS, DATA_TYPE_UINT64,
1701 ss_reg,
1702 FIRE_IMU_ESS, DATA_TYPE_UINT64,
1703 CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
1704 NULL);
1705
1706 return (PX_NO_PANIC);
1707 }
1708
1709 /* DMC MMU TFAR/TFSR */
1710 PX_ERPT_SEND_DEC(mmu_tfar_tfsr)
1711 {
1712 char buf[FM_MAX_CLASS];
1713 boolean_t pri = PX_ERR_IS_PRI(bit);
1714 px_t *px_p = DIP_TO_STATE(rpdip);
1715 pcie_req_id_t fault_bdf = PCIE_INVALID_BDF;
1716 uint16_t s_status = 0;
1717
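/*
 * For a primary error, extract the requester BDF from the translation
 * fault status and queue it via px_rp_en_q(), presumably so the root
 * port's fabric scan can attribute the fault.  The address argument is 0
 * because, as the comment below notes, only PIO fault addresses are valid.
 */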
1718 if (pri) {
1719 fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS)
1720 & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK <<
1721 MMU_TRANSLATION_FAULT_STATUS_ID);
1722 s_status = PCI_STAT_S_TARG_AB;
1723
1724 /* Only PIO fault addresses are valid; this is a DMA fault */
1725 (void) px_rp_en_q(px_p, fault_bdf, 0, s_status);
1726 }
1727
1728 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1729
1730 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1731 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1732 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1733 FIRE_MMU_ELE, DATA_TYPE_UINT64,
1734 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
1735 FIRE_MMU_IE, DATA_TYPE_UINT64,
1736 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
1737 FIRE_MMU_IS, DATA_TYPE_UINT64,
1738 ss_reg,
1739 FIRE_MMU_ESS, DATA_TYPE_UINT64,
1740 CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
1741 FIRE_MMU_TFAR, DATA_TYPE_UINT64,
1742 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS),
1743 FIRE_MMU_TFSR, DATA_TYPE_UINT64,
1744 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS),
1745 NULL);
1746
1747 return (PX_NO_PANIC);
1748 }
1749
1750 /* DMC MMU */
1751 PX_ERPT_SEND_DEC(mmu)
1752 {
1753 char buf[FM_MAX_CLASS];
1754 boolean_t pri = PX_ERR_IS_PRI(bit);
1755
1756 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1757 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1758 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1759 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1760 FIRE_MMU_ELE, DATA_TYPE_UINT64,
1761 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
1762 FIRE_MMU_IE, DATA_TYPE_UINT64,
1763 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
1764 FIRE_MMU_IS, DATA_TYPE_UINT64,
1765 ss_reg,
1766 FIRE_MMU_ESS, DATA_TYPE_UINT64,
1767 CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
1768 NULL);
1769
1770 return (PX_NO_PANIC);
1771 }
1772
1773 /*
1774 * MMU function to handle all Received But Not Enabled (RBNE) errors.
1775 *
1776 * These errors are due to transaction modes that the PX driver was not set
1777 * up to handle. If possible, inform the driver that its DMA has failed by
1778 * marking its DMA handle as failed, but do not panic the system. Most
1779 * likely the address is not valid, as Fire wasn't set up to handle it in
1780 * the first place.
1781 *
1782 * These errors are not retryable unless the PX mode has changed; otherwise
1783 * the same error will occur again.
1784 */
1785 int
1786 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base,
1787 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1788 px_err_bit_desc_t *err_bit_descr)
1789 {
1790 pcie_req_id_t bdf;
1791
1792 if (!PX_ERR_IS_PRI(err_bit_descr->bit))
1793 goto done;
1794
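/*
 * CSR_FR() extracts the requester ID field of the MMU translation fault
 * status.  pf_hdl_lookup() then looks for a matching DMA handle under
 * rpdip and, if one is found, marks it as failed so the owning driver
 * sees the fault; the address is passed as 0 since it is not valid here.
 */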
1795 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
1796 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 0, bdf);
1797
1798 done:
1799 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr,
1800 err_bit_descr));
1801 }
1802
1803 /*
1804 * MMU function to handle all invalid address errors.
1805 *
1806 * These errors are due to transactions in which the address is not recognized.
1807 * If possible, inform the driver that all DMAs have failed by marking their DMA
1808 * handles. Fire should not panic the system; it is up to the driver to decide
1809 * whether to panic. The address logged is invalid.
1810 *
1811 * These errors are not retryable since retrying the same transaction with the
1812 * same invalid address will result in the same error.
1813 */
1814 /* ARGSUSED */
1815 int
1816 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base,
1817 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1818 px_err_bit_desc_t *err_bit_descr)
1819 {
1820 pcie_req_id_t bdf;
1821
1822 if (!PX_ERR_IS_PRI(err_bit_descr->bit))
1823 goto done;
1824
1825 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
1826 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 0, bdf);
1827
1828 done:
1829 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr,
1830 err_bit_descr));
1831 }
1832
1833 /*
1834 * MMU function to handle normal transactions that encounter a parity error.
1835 *
1836 * These errors are due to transactions that encounter a parity error. If
1837 * possible, inform the driver that its DMA has failed and that it should
1838 * retry. If Fire is unable to contact the leaf driver, panic the system.
1839 * Otherwise, it is up to the device driver to determine whether this is a
1840 * panicable error.
1841 */
1842 /* ARGSUSED */
1843 int
1844 px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base,
1845 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1846 px_err_bit_desc_t *err_bit_descr)
1847 {
1848 uint64_t mmu_tfa;
1849 pcie_req_id_t bdf;
1850 int status = PF_HDL_NOTFOUND;
1851
1852 if (!PX_ERR_IS_PRI(err_bit_descr->bit))
1853 goto done;
1854
1855 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS);
1856 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
1857 status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA,
1858 (uint32_t)mmu_tfa, bdf);
1859
1860 done:
1861 if (status == PF_HDL_NOTFOUND)
1862 return (px_err_panic_handle(rpdip, csr_base, derr,
1863 err_reg_descr, err_bit_descr));
1864 else
1865 return (px_err_no_panic_handle(rpdip, csr_base, derr,
1866 err_reg_descr, err_bit_descr));
1867 }
1868
1869 /*
1870 * wuc/ruc event - Mark the handle of the failed PIO access. Return "no_panic"
1871 */
1872 /* ARGSUSED */
1873 int
1874 px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base,
1875 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1876 px_err_bit_desc_t *err_bit_descr)
1877 {
1878 px_t *px_p = DIP_TO_STATE(rpdip);
1879 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
1880 uint64_t data;
1881 pf_pcie_adv_err_regs_t adv_reg;
1882 int sts = PF_HDL_NOTFOUND;
1883
1884 if (!PX_ERR_IS_PRI(err_bit_descr->bit))
1885 goto done;
1886
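/*
 * The transmit other-event header logs hold the header of the original
 * request TLP for the failed PIO access.  Reassemble it into adv_reg,
 * have pf_tlp_decode() recover the target address/BDF, and look up the
 * matching handle so the failure can be marked against its owner.
 */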
1887 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG);
1888 adv_reg.pcie_ue_hdr[0] = (uint32_t)(data >> 32);
1889 adv_reg.pcie_ue_hdr[1] = (uint32_t)(data & 0xFFFFFFFF);
1890 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG);
1891 adv_reg.pcie_ue_hdr[2] = (uint32_t)(data >> 32);
1892 adv_reg.pcie_ue_hdr[3] = (uint32_t)(data & 0xFFFFFFFF);
1893
1894 (void) pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg);
1895 sts = pf_hdl_lookup(rpdip, derr->fme_ena, adv_reg.pcie_ue_tgt_trans,
1896 adv_reg.pcie_ue_tgt_addr, adv_reg.pcie_ue_tgt_bdf);
1897 done:
1898 if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR))
1899 return (px_err_protected_handle(rpdip, csr_base, derr,
1900 err_reg_descr, err_bit_descr));
1901
1902 return (px_err_no_panic_handle(rpdip, csr_base, derr,
1903 err_reg_descr, err_bit_descr));
1904 }
1905
1906 /*
1907 * TLU LUP event - if caused by power management activity, then it is expected.
1908 * In all other cases, it is an error.
1909 */
1910 /* ARGSUSED */
1911 int
1912 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base,
1913 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1914 px_err_bit_desc_t *err_bit_descr)
1915 {
1916 px_t *px_p = DIP_TO_STATE(rpdip);
1917
1918 /*
1919 * The power management code is currently the only code that sets
1920 * px_lup_pending to indicate that it expects a healthy LUP
1921 * event. On all other occasions, a LUP event should be flagged as
1922 * an error condition.
1923 */
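/*
 * atomic_cas_32() returns the previous value of px_lup_pending, so a
 * pending LUP is consumed (cleared) at most once.
 */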
1924 return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ?
1925 PX_NO_PANIC : PX_EXPECTED);
1926 }
1927
1928 /*
1929 * TLU LDN event - if caused by power management activity, then it is expected.
1930 * In all other cases, it is an error.
1931 */
1932 /* ARGSUSED */
1933 int
1934 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base,
1935 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1936 px_err_bit_desc_t *err_bit_descr)
1937 {
1938 px_t *px_p = DIP_TO_STATE(rpdip);
1939 return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED :
1940 PX_NO_PANIC);
1941 }
1942
1943 /* PEC ILU none - see io erpt doc, section 3.1 */
1944 PX_ERPT_SEND_DEC(pec_ilu)
1945 {
1946 char buf[FM_MAX_CLASS];
1947 boolean_t pri = PX_ERR_IS_PRI(bit);
1948
1949 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
1950 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
1951 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
1952 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
1953 FIRE_ILU_ELE, DATA_TYPE_UINT64,
1954 CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE),
1955 FIRE_ILU_IE, DATA_TYPE_UINT64,
1956 CSR_XR(csr_base, ILU_INTERRUPT_ENABLE),
1957 FIRE_ILU_IS, DATA_TYPE_UINT64,
1958 ss_reg,
1959 FIRE_ILU_ESS, DATA_TYPE_UINT64,
1960 CSR_XR(csr_base, ILU_ERROR_STATUS_SET),
1961 NULL);
1962
1963 return (PX_NO_PANIC);
1964 }
1965
1966 /* PCIEX UE Errors */
1967 /* ARGSUSED */
1968 int
1969 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base,
1970 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
1971 px_err_bit_desc_t *err_bit_descr)
1972 {
1973 px_err_pcie_t regs = {0};
1974 uint32_t err_bit;
1975 int err;
1976 uint64_t log;
1977
1978 if (err_bit_descr->bit < 32) {
1979 err_bit = (uint32_t)BITMASK(err_bit_descr->bit);
1980 regs.ue_reg = err_bit;
1981 regs.primary_ue = err_bit;
1982
1983 /*
1984 * Log the Received Log for PTLP, UR and UC.
1985 */
1986 if ((PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR | PCIE_AER_UCE_UC) &
1987 err_bit) {
1988 log = CSR_XR(csr_base,
1989 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG);
1990 regs.rx_hdr1 = (uint32_t)(log >> 32);
1991 regs.rx_hdr2 = (uint32_t)(log & 0xFFFFFFFF);
1992
1993 log = CSR_XR(csr_base,
1994 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG);
1995 regs.rx_hdr3 = (uint32_t)(log >> 32);
1996 regs.rx_hdr4 = (uint32_t)(log & 0xFFFFFFFF);
1997 }
1998 } else {
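/*
 * Bits 32 and above are apparently the secondary halves of the UE
 * status register; map them back into the 32-bit mask.  No header
 * log is captured for secondary errors.
 */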
1999 regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32);
2000 }
2001
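/*
 * Hand the collected registers to the PCIe error checking code;
 * px_err_check_pcie() returns a severity bitmask that is tested for
 * PX_PANIC below.
 */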
2002 err = px_err_check_pcie(rpdip, derr, &regs, PF_INTR_TYPE_INTERNAL);
2003
2004 if (err & PX_PANIC) {
2005 return (px_err_panic_handle(rpdip, csr_base, derr,
2006 err_reg_descr, err_bit_descr));
2007 } else {
2008 return (px_err_no_panic_handle(rpdip, csr_base, derr,
2009 err_reg_descr, err_bit_descr));
2010 }
2011 }
2012
2013 /* PCI-E Uncorrectable Errors */
2014 PX_ERPT_SEND_DEC(pciex_rx_ue)
2015 {
2016 char buf[FM_MAX_CLASS];
2017 boolean_t pri = PX_ERR_IS_PRI(bit);
2018
2019 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2020 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2021 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2022 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2023 FIRE_TLU_UELE, DATA_TYPE_UINT64,
2024 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2025 FIRE_TLU_UIE, DATA_TYPE_UINT64,
2026 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2027 FIRE_TLU_UIS, DATA_TYPE_UINT64,
2028 ss_reg,
2029 FIRE_TLU_UESS, DATA_TYPE_UINT64,
2030 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2031 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
2032 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
2033 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
2034 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
2035 NULL);
2036
2037 return (PX_NO_PANIC);
2038 }
2039
2040 /* PCI-E Uncorrectable Errors */
2041 PX_ERPT_SEND_DEC(pciex_tx_ue)
2042 {
2043 char buf[FM_MAX_CLASS];
2044 boolean_t pri = PX_ERR_IS_PRI(bit);
2045
2046 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2047 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2048 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2049 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2050 FIRE_TLU_UELE, DATA_TYPE_UINT64,
2051 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2052 FIRE_TLU_UIE, DATA_TYPE_UINT64,
2053 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2054 FIRE_TLU_UIS, DATA_TYPE_UINT64,
2055 ss_reg,
2056 FIRE_TLU_UESS, DATA_TYPE_UINT64,
2057 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2058 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
2059 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
2060 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
2061 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
2062 NULL);
2063
2064 return (PX_NO_PANIC);
2065 }
2066
2067 /* PCI-E Uncorrectable Errors */
2068 PX_ERPT_SEND_DEC(pciex_rx_tx_ue)
2069 {
2070 char buf[FM_MAX_CLASS];
2071 boolean_t pri = PX_ERR_IS_PRI(bit);
2072
2073 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2074 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2075 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2076 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2077 FIRE_TLU_UELE, DATA_TYPE_UINT64,
2078 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2079 FIRE_TLU_UIE, DATA_TYPE_UINT64,
2080 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2081 FIRE_TLU_UIS, DATA_TYPE_UINT64,
2082 ss_reg,
2083 FIRE_TLU_UESS, DATA_TYPE_UINT64,
2084 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2085 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
2086 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
2087 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
2088 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
2089 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
2090 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
2091 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
2092 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
2093 NULL);
2094
2095 return (PX_NO_PANIC);
2096 }
2097
2098 /* PCI-E Uncorrectable Errors */
2099 PX_ERPT_SEND_DEC(pciex_ue)
2100 {
2101 char buf[FM_MAX_CLASS];
2102 boolean_t pri = PX_ERR_IS_PRI(bit);
2103
2104 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2105 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2106 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2107 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2108 FIRE_TLU_UELE, DATA_TYPE_UINT64,
2109 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
2110 FIRE_TLU_UIE, DATA_TYPE_UINT64,
2111 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
2112 FIRE_TLU_UIS, DATA_TYPE_UINT64,
2113 ss_reg,
2114 FIRE_TLU_UESS, DATA_TYPE_UINT64,
2115 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
2116 NULL);
2117
2118 return (PX_NO_PANIC);
2119 }
2120
2121 /* PCIEX CE Errors */
2122 /* ARGSUSED */
2123 int
2124 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base,
2125 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
2126 px_err_bit_desc_t *err_bit_descr)
2127 {
2128 px_err_pcie_t regs = {0};
2129 int err;
2130
2131 if (err_bit_descr->bit < 32)
2132 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit);
2133 else
2134 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32);
2135
2136 err = px_err_check_pcie(rpdip, derr, &regs, PF_INTR_TYPE_INTERNAL);
2137
2138 if (err & PX_PANIC) {
2139 return (px_err_panic_handle(rpdip, csr_base, derr,
2140 err_reg_descr, err_bit_descr));
2141 } else {
2142 return (px_err_no_panic_handle(rpdip, csr_base, derr,
2143 err_reg_descr, err_bit_descr));
2144 }
2145 }
2146
2147 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */
2148 PX_ERPT_SEND_DEC(pciex_ce)
2149 {
2150 char buf[FM_MAX_CLASS];
2151 boolean_t pri = PX_ERR_IS_PRI(bit);
2152
2153 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2154 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2155 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2156 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2157 FIRE_TLU_CELE, DATA_TYPE_UINT64,
2158 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE),
2159 FIRE_TLU_CIE, DATA_TYPE_UINT64,
2160 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE),
2161 FIRE_TLU_CIS, DATA_TYPE_UINT64,
2162 ss_reg,
2163 FIRE_TLU_CESS, DATA_TYPE_UINT64,
2164 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET),
2165 NULL);
2166
2167 return (PX_NO_PANIC);
2168 }
2169
2170 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */
2171 PX_ERPT_SEND_DEC(pciex_rx_oe)
2172 {
2173 char buf[FM_MAX_CLASS];
2174 boolean_t pri = PX_ERR_IS_PRI(bit);
2175
2176 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2177 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2178 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2179 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2180 FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2181 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2182 FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2183 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2184 FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2185 ss_reg,
2186 FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2187 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2188 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
2189 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG),
2190 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
2191 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG),
2192 NULL);
2193
2194 return (PX_NO_PANIC);
2195 }
2196
2197 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */
2198 PX_ERPT_SEND_DEC(pciex_rx_tx_oe)
2199 {
2200 char buf[FM_MAX_CLASS];
2201 boolean_t pri = PX_ERR_IS_PRI(bit);
2202 px_t *px_p = DIP_TO_STATE(rpdip);
2203 uint64_t rx_h1, rx_h2, tx_h1, tx_h2;
2204 uint16_t s_status = 0;
2205 int sts;
2206 pcie_cpl_t *cpl;
2207 pf_pcie_adv_err_regs_t adv_reg;
2208
2209 rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG);
2210 rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG);
2211 tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG);
2212 tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG);
2213
2214 if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) ||
2215 (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) {
2216 adv_reg.pcie_ue_hdr[0] = (uint32_t)(rx_h1 >> 32);
2217 adv_reg.pcie_ue_hdr[1] = (uint32_t)rx_h1;
2218 adv_reg.pcie_ue_hdr[2] = (uint32_t)(rx_h2 >> 32);
2219 adv_reg.pcie_ue_hdr[3] = (uint32_t)rx_h2;
2220
2221 /* get completer bdf (fault bdf) from rx logs */
2222 cpl = (pcie_cpl_t *)&adv_reg.pcie_ue_hdr[1];
2223
2224 /* Figure out if UR/CA from rx logs */
2225 if (cpl->status == PCIE_CPL_STS_UR)
2226 s_status = PCI_STAT_R_MAST_AB;
2227 else if (cpl->status == PCIE_CPL_STS_CA)
2228 s_status = PCI_STAT_R_TARG_AB;
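/*
 * Any other completion status leaves s_status at its initial value
 * of 0 (no secondary status bits are recorded).
 */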
2229
2230 adv_reg.pcie_ue_hdr[0] = (uint32_t)(tx_h1 >> 32);
2231 adv_reg.pcie_ue_hdr[1] = (uint32_t)tx_h1;
2232 adv_reg.pcie_ue_hdr[2] = (uint32_t)(tx_h2 >> 32);
2233 adv_reg.pcie_ue_hdr[3] = (uint32_t)tx_h2;
2234
2235 /* get fault addr from tx logs */
2236 sts = pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg);
2237
2238 if (sts == DDI_SUCCESS)
2239 (void) px_rp_en_q(px_p, adv_reg.pcie_ue_tgt_bdf,
2240 adv_reg.pcie_ue_tgt_addr, s_status);
2241 }
2242
2243 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2244 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2245 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2246 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2247 FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2248 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2249 FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2250 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2251 FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2252 ss_reg,
2253 FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2254 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2255 FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1,
2256 FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2,
2257 FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1,
2258 FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2,
2259 NULL);
2260
2261 return (PX_NO_PANIC);
2262 }
2263
2264 /* TLU Other Event - see io erpt doc, section 3.9 */
2265 PX_ERPT_SEND_DEC(pciex_oe)
2266 {
2267 char buf[FM_MAX_CLASS];
2268 boolean_t pri = PX_ERR_IS_PRI(bit);
2269
2270 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
2271 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
2272 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
2273 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
2274 FIRE_TLU_OEELE, DATA_TYPE_UINT64,
2275 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
2276 FIRE_TLU_OEIE, DATA_TYPE_UINT64,
2277 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
2278 FIRE_TLU_OEIS, DATA_TYPE_UINT64,
2279 ss_reg,
2280 FIRE_TLU_OEESS, DATA_TYPE_UINT64,
2281 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
2282 NULL);
2283
2284 return (PX_NO_PANIC);
2285 }
2286