1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * sun4u Fire Error Handling 30 */ 31 32 #include <sys/types.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/fm/protocol.h> 36 #include <sys/fm/util.h> 37 #include <sys/pcie.h> 38 #include <sys/pcie_impl.h> 39 #include "px_obj.h" 40 #include <px_regs.h> 41 #include <px_csr.h> 42 #include <sys/membar.h> 43 #include "pcie_pwr.h" 44 #include "px_lib4u.h" 45 #include "px_err.h" 46 #include "px_err_impl.h" 47 48 /* 49 * JBC error bit table 50 */ 51 #define JBC_BIT_DESC(bit, hdl, erpt) \ 52 JBC_INTERRUPT_STATUS_ ## bit ## _P, \ 53 0, \ 54 PX_ERR_BIT_HANDLE(hdl), \ 55 PX_ERPT_SEND(erpt), \ 56 PX_ERR_JBC_CLASS(bit) }, \ 57 { JBC_INTERRUPT_STATUS_ ## bit ## _S, \ 58 0, \ 59 PX_ERR_BIT_HANDLE(hdl), \ 60 PX_ERPT_SEND(erpt), \ 61 PX_ERR_JBC_CLASS(bit) 62 px_err_bit_desc_t px_err_cb_tbl[] = { 63 /* JBC FATAL - see io erpt doc, section 1.1 */ 64 { JBC_BIT_DESC(MB_PEA, fatal_hw, jbc_fatal) }, 65 { JBC_BIT_DESC(CPE, fatal_hw, jbc_fatal) }, 66 { JBC_BIT_DESC(APE, fatal_hw, jbc_fatal) }, 67 { JBC_BIT_DESC(PIO_CPE, fatal_hw, jbc_fatal) }, 68 { JBC_BIT_DESC(JTCEEW, fatal_hw, jbc_fatal) }, 69 { JBC_BIT_DESC(JTCEEI, fatal_hw, jbc_fatal) }, 70 { JBC_BIT_DESC(JTCEER, fatal_hw, jbc_fatal) }, 71 72 /* JBC MERGE - see io erpt doc, section 1.2 */ 73 { JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) }, 74 { JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) }, 75 76 /* JBC Jbusint IN - see io erpt doc, section 1.3 */ 77 { JBC_BIT_DESC(UE_ASYN, fatal_gos, jbc_in) }, 78 { JBC_BIT_DESC(CE_ASYN, non_fatal, jbc_in) }, 79 { JBC_BIT_DESC(JTE, fatal_gos, jbc_in) }, 80 { JBC_BIT_DESC(JBE, jbc_jbusint_in, jbc_in) }, 81 { JBC_BIT_DESC(JUE, jbc_jbusint_in, jbc_in) }, 82 { JBC_BIT_DESC(ICISE, fatal_gos, jbc_in) }, 83 { JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) }, 84 { JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) }, 85 { JBC_BIT_DESC(ILL_BMW, jbc_jbusint_in, jbc_in) }, 86 { JBC_BIT_DESC(ILL_BMR, jbc_jbusint_in, jbc_in) }, 87 { JBC_BIT_DESC(BJC, jbc_jbusint_in, jbc_in) }, 88 89 /* JBC Jbusint Out - see io erpt doc, section 1.4 */ 90 { JBC_BIT_DESC(IJP, fatal_gos, jbc_out) }, 91 92 /* JBC Dmcint ODCD - see io erpt doc, section 1.5 */ 93 { JBC_BIT_DESC(PIO_UNMAP_RD, jbc_dmcint_odcd, jbc_odcd) }, 94 { JBC_BIT_DESC(ILL_ACC_RD, jbc_dmcint_odcd, jbc_odcd) }, 95 { JBC_BIT_DESC(PIO_UNMAP, jbc_dmcint_odcd, jbc_odcd) }, 96 { JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) }, 97 { JBC_BIT_DESC(PIO_CPE, non_fatal, jbc_odcd) }, 98 { JBC_BIT_DESC(ILL_ACC, jbc_dmcint_odcd, jbc_odcd) }, 99 100 /* JBC Dmcint IDC - see io erpt doc, section 1.6 */ 101 { JBC_BIT_DESC(UNSOL_RD, non_fatal, jbc_idc) }, 102 { JBC_BIT_DESC(UNSOL_INTR, non_fatal, jbc_idc) }, 103 104 /* JBC CSR - see io erpt doc, section 1.7 */ 105 { JBC_BIT_DESC(EBUS_TO, jbc_csr, jbc_csr) } 106 }; 107 108 #define px_err_cb_keys \ 109 (sizeof (px_err_cb_tbl)) / (sizeof (px_err_bit_desc_t)) 110 111 /* 112 * DMC error bit tables 113 */ 114 #define IMU_BIT_DESC(bit, hdl, erpt) \ 115 IMU_INTERRUPT_STATUS_ ## bit ## _P, \ 116 0, \ 117 PX_ERR_BIT_HANDLE(hdl), \ 118 PX_ERPT_SEND(erpt), \ 119 PX_ERR_DMC_CLASS(bit) }, \ 120 { IMU_INTERRUPT_STATUS_ ## bit ## _S, \ 121 0, \ 122 PX_ERR_BIT_HANDLE(hdl), \ 123 PX_ERPT_SEND(erpt), \ 124 PX_ERR_DMC_CLASS(bit) 125 px_err_bit_desc_t px_err_imu_tbl[] = { 126 /* DMC IMU RDS - see io erpt doc, section 2.1 */ 127 { IMU_BIT_DESC(MSI_MAL_ERR, non_fatal, imu_rds) }, 128 { IMU_BIT_DESC(MSI_PAR_ERR, fatal_stuck, imu_rds) }, 129 { IMU_BIT_DESC(PMEACK_MES_NOT_EN, imu_rbne, imu_rds) }, 130 { IMU_BIT_DESC(PMPME_MES_NOT_EN, imu_pme, imu_rds) }, 131 { IMU_BIT_DESC(FATAL_MES_NOT_EN, imu_rbne, imu_rds) }, 132 { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, imu_rbne, imu_rds) }, 133 { IMU_BIT_DESC(COR_MES_NOT_EN, imu_rbne, imu_rds) }, 134 { IMU_BIT_DESC(MSI_NOT_EN, imu_rbne, imu_rds) }, 135 136 /* DMC IMU SCS - see io erpt doc, section 2.2 */ 137 { IMU_BIT_DESC(EQ_NOT_EN, imu_rbne, imu_rds) }, 138 139 /* DMC IMU - see io erpt doc, section 2.3 */ 140 { IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) } 141 }; 142 143 #define px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t)) 144 145 /* mmu errors */ 146 #define MMU_BIT_DESC(bit, hdl, erpt) \ 147 MMU_INTERRUPT_STATUS_ ## bit ## _P, \ 148 0, \ 149 PX_ERR_BIT_HANDLE(hdl), \ 150 PX_ERPT_SEND(erpt), \ 151 PX_ERR_DMC_CLASS(bit) }, \ 152 { MMU_INTERRUPT_STATUS_ ## bit ## _S, \ 153 0, \ 154 PX_ERR_BIT_HANDLE(hdl), \ 155 PX_ERPT_SEND(erpt), \ 156 PX_ERR_DMC_CLASS(bit) 157 px_err_bit_desc_t px_err_mmu_tbl[] = { 158 /* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ 159 { MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) }, 160 { MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) }, 161 { MMU_BIT_DESC(TRN_ERR, mmu_rbne, mmu_tfar_tfsr) }, 162 { MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) }, 163 { MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) }, 164 { MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) }, 165 { MMU_BIT_DESC(TTC_DPE, mmu_tfa, mmu_tfar_tfsr) }, 166 { MMU_BIT_DESC(TBW_DME, mmu_tblwlk, mmu_tfar_tfsr) }, 167 { MMU_BIT_DESC(TBW_UDE, mmu_tblwlk, mmu_tfar_tfsr) }, 168 { MMU_BIT_DESC(TBW_ERR, mmu_tblwlk, mmu_tfar_tfsr) }, 169 { MMU_BIT_DESC(TBW_DPE, mmu_tblwlk, mmu_tfar_tfsr) }, 170 171 /* DMC MMU - see io erpt doc, section 2.5 */ 172 { MMU_BIT_DESC(TTC_CAE, non_fatal, mmu) } 173 }; 174 #define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t)) 175 176 /* 177 * PEC error bit tables 178 */ 179 #define ILU_BIT_DESC(bit, hdl, erpt) \ 180 ILU_INTERRUPT_STATUS_ ## bit ## _P, \ 181 0, \ 182 PX_ERR_BIT_HANDLE(hdl), \ 183 PX_ERPT_SEND(erpt), \ 184 PX_ERR_PEC_CLASS(bit) }, \ 185 { ILU_INTERRUPT_STATUS_ ## bit ## _S, \ 186 0, \ 187 PX_ERR_BIT_HANDLE(hdl), \ 188 PX_ERPT_SEND(erpt), \ 189 PX_ERR_PEC_CLASS(bit) 190 px_err_bit_desc_t px_err_ilu_tbl[] = { 191 /* PEC ILU none - see io erpt doc, section 3.1 */ 192 { ILU_BIT_DESC(IHB_PE, fatal_gos, pec_ilu) } 193 }; 194 #define px_err_ilu_keys \ 195 (sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t)) 196 197 /* 198 * PEC UE errors implementation is incomplete pending PCIE generic 199 * fabric rules. Must handle both PRIMARY and SECONDARY errors. 200 */ 201 /* pec ue errors */ 202 #define TLU_UC_BIT_DESC(bit, hdl, erpt) \ 203 TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \ 204 0, \ 205 PX_ERR_BIT_HANDLE(hdl), \ 206 PX_ERPT_SEND(erpt), \ 207 PX_ERR_PEC_CLASS(bit) }, \ 208 { TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \ 209 0, \ 210 PX_ERR_BIT_HANDLE(hdl), \ 211 PX_ERPT_SEND(erpt), \ 212 PX_ERR_PEC_CLASS(bit) 213 px_err_bit_desc_t px_err_tlu_ue_tbl[] = { 214 /* PCI-E Receive Uncorrectable Errors - see io erpt doc, section 3.2 */ 215 { TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) }, 216 { TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) }, 217 218 /* PCI-E Transmit Uncorrectable Errors - see io erpt doc, section 3.3 */ 219 { TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) }, 220 { TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) }, 221 222 /* PCI-E Rx/Tx Uncorrectable Errors - see io erpt doc, section 3.4 */ 223 { TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) }, 224 { TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) }, 225 226 /* Other PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ 227 { TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) }, 228 { TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) }, 229 { TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) }, 230 231 /* Not used */ 232 { TLU_UC_BIT_DESC(CA, pciex_ue, do_not) } 233 }; 234 #define px_err_tlu_ue_keys \ 235 (sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t)) 236 237 /* 238 * PEC CE errors implementation is incomplete pending PCIE generic 239 * fabric rules. 240 */ 241 /* pec ce errors */ 242 #define TLU_CE_BIT_DESC(bit, hdl, erpt) \ 243 TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \ 244 0, \ 245 PX_ERR_BIT_HANDLE(hdl), \ 246 PX_ERPT_SEND(erpt), \ 247 PX_ERR_PEC_CLASS(bit) }, \ 248 { TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \ 249 0, \ 250 PX_ERR_BIT_HANDLE(hdl), \ 251 PX_ERPT_SEND(erpt), \ 252 PX_ERR_PEC_CLASS(bit) 253 px_err_bit_desc_t px_err_tlu_ce_tbl[] = { 254 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ 255 { TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) }, 256 { TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) }, 257 { TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) }, 258 { TLU_CE_BIT_DESC(BTP, pciex_ce, pciex_ce) }, 259 { TLU_CE_BIT_DESC(RE, pciex_ce, pciex_ce) } 260 }; 261 #define px_err_tlu_ce_keys \ 262 (sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t)) 263 264 /* pec oe errors */ 265 #define TLU_OE_BIT_DESC(bit, hdl, erpt) \ 266 TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \ 267 0, \ 268 PX_ERR_BIT_HANDLE(hdl), \ 269 PX_ERPT_SEND(erpt), \ 270 PX_ERR_PEC_CLASS(bit) }, \ 271 { TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \ 272 0, \ 273 PX_ERR_BIT_HANDLE(hdl), \ 274 PX_ERPT_SEND(erpt), \ 275 PX_ERR_PEC_CLASS(bit) 276 px_err_bit_desc_t px_err_tlu_oe_tbl[] = { 277 /* 278 * TLU Other Event Status (receive only) - see io erpt doc, section 3.7 279 */ 280 { TLU_OE_BIT_DESC(MRC, fatal_hw, pciex_rx_oe) }, 281 282 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ 283 { TLU_OE_BIT_DESC(WUC, non_fatal, pciex_rx_tx_oe) }, 284 { TLU_OE_BIT_DESC(RUC, non_fatal, pciex_rx_tx_oe) }, 285 { TLU_OE_BIT_DESC(CRS, non_fatal, pciex_rx_tx_oe) }, 286 287 /* TLU Other Event - see io erpt doc, section 3.9 */ 288 { TLU_OE_BIT_DESC(IIP, fatal_gos, pciex_oe) }, 289 { TLU_OE_BIT_DESC(EDP, fatal_gos, pciex_oe) }, 290 { TLU_OE_BIT_DESC(EHP, fatal_gos, pciex_oe) }, 291 { TLU_OE_BIT_DESC(LIN, non_fatal, pciex_oe) }, 292 { TLU_OE_BIT_DESC(LRS, non_fatal, pciex_oe) }, 293 { TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) }, 294 { TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) }, 295 { TLU_OE_BIT_DESC(ERU, fatal_gos, pciex_oe) }, 296 { TLU_OE_BIT_DESC(ERO, fatal_gos, pciex_oe) }, 297 { TLU_OE_BIT_DESC(EMP, fatal_gos, pciex_oe) }, 298 { TLU_OE_BIT_DESC(EPE, fatal_gos, pciex_oe) }, 299 { TLU_OE_BIT_DESC(ERP, fatal_gos, pciex_oe) }, 300 { TLU_OE_BIT_DESC(EIP, fatal_gos, pciex_oe) } 301 }; 302 303 #define px_err_tlu_oe_keys \ 304 (sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t)) 305 306 /* 307 * All the following tables below are for LPU Interrupts. These interrupts 308 * are *NOT* error interrupts, but event status interrupts. 309 * 310 * These events are probably of most interest to: 311 * o Hotplug 312 * o Power Management 313 * o etc... 314 * 315 * There are also a few events that would be interresting for FMA. 316 * Again none of the regiseters below state that an error has occured 317 * or that data has been lost. If anything, they give status that an 318 * error is *about* to occur. examples 319 * o INT_SKP_ERR - indicates clock between fire and child is too far 320 * off and is most unlikely able to compensate 321 * o INT_TX_PAR_ERR - A parity error occured in ONE lane. This is 322 * HW recoverable, but will like end up as a future 323 * fabric error as well. 324 * 325 * For now, we don't care about any of these errors and should be ignore, 326 * but cleared. 327 */ 328 329 /* LPU Link Interrupt Table */ 330 #define LPUL_BIT_DESC(bit, hdl, erpt) \ 331 LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \ 332 0, \ 333 NULL, \ 334 NULL, \ 335 "" 336 px_err_bit_desc_t px_err_lpul_tbl[] = { 337 { LPUL_BIT_DESC(LINK_ERR_ACT, NULL, NULL) } 338 }; 339 #define px_err_lpul_keys \ 340 (sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t)) 341 342 /* LPU Physical Interrupt Table */ 343 #define LPUP_BIT_DESC(bit, hdl, erpt) \ 344 LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \ 345 0, \ 346 NULL, \ 347 NULL, \ 348 "" 349 px_err_bit_desc_t px_err_lpup_tbl[] = { 350 { LPUP_BIT_DESC(PHY_LAYER_ERR, NULL, NULL) } 351 }; 352 #define px_err_lpup_keys \ 353 (sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t)) 354 355 /* LPU Receive Interrupt Table */ 356 #define LPUR_BIT_DESC(bit, hdl, erpt) \ 357 LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \ 358 0, \ 359 NULL, \ 360 NULL, \ 361 "" 362 px_err_bit_desc_t px_err_lpur_tbl[] = { 363 { LPUR_BIT_DESC(RCV_PHY, NULL, NULL) } 364 }; 365 #define px_err_lpur_keys \ 366 (sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t)) 367 368 /* LPU Transmit Interrupt Table */ 369 #define LPUX_BIT_DESC(bit, hdl, erpt) \ 370 LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \ 371 0, \ 372 NULL, \ 373 NULL, \ 374 "" 375 px_err_bit_desc_t px_err_lpux_tbl[] = { 376 { LPUX_BIT_DESC(UNMSK, NULL, NULL) } 377 }; 378 #define px_err_lpux_keys \ 379 (sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t)) 380 381 /* LPU LTSSM Interrupt Table */ 382 #define LPUS_BIT_DESC(bit, hdl, erpt) \ 383 LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \ 384 0, \ 385 NULL, \ 386 NULL, \ 387 "" 388 px_err_bit_desc_t px_err_lpus_tbl[] = { 389 { LPUS_BIT_DESC(ANY, NULL, NULL) } 390 }; 391 #define px_err_lpus_keys \ 392 (sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t)) 393 394 /* LPU Gigablaze Glue Interrupt Table */ 395 #define LPUG_BIT_DESC(bit, hdl, erpt) \ 396 LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \ 397 0, \ 398 NULL, \ 399 NULL, \ 400 "" 401 px_err_bit_desc_t px_err_lpug_tbl[] = { 402 { LPUG_BIT_DESC(GLOBL_UNMSK, NULL, NULL) } 403 }; 404 #define px_err_lpug_keys \ 405 (sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t)) 406 407 408 /* Mask and Tables */ 409 #define MnT6(pre) \ 410 B_FALSE, \ 411 &px_ ## pre ## _intr_mask, \ 412 &px_ ## pre ## _log_mask, \ 413 &px_ ## pre ## _count_mask, \ 414 px_err_ ## pre ## _tbl, \ 415 px_err_ ## pre ## _keys, \ 416 0 417 418 /* LPU Registers Addresses */ 419 #define LR4(pre) \ 420 NULL, \ 421 LPU_ ## pre ## _INTERRUPT_MASK, \ 422 LPU_ ## pre ## _INTERRUPT_AND_STATUS, \ 423 LPU_ ## pre ## _INTERRUPT_AND_STATUS 424 425 /* LPU Registers Addresses with Irregularities */ 426 #define LR4_FIXME(pre) \ 427 NULL, \ 428 LPU_ ## pre ## _INTERRUPT_MASK, \ 429 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \ 430 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS 431 432 /* TLU Registers Addresses */ 433 #define TR4(pre) \ 434 TLU_ ## pre ## _LOG_ENABLE, \ 435 TLU_ ## pre ## _INTERRUPT_ENABLE, \ 436 TLU_ ## pre ## _INTERRUPT_STATUS, \ 437 TLU_ ## pre ## _STATUS_CLEAR 438 439 /* Registers Addresses for JBC, MMU, IMU and ILU */ 440 #define R4(pre) \ 441 pre ## _ERROR_LOG_ENABLE, \ 442 pre ## _INTERRUPT_ENABLE, \ 443 pre ## _INTERRUPT_STATUS, \ 444 pre ## _ERROR_STATUS_CLEAR 445 446 /* 447 * Register error handling tables. 448 * The ID Field (first field) is identified by an enum px_err_id_t. 449 * It is located in px_err.h 450 */ 451 px_err_reg_desc_t px_err_reg_tbl[] = { 452 { MnT6(cb), R4(JBC), "JBC Error"}, 453 { MnT6(mmu), R4(MMU), "MMU Error"}, 454 { MnT6(imu), R4(IMU), "IMU Error"}, 455 { MnT6(tlu_ue), TR4(UNCORRECTABLE_ERROR), "TLU UE"}, 456 { MnT6(tlu_ce), TR4(CORRECTABLE_ERROR), "TLU CE"}, 457 { MnT6(tlu_oe), TR4(OTHER_EVENT), "TLU OE"}, 458 { MnT6(ilu), R4(ILU), "ILU Error"}, 459 { MnT6(lpul), LR4(LINK_LAYER), "LPU Link Layer"}, 460 { MnT6(lpup), LR4_FIXME(PHY), "LPU Phy Layer"}, 461 { MnT6(lpur), LR4(RECEIVE_PHY), "LPU RX Phy Layer"}, 462 { MnT6(lpux), LR4(TRANSMIT_PHY), "LPU TX Phy Layer"}, 463 { MnT6(lpus), LR4(LTSSM), "LPU LTSSM"}, 464 { MnT6(lpug), LR4(GIGABLAZE_GLUE), "LPU GigaBlaze Glue"} 465 }; 466 #define PX_ERR_REG_KEYS (sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0])) 467 468 typedef struct px_err_ss { 469 uint64_t err_status[PX_ERR_REG_KEYS]; 470 } px_err_ss_t; 471 472 static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, boolean_t chkjbc); 473 static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, 474 px_err_ss_t *ss); 475 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, 476 int err, int caller); 477 478 /* 479 * px_err_cb_intr: 480 * Interrupt handler for the JBC block. 481 * o lock 482 * o create derr 483 * o px_err_handle(leaf1, with jbc) 484 * o px_err_handle(leaf2, without jbc) 485 * o dispatch (leaf1) 486 * o dispatch (leaf2) 487 * o unlock 488 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 489 */ 490 uint_t 491 px_err_cb_intr(caddr_t arg) 492 { 493 px_fault_t *px_fault_p = (px_fault_t *)arg; 494 dev_info_t *rpdip = px_fault_p->px_fh_dip; 495 px_t *px_p = DIP_TO_STATE(rpdip); 496 int err = PX_OK; 497 int ret = DDI_FM_OK; 498 int fatal = 0; 499 ddi_fm_error_t derr; 500 501 /* Create the derr */ 502 bzero(&derr, sizeof (ddi_fm_error_t)); 503 derr.fme_version = DDI_FME_VERSION; 504 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 505 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 506 507 mutex_enter(&px_p->px_fm_mutex); 508 509 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 510 511 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 512 switch (ret) { 513 case DDI_FM_FATAL: 514 fatal++; 515 break; 516 case DDI_FM_NONFATAL: 517 case DDI_FM_UNKNOWN: 518 default: 519 break; 520 } 521 522 /* Set the intr state to idle for the leaf that received the mondo */ 523 524 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, 525 INTR_IDLE_STATE); 526 527 mutex_exit(&px_p->px_fm_mutex); 528 529 /* 530 * PX_FATAL_HW error is diagnosed after system recovered from 531 * HW initiated reset, therefore no furthur handling is required. 532 */ 533 if (fatal || err & (PX_FATAL_GOS | PX_FATAL_SW)) 534 PX_FM_PANIC("Fatal System Bus Error has occurred\n"); 535 536 return (DDI_INTR_CLAIMED); 537 } 538 539 /* 540 * px_err_dmc_pec_intr: 541 * Interrupt handler for the DMC/PEC block. 542 * o lock 543 * o create derr 544 * o px_err_handle(leaf, with jbc) 545 * o dispatch (leaf) 546 * o unlock 547 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 548 */ 549 uint_t 550 px_err_dmc_pec_intr(caddr_t arg) 551 { 552 px_fault_t *px_fault_p = (px_fault_t *)arg; 553 dev_info_t *rpdip = px_fault_p->px_fh_dip; 554 px_t *px_p = DIP_TO_STATE(rpdip); 555 int err = PX_OK; 556 int ret = DDI_FM_OK; 557 ddi_fm_error_t derr; 558 559 /* Create the derr */ 560 bzero(&derr, sizeof (ddi_fm_error_t)); 561 derr.fme_version = DDI_FME_VERSION; 562 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 563 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 564 565 mutex_enter(&px_p->px_fm_mutex); 566 567 /* send ereport/handle/clear fire registers */ 568 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 569 570 /* Check all child devices for errors */ 571 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 572 573 /* Set the interrupt state to idle */ 574 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, 575 INTR_IDLE_STATE); 576 577 mutex_exit(&px_p->px_fm_mutex); 578 579 /* 580 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 581 * therefore it does not cause panic. 582 */ 583 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) 584 PX_FM_PANIC("Fatal System Port Error has occurred\n"); 585 586 return (DDI_INTR_CLAIMED); 587 } 588 589 /* 590 * Error register are being handled by px_hlib xxx_init functions. 591 * They are also called again by px_err_add_intr for mondo62 and 63 592 * from px_cb_attach and px_attach 593 */ 594 void 595 px_err_reg_enable(px_t *px_p, px_err_id_t id) 596 { 597 px_err_reg_desc_t *reg_desc = &px_err_reg_tbl[id]; 598 uint64_t intr_mask = *reg_desc->intr_mask_p; 599 uint64_t log_mask = *reg_desc->log_mask_p; 600 caddr_t csr_base; 601 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 602 603 if (id == PX_ERR_JBC) 604 csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; 605 else 606 csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; 607 608 reg_desc->enabled = B_TRUE; 609 610 /* Enable logs if it exists */ 611 if (reg_desc->log_addr != NULL) 612 CSR_XS(csr_base, reg_desc->log_addr, log_mask); 613 614 /* 615 * For readability you in code you set 1 to enable an interrupt. 616 * But in Fire it's backwards. You set 1 to *disable* an intr. 617 * Reverse the user tunable intr mask field. 618 * 619 * Disable All Errors 620 * Clear All Errors 621 * Enable Errors 622 */ 623 CSR_XS(csr_base, reg_desc->enable_addr, 0); 624 CSR_XS(csr_base, reg_desc->clear_addr, -1); 625 CSR_XS(csr_base, reg_desc->enable_addr, intr_mask); 626 DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", 627 reg_desc->msg, CSR_XR(csr_base, reg_desc->enable_addr)); 628 DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", 629 reg_desc->msg, CSR_XR(csr_base, reg_desc->status_addr)); 630 DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", 631 reg_desc->msg, CSR_XR(csr_base, reg_desc->clear_addr)); 632 if (reg_desc->log_addr != NULL) { 633 DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", 634 reg_desc->msg, CSR_XR(csr_base, reg_desc->log_addr)); 635 } 636 } 637 638 void 639 px_err_reg_disable(px_t *px_p, px_err_id_t id) 640 { 641 px_err_reg_desc_t *reg_desc = &px_err_reg_tbl[id]; 642 caddr_t csr_base; 643 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 644 645 if (id == PX_ERR_JBC) 646 csr_base = (caddr_t)(uintptr_t)pxu_p->px_address[PX_REG_XBC]; 647 else 648 csr_base = (caddr_t)(uintptr_t)pxu_p->px_address[PX_REG_CSR]; 649 650 reg_desc->enabled = B_FALSE; 651 652 switch (id) { 653 case PX_ERR_JBC: 654 case PX_ERR_MMU: 655 case PX_ERR_IMU: 656 case PX_ERR_TLU_UE: 657 case PX_ERR_TLU_CE: 658 case PX_ERR_TLU_OE: 659 case PX_ERR_ILU: 660 if (reg_desc->log_addr != NULL) { 661 CSR_XS(csr_base, reg_desc->log_addr, 0); 662 } 663 CSR_XS(csr_base, reg_desc->enable_addr, 0); 664 break; 665 case PX_ERR_LPU_LINK: 666 case PX_ERR_LPU_PHY: 667 case PX_ERR_LPU_RX: 668 case PX_ERR_LPU_TX: 669 case PX_ERR_LPU_LTSSM: 670 case PX_ERR_LPU_GIGABLZ: 671 if (reg_desc->log_addr != NULL) { 672 CSR_XS(csr_base, reg_desc->log_addr, -1); 673 } 674 CSR_XS(csr_base, reg_desc->enable_addr, -1); 675 break; 676 } 677 } 678 679 /* 680 * px_err_handle: 681 * Common function called by trap, mondo and fabric intr. 682 * o Snap shot current fire registers 683 * o check for safe access 684 * o send ereport and clear snap shot registers 685 * o check severity of snap shot registers 686 * 687 * @param px_p leaf in which to check access 688 * @param derr fm err data structure to be updated 689 * @param caller PX_TRAP_CALL | PX_INTR_CALL 690 * @param chkjbc whether to handle jbc registers 691 * @return err PX_OK | PX_NONFATAL | 692 * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL 693 */ 694 int 695 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, 696 boolean_t chkjbc) 697 { 698 px_err_ss_t ss; 699 int err = PX_OK; 700 701 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 702 703 /* snap shot the current fire registers */ 704 px_err_snapshot(px_p, &ss, chkjbc); 705 706 /* check for safe access */ 707 px_err_safeacc_check(px_p, derr); 708 709 /* send ereports/handle/clear registers */ 710 err = px_err_erpt_and_clr(px_p, derr, &ss); 711 712 /* check for error severity */ 713 err = px_err_check_severity(px_p, derr, err, caller); 714 715 /* Mark the On Trap Handle if an error occured */ 716 if (err != PX_OK) { 717 px_pec_t *pec_p = px_p->px_pec_p; 718 on_trap_data_t *otd = pec_p->pec_ontrap_data; 719 720 if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS)) 721 otd->ot_trap |= OT_DATA_ACCESS; 722 } 723 724 return (err); 725 } 726 727 /* 728 * Static function 729 */ 730 731 /* 732 * px_err_snapshot: 733 * Take a current snap shot of all the fire error registers. This includes 734 * JBC, DMC, and PEC, unless chkjbc == false; 735 * 736 * @param px_p leaf in which to take the snap shot. 737 * @param ss pre-allocated memory to store the snap shot. 738 * @param chkjbc boolean on whether to store jbc register. 739 */ 740 static void 741 px_err_snapshot(px_t *px_p, px_err_ss_t *ss, boolean_t chkjbc) 742 { 743 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 744 caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; 745 caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; 746 px_err_reg_desc_t *reg_desc; 747 int reg_id; 748 749 /* snapshot JBC interrupt status */ 750 reg_id = PX_ERR_JBC; 751 if (chkjbc == B_TRUE) { 752 reg_desc = &px_err_reg_tbl[reg_id]; 753 ss->err_status[reg_id] = CSR_XR(xbc_csr_base, 754 reg_desc->status_addr); 755 } else { 756 ss->err_status[reg_id] = 0; 757 } 758 759 /* snapshot DMC/PEC interrupt status */ 760 for (reg_id = 1; reg_id < PX_ERR_REG_KEYS; reg_id += 1) { 761 reg_desc = &px_err_reg_tbl[reg_id]; 762 ss->err_status[reg_id] = CSR_XR(pec_csr_base, 763 reg_desc->status_addr); 764 } 765 } 766 767 /* 768 * px_err_erpt_and_clr: 769 * This function does the following thing to all the fire registers based 770 * on an earlier snap shot. 771 * o Send ereport 772 * o Handle the error 773 * o Clear the error 774 * 775 * @param px_p leaf in which to take the snap shot. 776 * @param derr fm err in which the ereport is to be based on 777 * @param ss pre-allocated memory to store the snap shot. 778 */ 779 static int 780 px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss) 781 { 782 dev_info_t *rpdip = px_p->px_dip; 783 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 784 caddr_t csr_base; 785 px_err_reg_desc_t *err_reg_tbl; 786 px_err_bit_desc_t *err_bit_tbl; 787 px_err_bit_desc_t *err_bit_desc; 788 789 uint64_t *log_mask, *count_mask; 790 uint64_t status_addr, clear_addr; 791 uint64_t ss_reg; 792 793 int (*err_handler)(); 794 int (*erpt_handler)(); 795 int reg_id, key; 796 int err = PX_OK; 797 int biterr; 798 799 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 800 801 /* send erport/handle/clear JBC errors */ 802 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id += 1) { 803 /* Get the correct register description table */ 804 err_reg_tbl = &px_err_reg_tbl[reg_id]; 805 806 /* Get the correct CSR BASE */ 807 if (reg_id == PX_ERR_JBC) { 808 csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; 809 } else { 810 csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; 811 } 812 813 /* Get pointers to masks and register addresses */ 814 log_mask = err_reg_tbl->log_mask_p; 815 count_mask = err_reg_tbl->count_mask_p; 816 status_addr = err_reg_tbl->status_addr; 817 clear_addr = err_reg_tbl->clear_addr; 818 ss_reg = ss->err_status[reg_id]; 819 820 /* Get the register BIT description table */ 821 err_bit_tbl = err_reg_tbl->err_bit_tbl; 822 823 /* For each known bit in the register send erpt and handle */ 824 for (key = 0; key < err_reg_tbl->err_bit_keys; key += 1) { 825 /* Get the bit description table for this register */ 826 err_bit_desc = &err_bit_tbl[key]; 827 828 /* 829 * If the ss_reg is set for this bit, 830 * send ereport and handle 831 */ 832 if (BIT_TST(ss_reg, err_bit_desc->bit)) { 833 /* Increment the counter if necessary */ 834 if (BIT_TST(*count_mask, err_bit_desc->bit)) { 835 err_bit_desc->counter++; 836 } 837 838 /* Error Handle for this bit */ 839 err_handler = err_bit_desc->err_handler; 840 if (err_handler) { 841 biterr = err_handler(rpdip, 842 csr_base, 843 derr, 844 err_reg_tbl, 845 err_bit_desc); 846 err |= biterr; 847 } 848 849 /* Send the ereport if it's an UNEXPECTED err */ 850 erpt_handler = err_bit_desc->erpt_handler; 851 if ((derr->fme_flag == DDI_FM_ERR_UNEXPECTED) && 852 (biterr != PX_OK)) { 853 if (erpt_handler) 854 (void) erpt_handler(rpdip, 855 csr_base, 856 ss_reg, 857 derr, 858 err_bit_desc->bit, 859 err_bit_desc->class_name); 860 } 861 } 862 863 } 864 865 /* Print register status */ 866 if (ss_reg & *log_mask) 867 DBG(DBG_ERR_INTR, rpdip, "<%x>=%16llx %s\n", 868 status_addr, ss_reg, err_reg_tbl->msg); 869 870 /* Clear the register and error */ 871 CSR_XS(csr_base, clear_addr, ss_reg); 872 } 873 874 return (err); 875 } 876 877 /* 878 * px_err_check_severity: 879 * Check the severity of the fire error based on an earlier snapshot 880 * 881 * @param px_p leaf in which to take the snap shot. 882 * @param derr fm err in which the ereport is to be based on 883 * @param ss pre-allocated memory to store the snap shot. 884 */ 885 static int 886 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller) 887 { 888 px_pec_t *pec_p = px_p->px_pec_p; 889 boolean_t is_safeacc = B_FALSE; 890 891 /* nothing to do if called with no error */ 892 if (err == PX_OK) 893 return (err); 894 895 /* Cautious access error handling */ 896 switch (derr->fme_flag) { 897 case DDI_FM_ERR_EXPECTED: 898 if (caller == PX_TRAP_CALL) { 899 /* 900 * for ddi_caut_get treat all events as nonfatal 901 * The trampoline will set err_ena = 0, 902 * err_status = NONFATAL. 903 */ 904 derr->fme_status = DDI_FM_NONFATAL; 905 is_safeacc = B_TRUE; 906 } else { 907 /* 908 * For ddi_caut_put treat all events as nonfatal. Here 909 * we have the handle and can call ndi_fm_acc_err_set(). 910 */ 911 derr->fme_status = DDI_FM_NONFATAL; 912 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 913 is_safeacc = B_TRUE; 914 } 915 break; 916 case DDI_FM_ERR_PEEK: 917 case DDI_FM_ERR_POKE: 918 /* 919 * For ddi_peek/poke treat all events as nonfatal. 920 */ 921 is_safeacc = B_TRUE; 922 break; 923 default: 924 is_safeacc = B_FALSE; 925 } 926 927 /* 928 * The third argument "err" is passed in as error status from checking 929 * Fire register, re-adjust error status from safe access. 930 */ 931 if (is_safeacc && !(err & PX_FATAL_GOS)) 932 return (PX_NONFATAL); 933 934 return (err); 935 } 936 937 /* predefined convenience functions */ 938 /* ARGSUSED */ 939 int 940 px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base, 941 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 942 px_err_bit_desc_t *err_bit_descr) 943 { 944 return (PX_FATAL_HW); 945 } 946 947 /* ARGSUSED */ 948 int 949 px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base, 950 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 951 px_err_bit_desc_t *err_bit_descr) 952 { 953 return (PX_FATAL_GOS); 954 } 955 956 /* ARGSUSED */ 957 int 958 px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base, 959 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 960 px_err_bit_desc_t *err_bit_descr) 961 { 962 return (PX_STUCK_FATAL); 963 } 964 965 /* ARGSUSED */ 966 int 967 px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base, 968 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 969 px_err_bit_desc_t *err_bit_descr) 970 { 971 return (PX_FATAL_SW); 972 } 973 974 /* ARGSUSED */ 975 int 976 px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base, 977 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 978 px_err_bit_desc_t *err_bit_descr) 979 { 980 return (PX_NONFATAL); 981 } 982 983 /* ARGSUSED */ 984 int 985 px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, 986 px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) 987 { 988 return (PX_OK); 989 } 990 991 /* ARGSUSED */ 992 int 993 px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, 994 px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) 995 { 996 return (PX_ERR_UNKNOWN); 997 } 998 999 /* ARGSUSED */ 1000 PX_ERPT_SEND_DEC(do_not) 1001 { 1002 return (PX_OK); 1003 } 1004 1005 1006 /* JBC FATAL - see io erpt doc, section 1.1 */ 1007 PX_ERPT_SEND_DEC(jbc_fatal) 1008 { 1009 char buf[FM_MAX_CLASS]; 1010 boolean_t pri = PX_ERR_IS_PRI(bit); 1011 1012 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1013 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1014 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1015 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1016 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1017 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1018 FIRE_JBC_IE, DATA_TYPE_UINT64, 1019 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1020 FIRE_JBC_IS, DATA_TYPE_UINT64, 1021 ss_reg, 1022 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1023 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1024 FIRE_JBC_FEL1, DATA_TYPE_UINT64, 1025 CSR_XR(csr_base, FATAL_ERROR_LOG_1), 1026 FIRE_JBC_FEL2, DATA_TYPE_UINT64, 1027 CSR_XR(csr_base, FATAL_ERROR_LOG_2), 1028 NULL); 1029 1030 return (PX_OK); 1031 } 1032 1033 /* JBC MERGE - see io erpt doc, section 1.2 */ 1034 PX_ERPT_SEND_DEC(jbc_merge) 1035 { 1036 char buf[FM_MAX_CLASS]; 1037 boolean_t pri = PX_ERR_IS_PRI(bit); 1038 1039 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1040 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1041 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1042 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1043 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1044 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1045 FIRE_JBC_IE, DATA_TYPE_UINT64, 1046 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1047 FIRE_JBC_IS, DATA_TYPE_UINT64, 1048 ss_reg, 1049 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1050 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1051 FIRE_JBC_MTEL, DATA_TYPE_UINT64, 1052 CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG), 1053 NULL); 1054 1055 return (PX_OK); 1056 } 1057 1058 /* 1059 * JBC Merge buffer nonfatal errors: 1060 * Merge buffer parity error (rd_buf): dma:read:M:nonfatal 1061 * Merge buffer parity error (wr_buf): dma:write:M:nonfatal 1062 */ 1063 /* ARGSUSED */ 1064 int 1065 px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base, 1066 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1067 px_err_bit_desc_t *err_bit_descr) 1068 { 1069 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1070 uint64_t paddr; 1071 int ret; 1072 1073 if (!pri) 1074 return (PX_FATAL_GOS); 1075 1076 paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); 1077 paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1078 1079 ret = px_handle_lookup( 1080 rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); 1081 1082 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1083 } 1084 1085 /* JBC Jbusint IN - see io erpt doc, section 1.3 */ 1086 PX_ERPT_SEND_DEC(jbc_in) 1087 { 1088 char buf[FM_MAX_CLASS]; 1089 boolean_t pri = PX_ERR_IS_PRI(bit); 1090 1091 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1092 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1093 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1094 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1095 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1096 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1097 FIRE_JBC_IE, DATA_TYPE_UINT64, 1098 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1099 FIRE_JBC_IS, DATA_TYPE_UINT64, 1100 ss_reg, 1101 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1102 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1103 FIRE_JBC_JITEL1, DATA_TYPE_UINT64, 1104 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG), 1105 FIRE_JBC_JITEL2, DATA_TYPE_UINT64, 1106 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2), 1107 NULL); 1108 1109 return (PX_OK); 1110 } 1111 1112 /* 1113 * JBC Jbusint IN nonfatal errors: PA logged in Jbusint In Transaction Error 1114 * Log Reg[42:0]. 1115 * CE async fault error: nonfatal 1116 * Jbus bus error: dma::nonfatal 1117 * Jbus unmapped error: pio|dma:rdwr:M:nonfatal 1118 * Write data parity error: pio/write:M:nonfatal 1119 * Read data parity error: pio/read:M:nonfatal 1120 * Illegal NCWR bytemask: pio:write:M:nonfatal 1121 * Illegal NCRD bytemask: pio:write:M:nonfatal 1122 * Invalid jbus transaction: nonfatal 1123 */ 1124 /* ARGSUSED */ 1125 int 1126 px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base, 1127 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1128 px_err_bit_desc_t *err_bit_descr) 1129 { 1130 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1131 uint64_t paddr; 1132 int ret; 1133 1134 if (!pri) 1135 return (PX_FATAL_GOS); 1136 1137 paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); 1138 paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1139 1140 ret = px_handle_lookup( 1141 rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); 1142 1143 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1144 } 1145 1146 1147 /* JBC Jbusint Out - see io erpt doc, section 1.4 */ 1148 PX_ERPT_SEND_DEC(jbc_out) 1149 { 1150 char buf[FM_MAX_CLASS]; 1151 boolean_t pri = PX_ERR_IS_PRI(bit); 1152 1153 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1154 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1155 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1156 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1157 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1158 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1159 FIRE_JBC_IE, DATA_TYPE_UINT64, 1160 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1161 FIRE_JBC_IS, DATA_TYPE_UINT64, 1162 ss_reg, 1163 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1164 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1165 FIRE_JBC_JOTEL1, DATA_TYPE_UINT64, 1166 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG), 1167 FIRE_JBC_JOTEL2, DATA_TYPE_UINT64, 1168 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2), 1169 NULL); 1170 1171 return (PX_OK); 1172 } 1173 1174 /* JBC Dmcint ODCD - see io erpt doc, section 1.5 */ 1175 PX_ERPT_SEND_DEC(jbc_odcd) 1176 { 1177 char buf[FM_MAX_CLASS]; 1178 boolean_t pri = PX_ERR_IS_PRI(bit); 1179 1180 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1181 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1182 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1183 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1184 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1185 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1186 FIRE_JBC_IE, DATA_TYPE_UINT64, 1187 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1188 FIRE_JBC_IS, DATA_TYPE_UINT64, 1189 ss_reg, 1190 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1191 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1192 FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64, 1193 CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG), 1194 NULL); 1195 1196 return (PX_OK); 1197 } 1198 1199 /* 1200 * JBC Dmcint ODCO nonfatal errer handling - 1201 * Unmapped PIO read error: pio:read:M:nonfatal 1202 * Unmapped PIO write error: pio:write:M:nonfatal 1203 * PIO data parity error: pio:write:M:nonfatal 1204 * Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal 1205 * Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal 1206 */ 1207 /* ARGSUSED */ 1208 int 1209 px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base, 1210 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1211 px_err_bit_desc_t *err_bit_descr) 1212 { 1213 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1214 uint64_t paddr; 1215 int ret; 1216 1217 if (!pri) 1218 return (PX_FATAL_GOS); 1219 1220 paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); 1221 paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; 1222 1223 ret = px_handle_lookup( 1224 rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); 1225 1226 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1227 } 1228 1229 /* JBC Dmcint IDC - see io erpt doc, section 1.6 */ 1230 PX_ERPT_SEND_DEC(jbc_idc) 1231 { 1232 char buf[FM_MAX_CLASS]; 1233 boolean_t pri = PX_ERR_IS_PRI(bit); 1234 1235 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1236 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1237 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1238 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1239 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1240 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1241 FIRE_JBC_IE, DATA_TYPE_UINT64, 1242 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1243 FIRE_JBC_IS, DATA_TYPE_UINT64, 1244 ss_reg, 1245 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1246 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1247 FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64, 1248 CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG), 1249 NULL); 1250 1251 return (PX_OK); 1252 } 1253 1254 /* JBC CSR - see io erpt doc, section 1.7 */ 1255 PX_ERPT_SEND_DEC(jbc_csr) 1256 { 1257 char buf[FM_MAX_CLASS]; 1258 boolean_t pri = PX_ERR_IS_PRI(bit); 1259 1260 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1261 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1262 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1263 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1264 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1265 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1266 FIRE_JBC_IE, DATA_TYPE_UINT64, 1267 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1268 FIRE_JBC_IS, DATA_TYPE_UINT64, 1269 ss_reg, 1270 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1271 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1272 "jbc-error-reg", DATA_TYPE_UINT64, 1273 CSR_XR(csr_base, CSR_ERROR_LOG), 1274 NULL); 1275 1276 return (PX_OK); 1277 } 1278 1279 /* 1280 * JBC CSR errer handling - 1281 * Ebus ready timeout error: pio:rdwr:M:nonfatal 1282 */ 1283 /* ARGSUSED */ 1284 int 1285 px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base, 1286 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1287 px_err_bit_desc_t *err_bit_descr) 1288 { 1289 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1290 uint64_t paddr; 1291 int ret; 1292 1293 if (!pri) 1294 return (PX_FATAL_GOS); 1295 1296 paddr = CSR_XR(csr_base, CSR_ERROR_LOG); 1297 paddr &= CSR_ERROR_LOG_ADDRESS_MASK; 1298 1299 ret = px_handle_lookup( 1300 rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); 1301 1302 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1303 } 1304 1305 /* JBC Dmcint IDC - see io erpt doc, section 1.6 */ 1306 1307 /* DMC IMU RDS - see io erpt doc, section 2.1 */ 1308 PX_ERPT_SEND_DEC(imu_rds) 1309 { 1310 char buf[FM_MAX_CLASS]; 1311 boolean_t pri = PX_ERR_IS_PRI(bit); 1312 1313 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1314 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1315 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1316 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1317 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1318 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1319 FIRE_IMU_IE, DATA_TYPE_UINT64, 1320 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1321 FIRE_IMU_IS, DATA_TYPE_UINT64, 1322 ss_reg, 1323 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1324 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1325 FIRE_IMU_RDS, DATA_TYPE_UINT64, 1326 CSR_XR(csr_base, IMU_RDS_ERROR_LOG), 1327 NULL); 1328 1329 return (PX_OK); 1330 } 1331 1332 /* imu function to handle all Received but Not Enabled errors */ 1333 /* ARGSUSED */ 1334 int 1335 px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, 1336 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1337 px_err_bit_desc_t *err_bit_descr) 1338 { 1339 uint64_t imu_log_enable, imu_intr_enable; 1340 int mask = BITMASK(err_bit_descr->bit); 1341 int err = PX_NONFATAL; 1342 1343 imu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); 1344 imu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); 1345 1346 /* 1347 * If matching bit is not set, meaning corresponding rbne not 1348 * enabled, then receiving it indicates some sort of malfunction 1349 * possibly in hardware. 1350 * 1351 * Other wise, software may have intentionally disabled certain 1352 * errors for a period of time within which the occuring of the 1353 * disabled errors become rbne, that is non fatal. 1354 */ 1355 if (!(imu_log_enable & imu_intr_enable & mask)) 1356 err = PX_FATAL_SW; 1357 1358 return (err); 1359 } 1360 1361 /* 1362 * No platforms uses PME. Any PME received is simply logged 1363 * for analysis. 1364 */ 1365 /* ARGSUSED */ 1366 int 1367 px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base, 1368 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1369 px_err_bit_desc_t *err_bit_descr) 1370 { 1371 px_t *px_p = DIP_TO_STATE(rpdip); 1372 1373 px_p->px_pme_ignored++; 1374 return (PX_NONFATAL); 1375 } 1376 1377 /* handle EQ overflow */ 1378 /* ARGSUSED */ 1379 int 1380 px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, 1381 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1382 px_err_bit_desc_t *err_bit_descr) 1383 { 1384 px_t *px_p = DIP_TO_STATE(rpdip); 1385 px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; 1386 msiqid_t eqno; 1387 pci_msiq_state_t msiq_state; 1388 int err = PX_NONFATAL; 1389 int i; 1390 1391 eqno = msiq_state_p->msiq_1st_msiq_id; 1392 for (i = 0; i < msiq_state_p->msiq_cnt; i++) { 1393 if (px_lib_msiq_getstate(rpdip, eqno, &msiq_state) == 1394 DDI_SUCCESS) { 1395 if (msiq_state == PCI_MSIQ_STATE_ERROR) { 1396 err = PX_FATAL_SW; 1397 } 1398 } 1399 } 1400 1401 return (err); 1402 } 1403 1404 /* DMC IMU SCS - see io erpt doc, section 2.2 */ 1405 PX_ERPT_SEND_DEC(imu_scs) 1406 { 1407 char buf[FM_MAX_CLASS]; 1408 boolean_t pri = PX_ERR_IS_PRI(bit); 1409 1410 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1411 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1412 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1413 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1414 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1415 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1416 FIRE_IMU_IE, DATA_TYPE_UINT64, 1417 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1418 FIRE_IMU_IS, DATA_TYPE_UINT64, 1419 ss_reg, 1420 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1421 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1422 FIRE_IMU_SCS, DATA_TYPE_UINT64, 1423 CSR_XR(csr_base, IMU_SCS_ERROR_LOG), 1424 NULL); 1425 1426 return (PX_OK); 1427 } 1428 1429 /* DMC IMU - see io erpt doc, section 2.3 */ 1430 PX_ERPT_SEND_DEC(imu) 1431 { 1432 char buf[FM_MAX_CLASS]; 1433 boolean_t pri = PX_ERR_IS_PRI(bit); 1434 1435 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1436 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1437 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1438 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1439 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1440 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1441 FIRE_IMU_IE, DATA_TYPE_UINT64, 1442 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1443 FIRE_IMU_IS, DATA_TYPE_UINT64, 1444 ss_reg, 1445 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1446 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1447 NULL); 1448 1449 return (PX_OK); 1450 } 1451 1452 /* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ 1453 PX_ERPT_SEND_DEC(mmu_tfar_tfsr) 1454 { 1455 char buf[FM_MAX_CLASS]; 1456 boolean_t pri = PX_ERR_IS_PRI(bit); 1457 1458 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1459 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1460 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1461 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1462 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1463 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1464 FIRE_MMU_IE, DATA_TYPE_UINT64, 1465 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1466 FIRE_MMU_IS, DATA_TYPE_UINT64, 1467 ss_reg, 1468 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1469 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1470 FIRE_MMU_TFAR, DATA_TYPE_UINT64, 1471 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS), 1472 FIRE_MMU_TFSR, DATA_TYPE_UINT64, 1473 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS), 1474 NULL); 1475 1476 return (PX_OK); 1477 } 1478 1479 /* DMC MMU - see io erpt doc, section 2.5 */ 1480 PX_ERPT_SEND_DEC(mmu) 1481 { 1482 char buf[FM_MAX_CLASS]; 1483 boolean_t pri = PX_ERR_IS_PRI(bit); 1484 1485 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1486 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1487 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1488 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1489 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1490 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1491 FIRE_MMU_IE, DATA_TYPE_UINT64, 1492 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1493 FIRE_MMU_IS, DATA_TYPE_UINT64, 1494 ss_reg, 1495 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1496 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1497 NULL); 1498 1499 return (PX_OK); 1500 } 1501 1502 /* imu function to handle all Received but Not Enabled errors */ 1503 int 1504 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, 1505 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1506 px_err_bit_desc_t *err_bit_descr) 1507 { 1508 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1509 uint64_t mmu_log_enable, mmu_intr_enable; 1510 uint64_t mask = BITMASK(err_bit_descr->bit); 1511 uint64_t mmu_tfa, mmu_ctrl; 1512 uint64_t mmu_enable_bit = 0; 1513 int err = PX_NONFATAL; 1514 int ret; 1515 1516 mmu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); 1517 mmu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); 1518 1519 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1520 mmu_ctrl = CSR_XR(csr_base, MMU_CONTROL_AND_STATUS); 1521 1522 switch (err_bit_descr->bit) { 1523 case MMU_INTERRUPT_STATUS_BYP_ERR_P: 1524 mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_BE); 1525 break; 1526 case MMU_INTERRUPT_STATUS_TRN_ERR_P: 1527 mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_TE); 1528 break; 1529 default: 1530 mmu_enable_bit = 0; 1531 break; 1532 } 1533 1534 /* 1535 * If the interrupts are enabled and Translation/Bypass Enable bit 1536 * was set, then panic. This error should not have occured. 1537 */ 1538 if (mmu_log_enable & mmu_intr_enable & 1539 (mmu_ctrl & mmu_enable_bit)) { 1540 err = PX_FATAL_SW; 1541 } else { 1542 if (!pri) 1543 return (PX_FATAL_GOS); 1544 1545 ret = px_handle_lookup( 1546 rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); 1547 err = (ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL; 1548 1549 /* 1550 * S/W bug - this error should always be enabled 1551 */ 1552 1553 /* enable error & intr reporting for this bit */ 1554 CSR_XS(csr_base, MMU_ERROR_LOG_ENABLE, mmu_log_enable | mask); 1555 CSR_XS(csr_base, MMU_INTERRUPT_ENABLE, mmu_intr_enable | mask); 1556 1557 /* enable translation access/bypass enable */ 1558 CSR_XS(csr_base, MMU_CONTROL_AND_STATUS, 1559 mmu_ctrl | mmu_enable_bit); 1560 } 1561 1562 return (err); 1563 } 1564 1565 /* Generic error handling functions that involve MMU Translation Fault Addr */ 1566 /* ARGSUSED */ 1567 int 1568 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, 1569 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1570 px_err_bit_desc_t *err_bit_descr) 1571 { 1572 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1573 uint64_t mmu_tfa; 1574 uint_t ret; 1575 1576 if (!pri) 1577 return (PX_FATAL_GOS); 1578 1579 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1580 ret = px_handle_lookup( 1581 rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); 1582 1583 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1584 } 1585 1586 /* MMU Table walk errors */ 1587 /* ARGSUSED */ 1588 int 1589 px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base, 1590 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1591 px_err_bit_desc_t *err_bit_descr) 1592 { 1593 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1594 uint64_t mmu_tfa; 1595 uint_t ret; 1596 1597 if (!pri) 1598 return (PX_FATAL_GOS); 1599 1600 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1601 ret = px_handle_lookup( 1602 rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); 1603 1604 return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); 1605 } 1606 1607 /* 1608 * TLU LUP event - if caused by power management activity, then it is expected. 1609 * In all other cases, it is an error. 1610 */ 1611 /* ARGSUSED */ 1612 int 1613 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, 1614 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1615 px_err_bit_desc_t *err_bit_descr) 1616 { 1617 px_t *px_p = DIP_TO_STATE(rpdip); 1618 1619 /* 1620 * power management code is currently the only segment that sets 1621 * px_lup_pending to indicate its expectation for a healthy LUP 1622 * event. For all other occasions, LUP event should be flaged as 1623 * error condition. 1624 */ 1625 return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? 1626 PX_NONFATAL : PX_OK); 1627 } 1628 1629 /* 1630 * TLU LDN event - if caused by power management activity, then it is expected. 1631 * In all other cases, it is an error. 1632 */ 1633 /* ARGSUSED */ 1634 int 1635 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base, 1636 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1637 px_err_bit_desc_t *err_bit_descr) 1638 { 1639 px_t *px_p = DIP_TO_STATE(rpdip); 1640 return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_OK : PX_NONFATAL); 1641 } 1642 1643 /* PEC ILU none - see io erpt doc, section 3.1 */ 1644 PX_ERPT_SEND_DEC(pec_ilu) 1645 { 1646 char buf[FM_MAX_CLASS]; 1647 boolean_t pri = PX_ERR_IS_PRI(bit); 1648 1649 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1650 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1651 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1652 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1653 FIRE_ILU_ELE, DATA_TYPE_UINT64, 1654 CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE), 1655 FIRE_ILU_IE, DATA_TYPE_UINT64, 1656 CSR_XR(csr_base, ILU_INTERRUPT_ENABLE), 1657 FIRE_ILU_IS, DATA_TYPE_UINT64, 1658 ss_reg, 1659 FIRE_ILU_ESS, DATA_TYPE_UINT64, 1660 CSR_XR(csr_base, ILU_ERROR_STATUS_SET), 1661 NULL); 1662 1663 return (PX_OK); 1664 } 1665 1666 /* PCIEX UE Errors */ 1667 /* ARGSUSED */ 1668 int 1669 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base, 1670 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1671 px_err_bit_desc_t *err_bit_descr) 1672 { 1673 uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); 1674 1675 return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ue_gos) ? 1676 PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ue, 1677 px_fabric_die_rc_ue_gos)); 1678 } 1679 1680 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.2 */ 1681 PX_ERPT_SEND_DEC(pciex_rx_ue) 1682 { 1683 char buf[FM_MAX_CLASS]; 1684 boolean_t pri = PX_ERR_IS_PRI(bit); 1685 1686 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1687 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1688 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1689 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1690 FIRE_TLU_UELE, DATA_TYPE_UINT64, 1691 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 1692 FIRE_TLU_UIE, DATA_TYPE_UINT64, 1693 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 1694 FIRE_TLU_UIS, DATA_TYPE_UINT64, 1695 ss_reg, 1696 FIRE_TLU_UESS, DATA_TYPE_UINT64, 1697 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 1698 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 1699 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 1700 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 1701 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 1702 NULL); 1703 1704 return (PX_OK); 1705 } 1706 1707 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.3 */ 1708 PX_ERPT_SEND_DEC(pciex_tx_ue) 1709 { 1710 char buf[FM_MAX_CLASS]; 1711 boolean_t pri = PX_ERR_IS_PRI(bit); 1712 1713 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1714 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1715 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1716 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1717 FIRE_TLU_UELE, DATA_TYPE_UINT64, 1718 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 1719 FIRE_TLU_UIE, DATA_TYPE_UINT64, 1720 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 1721 FIRE_TLU_UIS, DATA_TYPE_UINT64, 1722 ss_reg, 1723 FIRE_TLU_UESS, DATA_TYPE_UINT64, 1724 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 1725 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 1726 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 1727 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 1728 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 1729 NULL); 1730 1731 return (PX_OK); 1732 } 1733 1734 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.4 */ 1735 PX_ERPT_SEND_DEC(pciex_rx_tx_ue) 1736 { 1737 char buf[FM_MAX_CLASS]; 1738 boolean_t pri = PX_ERR_IS_PRI(bit); 1739 1740 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1741 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1742 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1743 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1744 FIRE_TLU_UELE, DATA_TYPE_UINT64, 1745 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 1746 FIRE_TLU_UIE, DATA_TYPE_UINT64, 1747 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 1748 FIRE_TLU_UIS, DATA_TYPE_UINT64, 1749 ss_reg, 1750 FIRE_TLU_UESS, DATA_TYPE_UINT64, 1751 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 1752 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 1753 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 1754 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 1755 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 1756 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 1757 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 1758 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 1759 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 1760 NULL); 1761 1762 return (PX_OK); 1763 } 1764 1765 /* PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ 1766 PX_ERPT_SEND_DEC(pciex_ue) 1767 { 1768 char buf[FM_MAX_CLASS]; 1769 boolean_t pri = PX_ERR_IS_PRI(bit); 1770 1771 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1772 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1773 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1774 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1775 FIRE_TLU_UELE, DATA_TYPE_UINT64, 1776 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 1777 FIRE_TLU_UIE, DATA_TYPE_UINT64, 1778 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 1779 FIRE_TLU_UIS, DATA_TYPE_UINT64, 1780 ss_reg, 1781 FIRE_TLU_UESS, DATA_TYPE_UINT64, 1782 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 1783 NULL); 1784 1785 return (PX_OK); 1786 } 1787 1788 /* PCIEX UE Errors */ 1789 /* ARGSUSED */ 1790 int 1791 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base, 1792 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1793 px_err_bit_desc_t *err_bit_descr) 1794 { 1795 uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); 1796 1797 return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ce_gos) ? 1798 PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ce, 1799 px_fabric_die_rc_ce_gos)); 1800 } 1801 1802 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ 1803 PX_ERPT_SEND_DEC(pciex_ce) 1804 { 1805 char buf[FM_MAX_CLASS]; 1806 boolean_t pri = PX_ERR_IS_PRI(bit); 1807 1808 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1809 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1810 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1811 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1812 FIRE_TLU_CELE, DATA_TYPE_UINT64, 1813 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE), 1814 FIRE_TLU_CIE, DATA_TYPE_UINT64, 1815 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE), 1816 FIRE_TLU_CIS, DATA_TYPE_UINT64, 1817 ss_reg, 1818 FIRE_TLU_CESS, DATA_TYPE_UINT64, 1819 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), 1820 NULL); 1821 1822 return (PX_OK); 1823 } 1824 1825 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ 1826 PX_ERPT_SEND_DEC(pciex_rx_oe) 1827 { 1828 char buf[FM_MAX_CLASS]; 1829 boolean_t pri = PX_ERR_IS_PRI(bit); 1830 1831 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1832 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1833 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1834 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1835 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 1836 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 1837 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 1838 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 1839 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 1840 ss_reg, 1841 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 1842 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 1843 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 1844 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), 1845 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 1846 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), 1847 NULL); 1848 1849 return (PX_OK); 1850 } 1851 1852 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ 1853 PX_ERPT_SEND_DEC(pciex_rx_tx_oe) 1854 { 1855 char buf[FM_MAX_CLASS]; 1856 boolean_t pri = PX_ERR_IS_PRI(bit); 1857 1858 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1859 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1860 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1861 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1862 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 1863 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 1864 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 1865 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 1866 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 1867 ss_reg, 1868 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 1869 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 1870 FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, 1871 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), 1872 FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, 1873 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), 1874 FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, 1875 CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG), 1876 FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, 1877 CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG), 1878 NULL); 1879 1880 return (PX_OK); 1881 } 1882 1883 /* TLU Other Event - see io erpt doc, section 3.9 */ 1884 PX_ERPT_SEND_DEC(pciex_oe) 1885 { 1886 char buf[FM_MAX_CLASS]; 1887 boolean_t pri = PX_ERR_IS_PRI(bit); 1888 1889 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1890 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1891 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1892 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1893 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 1894 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 1895 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 1896 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 1897 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 1898 ss_reg, 1899 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 1900 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 1901 NULL); 1902 1903 return (PX_OK); 1904 } 1905