/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4u Fire Error Handling
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/pcie.h>
#include <sys/pcie_impl.h>
#include "px_obj.h"
#include <px_regs.h>
#include <px_csr.h>
#include <sys/membar.h>
#include <sys/machcpuvar.h>
#include <sys/platform_module.h>
#include "px_lib4u.h"
#include "px_err.h"
#include "px_err_impl.h"
#include "oberon_regs.h"

uint64_t px_tlu_ue_intr_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_log_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_count_mask = PX_ERR_EN_ALL;

uint64_t px_tlu_ce_intr_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_log_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_count_mask = PX_ERR_MASK_NONE;

/*
 * Do not enable Link Interrupts
 */
uint64_t px_tlu_oe_intr_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_log_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_count_mask = PX_ERR_EN_ALL;

uint64_t px_mmu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_log_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_count_mask = PX_ERR_EN_ALL;

uint64_t px_imu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_imu_log_mask = PX_ERR_EN_ALL;
uint64_t px_imu_count_mask = PX_ERR_EN_ALL;

/*
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
 */
uint64_t px_ilu_intr_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_log_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_count_mask = PX_ERR_EN_ALL;

uint64_t px_ubc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_log_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_count_mask = PX_ERR_EN_ALL;

uint64_t px_jbc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_log_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_count_mask = PX_ERR_EN_ALL;
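
/*
 * Each block above is controlled by three user tunable masks: the intr
 * mask is what gets programmed into the block's interrupt enable register,
 * the log mask into its error log enable register (when the block has one),
 * and the count mask selects which bits bump the per-bit counters kept in
 * the bit description tables below.  See px_err_reg_enable() and
 * px_err_erpt_and_clr() for where each mask is consumed.
 */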

/*
 * LPU Intr Registers use reverse encoding relative to the registers above:
 * 1 = disable
 * 0 = enable
 *
 * The Log and Count masks, however, use the same encoding as above.
 */
uint64_t px_lpul_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpul_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpul_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpup_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpup_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpup_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpur_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpur_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpur_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpux_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpux_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpux_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpus_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpus_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpus_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpug_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpug_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpug_count_mask = PX_ERR_EN_ALL;

/*
 * JBC error bit table
 */
#define JBC_BIT_DESC(bit, hdl, erpt) \
	JBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit) }, \
	{ JBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit)
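/*
 * Note that each JBC_BIT_DESC(bit, hdl, erpt) use below expands into two
 * px_err_bit_desc_t initializers, one for the primary (_P) and one for the
 * secondary (_S) status bit, both sharing the same bit handler and ereport
 * routine; the outer braces of the second entry are supplied by the table
 * itself.  The UBC, IMU, MMU, ILU and TLU *_BIT_DESC macros below follow
 * the same pattern.
 */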
px_err_bit_desc_t px_err_jbc_tbl[] = {
	/* JBC FATAL */
	{ JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(APE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) },

	/* JBC MERGE */
	{ JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) },
	{ JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) },

	/* JBC Jbusint IN */
	{ JBC_BIT_DESC(UE_ASYN, panic, jbc_in) },
	{ JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) },
	{ JBC_BIT_DESC(JTE, panic, jbc_in) },
	{ JBC_BIT_DESC(JBE, panic, jbc_in) },
	{ JBC_BIT_DESC(JUE, panic, jbc_in) },
	{ JBC_BIT_DESC(ICISE, panic, jbc_in) },
	{ JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) },
	{ JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) },
	{ JBC_BIT_DESC(ILL_BMW, panic, jbc_in) },
	{ JBC_BIT_DESC(ILL_BMR, panic, jbc_in) },
	{ JBC_BIT_DESC(BJC, panic, jbc_in) },

	/* JBC Jbusint Out */
	{ JBC_BIT_DESC(IJP, panic, jbc_out) },

	/*
	 * JBC Dmcint ODCD
	 *
	 * Error bits which can be set via a bad PCItool access go through
	 * jbc_safe_acc instead.
	 */
	{ JBC_BIT_DESC(PIO_UNMAP_RD, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) },

	/* JBC Dmcint IDC */
	{ JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) },
	{ JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) },

	/* JBC CSR */
	{ JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) }
};

#define px_err_jbc_keys \
	(sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * UBC error bit table
 */
#define UBC_BIT_DESC(bit, hdl, erpt) \
	UBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit) }, \
	{ UBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit)
px_err_bit_desc_t px_err_ubc_tbl[] = {
	/* UBC FATAL */
	{ UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) }
};

#define px_err_ubc_keys \
	(sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t))


char *ubc_class_eid_qualifier[] = {
	"-mem",
	"-channel",
	"-cpu",
	"-path"
};
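
/*
 * The qualifier strings above are appended to the ereport class by
 * PX_ERPT_SEND_DEC(ubc_fatal) below, indexed by the EID field of the
 * UBC Memory UE Log register, to name the entity (memory, channel, cpu
 * or path) involved in the failed access.
 */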

/*
 * DMC error bit tables
 */
#define IMU_BIT_DESC(bit, hdl, erpt) \
	IMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ IMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_imu_tbl[] = {
	/* DMC IMU RDS */
	{ IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) },
	{ IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) },
	{ IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) },

	/* DMC IMU SCS */
	{ IMU_BIT_DESC(EQ_NOT_EN, panic, imu_scs) },

	/* DMC IMU */
	{ IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) }
};

#define px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t))

/* mmu errors */
#define MMU_BIT_DESC(bit, hdl, erpt) \
	MMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ MMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_mmu_tbl[] = {
	/* DMC MMU TFAR/TFSR */
	{ MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) },

	/* DMC MMU */
	{ MMU_BIT_DESC(TTC_CAE, panic, mmu) }
};
#define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * PEC error bit tables
 */
#define ILU_BIT_DESC(bit, hdl, erpt) \
	ILU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ ILU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_ilu_tbl[] = {
	/* PEC ILU none */
	{ ILU_BIT_DESC(IHB_PE, panic, pec_ilu) }
};
#define px_err_ilu_keys \
	(sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * The PEC UE error implementation is incomplete pending generic PCIe
 * fabric rules.  Both PRIMARY and SECONDARY errors must be handled.
 */
/* pec ue errors */
#define TLU_UC_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
#define TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ue_tbl[] = {
	/* PCI-E Receive Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) },

	/* PCI-E Transmit Uncorrectable Errors */
	{ TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) },
	{ TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) },

	/* PCI-E Rx/Tx Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) },
	{ TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) },

	/* Other PCI-E Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) },
	{ TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) },
	{ TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) },

	/* Not used */
	{ TLU_UC_BIT_DESC(CA, pciex_ue, do_not) }
};
#define px_err_tlu_ue_keys \
	(sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t))
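
/*
 * The _OB_ variants of the TLU bit macros (TLU_UC_OB_BIT_DESC above and
 * TLU_OE_OB_BIT_DESC below) differ from the plain variants only in using
 * PX_ERR_PEC_OB_CLASS() rather than PX_ERR_PEC_CLASS() to build the
 * ereport class name; judging by the prefix, these cover the ECRC and
 * TLUEITMO bits, which appear to be Oberon-specific.
 */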

/*
 * The PEC CE error implementation is incomplete pending generic PCIe
 * fabric rules.
 */
/* pec ce errors */
#define TLU_CE_BIT_DESC(bit, hdl, erpt) \
	TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ce_tbl[] = {
	/* PCI-E Correctable Errors */
	{ TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(BTP, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(RE, pciex_ce, pciex_ce) }
};
#define px_err_tlu_ce_keys \
	(sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t))


/* pec oe errors */
#define TLU_OE_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
#define TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit)
px_err_bit_desc_t px_err_tlu_oe_tbl[] = {
	/* TLU Other Event Status (receive only) */
	{ TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) },

	/* TLU Other Event Status (rx + tx) */
	{ TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) },

	/* TLU Other Event */
	{ TLU_OE_BIT_DESC(IIP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EDP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EHP, panic, pciex_oe) },
	{ TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) },
	{ TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERU, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERO, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EMP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EPE, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EIP, panic, pciex_oe) }
};

#define px_err_tlu_oe_keys \
	(sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * All the tables below are for LPU Interrupts.  These interrupts
 * are *NOT* error interrupts, but event status interrupts.
 *
 * These events are probably of most interest to:
 * o Hotplug
 * o Power Management
 * o etc...
 *
 * There are also a few events that would be interesting for FMA.
 * Again, none of the registers below state that an error has occurred
 * or that data has been lost.  If anything, they give status that an
 * error is *about* to occur.  Examples:
 * o INT_SKP_ERR - indicates the clock between Fire and the child is too
 *                 far off and most likely cannot be compensated for.
 * o INT_TX_PAR_ERR - a parity error occurred in ONE lane.  This is
 *                    HW recoverable, but will likely end up as a future
 *                    fabric error as well.
 *
 * For now, we don't care about any of these errors; they should be
 * ignored, but cleared.
 */

/* LPU Link Interrupt Table */
#define LPUL_BIT_DESC(bit, hdl, erpt) \
	LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpul_tbl[] = {
	{ LPUL_BIT_DESC(LINK_ERR_ACT, NULL, NULL) }
};
#define px_err_lpul_keys \
	(sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Physical Interrupt Table */
#define LPUP_BIT_DESC(bit, hdl, erpt) \
	LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpup_tbl[] = {
	{ LPUP_BIT_DESC(PHY_LAYER_ERR, NULL, NULL) }
};
#define px_err_lpup_keys \
	(sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Receive Interrupt Table */
#define LPUR_BIT_DESC(bit, hdl, erpt) \
	LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpur_tbl[] = {
	{ LPUR_BIT_DESC(RCV_PHY, NULL, NULL) }
};
#define px_err_lpur_keys \
	(sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Transmit Interrupt Table */
#define LPUX_BIT_DESC(bit, hdl, erpt) \
	LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpux_tbl[] = {
	{ LPUX_BIT_DESC(UNMSK, NULL, NULL) }
};
#define px_err_lpux_keys \
	(sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU LTSSM Interrupt Table */
#define LPUS_BIT_DESC(bit, hdl, erpt) \
	LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpus_tbl[] = {
	{ LPUS_BIT_DESC(ANY, NULL, NULL) }
};
#define px_err_lpus_keys \
	(sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Gigablaze Glue Interrupt Table */
#define LPUG_BIT_DESC(bit, hdl, erpt) \
	LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpug_tbl[] = {
	{ LPUG_BIT_DESC(GLOBL_UNMSK, NULL, NULL) }
};
#define px_err_lpug_keys \
	(sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t))


/* Mask and Tables */
#define MnT6X(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_XBC, \
	0

#define MnT6(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_CSR, \
	0

/* LPU Register Addresses */
#define LR4(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS

/* LPU Register Addresses with Irregularities */
#define LR4_FIXME(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS

/* TLU Register Addresses */
#define TR4(pre) \
	TLU_ ## pre ## _LOG_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_STATUS, \
	TLU_ ## pre ## _STATUS_CLEAR

/* Register Addresses for JBC, UBC, MMU, IMU and ILU */
#define R4(pre) \
	pre ## _ERROR_LOG_ENABLE, \
	pre ## _INTERRUPT_ENABLE, \
	pre ## _INTERRUPT_STATUS, \
	pre ## _ERROR_STATUS_CLEAR

/* Bits in chip_mask, set according to type. */
#define CHP_O BITMASK(PX_CHIP_OBERON)
#define CHP_F BITMASK(PX_CHIP_FIRE)
#define CHP_FO (CHP_F | CHP_O)

/*
 * Register error handling tables.
 * The ID Field (first field) is identified by an enum px_err_id_t.
 * It is located in px_err.h
 */
static const
px_err_reg_desc_t px_err_reg_tbl[] = {
	{ CHP_F, MnT6X(jbc), R4(JBC), "JBC Error"},
	{ CHP_O, MnT6X(ubc), R4(UBC), "UBC Error"},
	{ CHP_FO, MnT6(mmu), R4(MMU), "MMU Error"},
	{ CHP_FO, MnT6(imu), R4(IMU), "IMU Error"},
	{ CHP_FO, MnT6(tlu_ue), TR4(UNCORRECTABLE_ERROR), "TLU UE"},
	{ CHP_FO, MnT6(tlu_ce), TR4(CORRECTABLE_ERROR), "TLU CE"},
	{ CHP_FO, MnT6(tlu_oe), TR4(OTHER_EVENT), "TLU OE"},
	{ CHP_FO, MnT6(ilu), R4(ILU), "ILU Error"},
	{ CHP_F, MnT6(lpul), LR4(LINK_LAYER), "LPU Link Layer"},
	{ CHP_F, MnT6(lpup), LR4_FIXME(PHY), "LPU Phy Layer"},
	{ CHP_F, MnT6(lpur), LR4(RECEIVE_PHY), "LPU RX Phy Layer"},
	{ CHP_F, MnT6(lpux), LR4(TRANSMIT_PHY), "LPU TX Phy Layer"},
	{ CHP_F, MnT6(lpus), LR4(LTSSM), "LPU LTSSM"},
	{ CHP_F, MnT6(lpug), LR4(GIGABLAZE_GLUE), "LPU GigaBlaze Glue"},
};

#define PX_ERR_REG_KEYS (sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0]))
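
/*
 * For reference, a single row of px_err_reg_tbl wires together everything
 * needed to manage one error register group.  The MMU row, for example,
 * expands to the chip mask (CHP_FO), the three tunable masks
 * (&px_mmu_intr_mask, &px_mmu_log_mask, &px_mmu_count_mask), the bit table
 * and its size (px_err_mmu_tbl, px_err_mmu_keys), the register bank
 * (PX_REG_CSR), the four register offsets MMU_ERROR_LOG_ENABLE,
 * MMU_INTERRUPT_ENABLE, MMU_INTERRUPT_STATUS and MMU_ERROR_STATUS_CLEAR,
 * and the "MMU Error" message string.
 */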

typedef struct px_err_ss {
	uint64_t err_status[PX_ERR_REG_KEYS];
} px_err_ss_t;

static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block);
static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr,
    px_err_ss_t *ss);
static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
    int err, int caller);

/*
 * px_err_cb_intr:
 * Interrupt handler for the JBC/UBC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr
 * o unlock
 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_cb_intr(caddr_t arg)
{
	px_fault_t *px_fault_p = (px_fault_t *)arg;
	dev_info_t *rpdip = px_fault_p->px_fh_dip;
	px_t *px_p = DIP_TO_STATE(rpdip);
	int err;
	ddi_fm_error_t derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST);
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	px_err_panic(err, PX_HB, PX_NO_ERROR, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(err, PX_HB, PX_NO_ERROR, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}

/*
 * px_err_dmc_pec_intr:
 * Interrupt handler for the DMC/PEC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr(leaf, without cb)
 * o pcie_scan_fabric (leaf)
 * o unlock
 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_dmc_pec_intr(caddr_t arg)
{
	px_fault_t *px_fault_p = (px_fault_t *)arg;
	dev_info_t *rpdip = px_fault_p->px_fh_dip;
	px_t *px_p = DIP_TO_STATE(rpdip);
	int rc_err, fab_err;
	ddi_fm_error_t derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	/* send ereport/handle/clear fire registers */
	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);

	/* Check all child devices for errors */
	fab_err = px_scan_fabric(px_p, rpdip, &derr);

	/* Set the interrupt state to idle */
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}

/*
 * Providing the proper csr_base is the responsibility of the caller.
 * (Called from px_lib_dev_init via px_err_reg_setup_all for pcie error
 * registers; called from px_cb_add_intr for jbc/ubc from px_cb_attach.)
 *
 * Note: reg_id is passed in instead of reg_desc since this function is called
 * from px_lib4u.c, which doesn't know about the structure of the table.
 */
void
px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id];
	uint64_t intr_mask = *reg_desc_p->intr_mask_p;
	uint64_t log_mask = *reg_desc_p->log_mask_p;

	/* Enable logging if a log enable register exists */
	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, log_mask);

	/*
	 * For readability, in code you set 1 to enable an interrupt.
	 * But in Fire it's backwards.  You set 1 to *disable* an intr.
	 * Reverse the user tunable intr mask field.
	 *
	 * Disable All Errors
	 * Clear All Errors
	 * Enable Errors
	 */
	CSR_XS(csr_base, reg_desc_p->enable_addr, 0);
	CSR_XS(csr_base, reg_desc_p->clear_addr, -1);
	CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask);
	DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->enable_addr));
	DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->status_addr));
	DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->clear_addr));
	if (reg_desc_p->log_addr != NULL) {
		DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg,
		    CSR_XR(csr_base, reg_desc_p->log_addr));
	}
}

void
px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id];
	uint64_t val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0;

	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, val);
	CSR_XS(csr_base, reg_desc_p->enable_addr, val);
}

/*
 * Set up pcie error registers.
 */
void
px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable)
{
	px_err_id_t reg_id;
	const px_err_reg_desc_t *reg_desc_p;
	void (*px_err_reg_func)(px_err_id_t, caddr_t);

	/*
	 * JBC or XBC are enabled during adding of common block interrupts,
	 * not done here.
	 */
	px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable);
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		reg_desc_p = &px_err_reg_tbl[reg_id];
		if ((reg_desc_p->chip_mask & chip_mask) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			px_err_reg_func(reg_id, csr_base);
	}
}

/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * o Snap shot current fire registers
 * o check for safe access
 * o send ereport and clear snap shot registers
 * o create and queue RC info for later use in fabric scan.
 *   o RUC/WUC, PTLP, MMU Errors(CA), UR
 * o check severity of snap shot registers
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL
 * @param block		PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL
 * @return err		PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED
 */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	px_err_ss_t ss = {0};
	int err;

	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* check for safe access */
	px_err_safeacc_check(px_p, derr);

	/* snap shot the current fire registers */
	px_err_snapshot(px_p, &ss, block);

	/* send ereports/handle/clear registers */
	err = px_err_erpt_and_clr(px_p, derr, &ss);

	/* check for error severity */
	err = px_err_check_severity(px_p, derr, err, caller);

	/* Mark the On Trap Handle if an error occurred */
	if (err != PX_NO_ERROR) {
		px_pec_t *pec_p = px_p->px_pec_p;
		on_trap_data_t *otd = pec_p->pec_ontrap_data;

		if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS))
			otd->ot_trap |= OT_DATA_ACCESS;
	}

	return (err);
}

/*
 * Static functions
 */

/*
 * px_err_snapshot:
 * Take a current snap shot of all the fire error registers.  This includes
 * JBC/UBC, DMC, and PEC depending on the block flag.
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param ss_p		pre-allocated memory to store the snap shot.
 * @param block		PX_FM_BLOCK_* flags selecting which register banks
 *			(host JBC/UBC vs. PCIe) are captured.
 */
static void
px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block)
{
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
	caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
	caddr_t csr_base;
	uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p);
	const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl;
	px_err_id_t reg_id;

	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) {
		if (!(reg_desc_p->chip_mask & chip_mask))
			continue;

		if ((block & PX_FM_BLOCK_HOST) &&
		    (reg_desc_p->reg_bank == PX_REG_XBC))
			csr_base = xbc_csr_base;
		else if ((block & PX_FM_BLOCK_PCIE) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			csr_base = pec_csr_base;
		else {
			ss_p->err_status[reg_id] = 0;
			continue;
		}

		ss_p->err_status[reg_id] = CSR_XR(csr_base,
		    reg_desc_p->status_addr);
	}
}
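
/*
 * Register groups that do not apply to this chip are simply skipped above,
 * and groups outside the caller's block selection have their err_status
 * slot zeroed; either way px_err_erpt_and_clr() below ends up ignoring
 * them (via its own chip_mask check and its !ss_reg test).
 */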

/*
 * px_err_erpt_and_clr:
 * This function does the following things to all the fire registers based
 * on an earlier snap shot.
 * o Send ereport
 * o Handle the error
 * o Clear the error
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param ss_p		pre-allocated memory to store the snap shot.
 */
static int
px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p)
{
	dev_info_t *rpdip = px_p->px_dip;
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t csr_base;
	const px_err_reg_desc_t *err_reg_tbl;
	px_err_bit_desc_t *err_bit_tbl;
	px_err_bit_desc_t *err_bit_desc;

	uint64_t *count_mask;
	uint64_t clear_addr;
	uint64_t ss_reg;

	int (*err_handler)();
	int (*erpt_handler)();
	int reg_id, key;
	int err = PX_NO_ERROR;
	int biterr = 0;

	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* send ereport/handle/clear fire errors */
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		/* Get the correct register description table */
		err_reg_tbl = &px_err_reg_tbl[reg_id];

		/* Only look at enabled groups. */
		if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p))))
			continue;

		/* Get the correct CSR BASE */
		csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank];

		/* If there are no errors in this register, continue */
		ss_reg = ss_p->err_status[reg_id];
		if (!ss_reg)
			continue;

		/* Get pointers to masks and register addresses */
		count_mask = err_reg_tbl->count_mask_p;
		clear_addr = err_reg_tbl->clear_addr;

		/* Get the register BIT description table */
		err_bit_tbl = err_reg_tbl->err_bit_tbl;

		/* For each known bit in the register send erpt and handle */
		for (key = 0; key < err_reg_tbl->err_bit_keys; key++) {
			/*
			 * If the ss_reg is set for this bit,
			 * send ereport and handle
			 */
			err_bit_desc = &err_bit_tbl[key];
			if (!BIT_TST(ss_reg, err_bit_desc->bit))
				continue;

			/* Increment the counter if necessary */
			if (BIT_TST(*count_mask, err_bit_desc->bit)) {
				err_bit_desc->counter++;
			}

			/* Error Handle for this bit */
			err_handler = err_bit_desc->err_handler;
			if (err_handler) {
				biterr = err_handler(rpdip, csr_base, derr,
				    err_reg_tbl, err_bit_desc);
				err |= biterr;
			}

			/*
			 * Send the ereport if it's an UNEXPECTED err.
			 * This is the only place where PX_EXPECTED is utilized.
			 */
			erpt_handler = err_bit_desc->erpt_handler;
			if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) ||
			    (biterr == PX_EXPECTED))
				continue;

			if (erpt_handler)
				(void) erpt_handler(rpdip, csr_base, ss_reg,
				    derr, err_bit_desc->bit,
				    err_bit_desc->class_name);
		}

		/* Clear the register and error */
		CSR_XS(csr_base, clear_addr, ss_reg);
	}

	return (err);
}
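
/*
 * Note that only the snapshotted bits (ss_reg) are written back to the
 * clear register above; status bits that latched after the snapshot was
 * taken are left in place rather than being silently discarded.
 */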

/*
 * px_err_check_severity:
 * Check the severity of the fire error based on an earlier snapshot
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param err		fire register error status
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL
 */
static int
px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller)
{
	px_pec_t *pec_p = px_p->px_pec_p;
	boolean_t is_safeacc = B_FALSE;

	/*
	 * Nothing to do if called with no error.
	 * The err could have already been set to PX_NO_PANIC, which means the
	 * system doesn't need to panic, but PEEK/POKE still failed.
	 */
	if (err == PX_NO_ERROR)
		return (err);

	/* Cautious access error handling */
	switch (derr->fme_flag) {
	case DDI_FM_ERR_EXPECTED:
		if (caller == PX_TRAP_CALL) {
			/*
			 * for ddi_caut_get treat all events as nonfatal
			 * The trampoline will set err_ena = 0,
			 * err_status = NONFATAL.
			 */
			derr->fme_status = DDI_FM_NONFATAL;
			is_safeacc = B_TRUE;
		} else {
			/*
			 * For ddi_caut_put treat all events as nonfatal.  Here
			 * we have the handle and can call ndi_fm_acc_err_set().
			 */
			derr->fme_status = DDI_FM_NONFATAL;
			ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
			is_safeacc = B_TRUE;
		}
		break;
	case DDI_FM_ERR_PEEK:
	case DDI_FM_ERR_POKE:
		/*
		 * For ddi_peek/poke treat all events as nonfatal.
		 */
		is_safeacc = B_TRUE;
		break;
	default:
		is_safeacc = B_FALSE;
	}

	/* re-adjust error status from safe access, forgive all errors */
	if (is_safeacc)
		return (PX_NO_PANIC);

	return (err);
}

/* predefined convenience functions */
/* ARGSUSED */
void
px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr, char *msg)
{
	DBG(DBG_ERR_INTR, rpdip,
	    "Bit %d, %s, at %s(0x%x) has occurred %d times with a severity "
	    "of \"%s\"\n",
	    err_bit_descr->bit, err_bit_descr->class_name,
	    err_reg_descr->msg, err_reg_descr->status_addr,
	    err_bit_descr->counter, msg);
}

/* ARGSUSED */
int
px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_HW_RESET) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
		    "HW RESET");
	}

	return (PX_HW_RESET);
}

/* ARGSUSED */
int
px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_PANIC) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC");
	}

	return (PX_PANIC);
}

/* ARGSUSED */
int
px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_PROTECTED) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
		    "PROTECTED");
	}

	return (PX_PROTECTED);
}

/* ARGSUSED */
int
px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_NO_PANIC) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
		    "NO PANIC");
	}

	return (PX_NO_PANIC);
}

/* ARGSUSED */
int
px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_NO_ERROR) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
		    "NO ERROR");
	}

	return (PX_NO_ERROR);
}

/* ARGSUSED */
PX_ERPT_SEND_DEC(do_not)
{
	return (PX_NO_ERROR);
}
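
/*
 * Each convenience handler above logs only when the corresponding severity
 * bit is set in the px_log tunable and then simply returns its severity;
 * px_err_erpt_and_clr() ORs the returned severities together to form the
 * overall error disposition.
 */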

/*
 * Search the px_cb_list_t embedded in the px_cb_t for the
 * px_t of the specified Leaf (leaf_id).  Return its associated dip.
 */
static dev_info_t *
px_err_search_cb(px_cb_t *px_cb_p, uint_t leaf_id)
{
	int i;
	px_cb_list_t *pxl_elemp;

	for (i = px_cb_p->attachcnt, pxl_elemp = px_cb_p->pxl; i > 0;
	    i--, pxl_elemp = pxl_elemp->next) {
		if ((((pxu_t *)pxl_elemp->pxp->px_plat_p)->portid &
		    OBERON_PORT_ID_LEAF_MASK) == leaf_id) {
			return (pxl_elemp->pxp->px_dip);
		}
	}
	return (NULL);
}

/* UBC FATAL - see io erpt doc, section 1.1 */
/* ARGSUSED */
PX_ERPT_SEND_DEC(ubc_fatal)
{
	char buf[FM_MAX_CLASS];
	uint64_t memory_ue_log, marked;
	char unum[FM_MAX_CLASS];
	int unum_length;
	uint64_t device_id = 0;
	uint8_t cpu_version = 0;
	nvlist_t *resource = NULL;
	uint64_t ubc_intr_status;
	px_t *px_p;
	px_cb_t *px_cb_p;
	dev_info_t *actual_dip;

	unum[0] = '\0';
	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);

	memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG);
	marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) &
	    UBC_MEMORY_UE_LOG_MARKED_MASK;

	if ((strstr(class_name, "ubc.piowtue") != NULL) ||
	    (strstr(class_name, "ubc.piowbeue") != NULL) ||
	    (strstr(class_name, "ubc.piorbeue") != NULL) ||
	    (strstr(class_name, "ubc.dmarduea") != NULL) ||
	    (strstr(class_name, "ubc.dmardueb") != NULL)) {
		int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) &
		    UBC_MEMORY_UE_LOG_EID_MASK;
		(void) strncat(buf, ubc_class_eid_qualifier[eid],
		    FM_MAX_CLASS);

		if (eid == UBC_EID_MEM) {
			uint64_t phys_addr = memory_ue_log &
			    MMU_OBERON_PADDR_MASK;
			uint64_t offset = (uint64_t)-1;

			resource = fm_nvlist_create(NULL);
			if (&plat_get_mem_unum) {
				if ((plat_get_mem_unum(0,
				    phys_addr, 0, B_TRUE, 0, unum,
				    FM_MAX_CLASS, &unum_length)) != 0)
					unum[0] = '\0';
			}
			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
			    NULL, unum, NULL, offset);

		} else if (eid == UBC_EID_CPU) {
			int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK);
			char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */

			resource = fm_nvlist_create(NULL);
			cpu_version = cpunodes[cpuid].version;
			device_id = cpunodes[cpuid].device_id;
			(void) snprintf(sbuf, sizeof (sbuf), "%lX",
			    device_id);
			(void) fm_fmri_cpu_set(resource,
			    FM_CPU_SCHEME_VERSION, NULL, cpuid,
			    &cpu_version, sbuf);
		}
	}

	/*
	 * For most of the errors represented in the UBC Interrupt Status
	 * register, one can compute the dip of the actual Leaf that was
	 * involved in the error.  To do this, find the px_cb_t structure
	 * that is shared between a pair of Leaves (eg, LeafA and LeafB).
	 *
	 * If any of the error bits for LeafA are set in the hardware
	 * register, search the list of px_t's rooted in the px_cb_t for
	 * the one corresponding to LeafA.  If error bits for LeafB are set,
	 * search the list for LeafB's px_t.  The px_t references its
	 * associated dip.
	 */
	px_p = DIP_TO_STATE(rpdip);
	px_cb_p = ((pxu_t *)px_p->px_plat_p)->px_cb_p;

	/* read hardware register */
	ubc_intr_status = CSR_XR(csr_base, UBC_INTERRUPT_STATUS);

	if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFA) != 0) {
		/* then Leaf A is involved in the error */
		actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_A);
		ASSERT(actual_dip != NULL);
		rpdip = actual_dip;
	} else if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFB) != 0) {
		/* then Leaf B is involved in the error */
		actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_B);
		ASSERT(actual_dip != NULL);
		rpdip = actual_dip;
	} /* else error cannot be associated with a Leaf */

	if (resource) {
		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
		    OBERON_UBC_IE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
		    OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
		    OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource,
		    NULL);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	} else {
		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
		    OBERON_UBC_IE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
		    OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
		    NULL);
	}

	return (PX_NO_PANIC);
}

/* JBC FATAL */
PX_ERPT_SEND_DEC(jbc_fatal)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_FEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, FATAL_ERROR_LOG_1),
	    FIRE_JBC_FEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, FATAL_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC MERGE */
PX_ERPT_SEND_DEC(jbc_merge)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_MTEL, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Merge buffer retryable errors:
 *    Merge buffer parity error (rd_buf): PIO or DMA
 *    Merge buffer parity error (wr_buf): PIO or DMA
 */
/* ARGSUSED */
int
px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery.  When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG);
	 * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information;
	 * just panic, as it is unknown which address has been affected.
	 *
	 * Remember the address is pre-translation and it might be hard to
	 * look up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/* JBC Jbusint IN */
PX_ERPT_SEND_DEC(jbc_in)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_JITEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG),
	    FIRE_JBC_JITEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Jbusint IN retryable errors
 * Log Reg[42:0].
 *    Write Data Parity Error: PIO Writes
 *    Read Data Parity Error: DMA Reads
 */
int
px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery.  When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG);
	 * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information;
	 * just panic, as it is unknown which address has been affected.
	 *
	 * Remember the address is pre-translation and it might be hard to
	 * look up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}


/* JBC Jbusint Out */
PX_ERPT_SEND_DEC(jbc_out)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_JOTEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG),
	    FIRE_JBC_JOTEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC Dmcint ODCD */
PX_ERPT_SEND_DEC(jbc_odcd)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Dmcint ODCD nonfatal error handling -
 *    PIO data parity error: PIO
 */
/* ARGSUSED */
int
px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery.  When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG);
	 * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information;
	 * just panic, as it is unknown which address has been affected.
	 *
	 * Remember the address is pre-translation and it might be hard to
	 * look up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/* Does address in DMCINT error log register match address of pcitool access? */
static boolean_t
px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base)
{
	px_t *px_p = DIP_TO_STATE(rpdip);
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t pcitool_addr = pxu_p->pcitool_addr;
	caddr_t errlog_addr =
	    (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS);

	return (pcitool_addr == errlog_addr);
}

/*
 * JBC Dmcint ODCD error handling for errors which are forgivable during a safe
 * access.  (This will most likely be a PCItool access.)  If not a safe
 * access context, treat like jbc_dmcint_odcd.
 *    Unmapped PIO read error: pio:read:M:nonfatal
 *    Unmapped PIO write error: pio:write:M:nonfatal
 *    Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal
 *    Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal
 */
/* ARGSUSED */
int
px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit);

	if (!pri)
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));

	/*
	 * Got an error which is forgivable during a PCItool access.
	 *
	 * Don't do handler check since the error may otherwise be unfairly
	 * attributed to a device.  Just return.
	 *
	 * Note: There is a hole here in that a legitimate error can come in
	 * while a PCItool access is in play and be forgiven.  This is possible
	 * though not likely.
	 */
	if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) &&
	    (px_jbc_pcitool_addr_match(rpdip, csr_base)))
		return (px_err_protected_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));

	return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr,
	    err_reg_descr, err_bit_descr));
}

/* JBC Dmcint IDC */
PX_ERPT_SEND_DEC(jbc_idc)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC CSR */
PX_ERPT_SEND_DEC(jbc_csr)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    "jbc-error-reg", DATA_TYPE_UINT64,
	    CSR_XR(csr_base, CSR_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC IMU RDS */
PX_ERPT_SEND_DEC(imu_rds)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    FIRE_IMU_RDS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_RDS_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* handle EQ overflow */
/* ARGSUSED */
int
px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_t *px_p = DIP_TO_STATE(rpdip);
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	int err = px_err_check_eq(rpdip);

	if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) {
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	} else {
		return (px_err_no_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	}
}

/* DMC IMU SCS */
PX_ERPT_SEND_DEC(imu_scs)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    FIRE_IMU_SCS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_SCS_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC IMU */
PX_ERPT_SEND_DEC(imu)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC MMU TFAR/TFSR */
PX_ERPT_SEND_DEC(mmu_tfar_tfsr)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);
	px_t *px_p = DIP_TO_STATE(rpdip);
	pcie_req_id_t fault_bdf = PCIE_INVALID_BDF;
	uint16_t s_status = 0;

	if (pri) {
		fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS)
		    & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK <<
		    MMU_TRANSLATION_FAULT_STATUS_ID);
		s_status = PCI_STAT_S_TARG_AB;

		/* Only PIO Fault Addresses are valid, this is DMA */
		(void) px_rp_en_q(px_p, fault_bdf, NULL, s_status);
	}

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);

	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
	    FIRE_MMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
	    FIRE_MMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
	    FIRE_MMU_TFAR, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS),
	    FIRE_MMU_TFSR, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC MMU */
PX_ERPT_SEND_DEC(mmu)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
	    FIRE_MMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
	    FIRE_MMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * MMU function to handle all Received but Not Enabled errors.
 *
 * These errors are due to transaction modes that the PX driver was not set
 * up to handle.  If possible, inform the driver that its DMA has failed by
 * marking its DMA handle as failed, but do not panic the system.  Most
 * likely the address is not valid, as Fire wasn't set up to handle it in
 * the first place.
 *
 * These errors are not retryable unless the PX mode has changed; otherwise
 * the same error will occur again.
 */
int
px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	pcie_req_id_t bdf;

	if (!PX_ERR_IS_PRI(err_bit_descr->bit))
		goto done;

	bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
	(void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL,
	    bdf);

done:
	return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/*
 * MMU function to handle all invalid address errors.
 *
 * These errors are due to transactions in which the address is not recognized.
 * If possible, inform the driver that all DMAs have failed by marking their DMA
 * handles.  Fire should not panic the system; it'll be up to the driver to
 * panic.  The address logged is invalid.
 *
 * These errors are not retryable since retrying the same transaction with the
 * same invalid address will result in the same error.
 */
/* ARGSUSED */
int
px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	pcie_req_id_t bdf;

	if (!PX_ERR_IS_PRI(err_bit_descr->bit))
		goto done;

	bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
	(void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL,
	    bdf);

done:
	return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/*
 * MMU function to handle normal transactions that encounter a parity error.
 *
 * These errors are due to transactions that encounter a parity error.  If
 * possible, inform the driver that its DMA has failed and that it should
 * retry.  If Fire is unable to contact the leaf driver, panic the system.
 * Otherwise, it'll be up to the device to determine if this is a panicable
 * error.
/*
 * MMU function to handle normal transactions that encounter a parity error.
 *
 * If possible, inform the driver that its DMA has failed and that it should
 * retry.  If Fire is unable to contact the leaf driver, panic the system.
 * Otherwise, it is up to the device driver to determine whether this is a
 * panicable error.
 */
/* ARGSUSED */
int
px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	uint64_t mmu_tfa;
	pcie_req_id_t bdf;
	int status = PF_HDL_NOTFOUND;

	if (!PX_ERR_IS_PRI(err_bit_descr->bit))
		goto done;

	mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS);
	bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID);
	status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA,
	    (uint32_t)mmu_tfa, bdf);

done:
	if (status == PF_HDL_NOTFOUND)
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	else
		return (px_err_no_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
}

/*
 * WUC/RUC event - mark the handle of the failed PIO access.  Return "no_panic".
 */
/* ARGSUSED */
int
px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_t *px_p = DIP_TO_STATE(rpdip);
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	uint64_t data;
	pf_pcie_adv_err_regs_t adv_reg;
	int sts = PF_HDL_FOUND;	/* avoid uninitialized use on the !PRI path */

	if (!PX_ERR_IS_PRI(err_bit_descr->bit))
		goto done;

	data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG);
	adv_reg.pcie_ue_hdr[0] = (uint32_t)(data >> 32);
	adv_reg.pcie_ue_hdr[1] = (uint32_t)(data & 0xFFFFFFFF);
	data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG);
	adv_reg.pcie_ue_hdr[2] = (uint32_t)(data >> 32);
	adv_reg.pcie_ue_hdr[3] = (uint32_t)(data & 0xFFFFFFFF);

	pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg);
	sts = pf_hdl_lookup(rpdip, derr->fme_ena, adv_reg.pcie_ue_tgt_trans,
	    adv_reg.pcie_ue_tgt_addr, adv_reg.pcie_ue_tgt_bdf);
done:
	if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR))
		return (px_err_protected_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));

	return (px_err_no_panic_handle(rpdip, csr_base, derr,
	    err_reg_descr, err_bit_descr));
}

/*
 * TLU LUP event - if caused by power management activity, then it is expected.
 * In all other cases, it is an error.
 */
/* ARGSUSED */
int
px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_t *px_p = DIP_TO_STATE(rpdip);

	/*
	 * The power management code is currently the only code that sets
	 * px_lup_pending to indicate that it expects a healthy LUP event.
	 * On all other occasions, a LUP event should be flagged as an
	 * error condition.
	 */
	return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ?
	    PX_NO_PANIC : PX_EXPECTED);
}
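/*
 * Illustrative sketch (assumption, not code from this file): the power
 * management path described above would arm the flag before initiating a
 * link-up, so that the resulting LUP interrupt is consumed by the
 * atomic_cas_32() in px_err_tlu_lup_handle() and reported as PX_EXPECTED:
 *
 *	px_p->px_lup_pending = 1;
 *	... initiate link up / leave the low-power link state ...
 *
 * The exact location and form of that producer code are not shown here.
 */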
/*
 * TLU LDN event - if caused by power management activity, then it is expected.
 * In all other cases, it is an error.
 */
/* ARGSUSED */
int
px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_t *px_p = DIP_TO_STATE(rpdip);

	return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED :
	    PX_NO_PANIC);
}

/* PEC ILU none - see io erpt doc, section 3.1 */
PX_ERPT_SEND_DEC(pec_ilu)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_ILU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE),
	    FIRE_ILU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, ILU_INTERRUPT_ENABLE),
	    FIRE_ILU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_ILU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, ILU_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/* PCIEX UE Errors */
/* ARGSUSED */
int
px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_err_pcie_t regs = {0};
	uint32_t err_bit;
	int err;
	uint64_t log;

	if (err_bit_descr->bit < 32) {
		err_bit = (uint32_t)BITMASK(err_bit_descr->bit);
		regs.ue_reg = err_bit;
		regs.primary_ue = err_bit;

		/*
		 * Log the Received Log for PTLP, UR and UC.
		 */
		if ((PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR | PCIE_AER_UCE_UC) &
		    err_bit) {
			log = CSR_XR(csr_base,
			    TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG);
			regs.rx_hdr1 = (uint32_t)(log >> 32);
			regs.rx_hdr2 = (uint32_t)(log & 0xFFFFFFFF);

			log = CSR_XR(csr_base,
			    TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG);
			regs.rx_hdr3 = (uint32_t)(log >> 32);
			regs.rx_hdr4 = (uint32_t)(log & 0xFFFFFFFF);
		}
	} else {
		regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32);
	}

	err = px_err_check_pcie(rpdip, derr, &regs);

	if (err & PX_PANIC) {
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	} else {
		return (px_err_no_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	}
}
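/*
 * Note on the bit arithmetic above (an assumption drawn from the code itself,
 * not from Fire documentation quoted here): the low 32 bits of the 64-bit TLU
 * status appear to carry the primary (_P) error bits and the high 32 bits the
 * secondary (_S) duplicates, with both halves laid out to match the 32-bit
 * PCIe AER status format.  That is why BITMASK(bit) for bit < 32, or
 * BITMASK(bit - 32) otherwise, can be compared directly against masks such as
 * PCIE_AER_UCE_PTLP, and why only the low half is recorded as primary_ue.
 * The correctable-error handler below applies the same split to ce_reg.
 */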
/* PCI-E Uncorrectable Errors - receive header logs */
PX_ERPT_SEND_DEC(pciex_rx_ue)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* PCI-E Uncorrectable Errors - transmit header logs */
PX_ERPT_SEND_DEC(pciex_tx_ue)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
	    FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
	    FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* PCI-E Uncorrectable Errors - receive and transmit header logs */
PX_ERPT_SEND_DEC(pciex_rx_tx_ue)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG),
	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG),
	    FIRE_TLU_TUEH1L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG),
	    FIRE_TLU_TUEH2L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* PCI-E Uncorrectable Errors - no header logs */
PX_ERPT_SEND_DEC(pciex_ue)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_UELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE),
	    FIRE_TLU_UIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE),
	    FIRE_TLU_UIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_UESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}
/* PCIEX CE Errors */
/* ARGSUSED */
int
px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_err_pcie_t regs = {0};
	int err;

	if (err_bit_descr->bit < 32)
		regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit);
	else
		regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32);

	err = px_err_check_pcie(rpdip, derr, &regs);

	if (err & PX_PANIC) {
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	} else {
		return (px_err_no_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	}
}

/* PCI-E Correctable Errors - see io erpt doc, section 3.6 */
PX_ERPT_SEND_DEC(pciex_ce)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_CELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE),
	    FIRE_TLU_CIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE),
	    FIRE_TLU_CIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_CESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */
PX_ERPT_SEND_DEC(pciex_rx_oe)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
	    FIRE_TLU_RUEH1L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG),
	    FIRE_TLU_RUEH2L, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG),
	    NULL);

	return (PX_NO_PANIC);
}
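/*
 * A note on the header-log handling in the function below (inferred from the
 * code itself, not from Fire documentation quoted here): each 64-bit OTHER
 * EVENT header-log CSR is split into the two 32-bit dwords expected by
 * pf_tlp_decode(), upper half first:
 *
 *	pcie_ue_hdr[0] = (uint32_t)(h1 >> 32);	pcie_ue_hdr[1] = (uint32_t)h1;
 *	pcie_ue_hdr[2] = (uint32_t)(h2 >> 32);	pcie_ue_hdr[3] = (uint32_t)h2;
 *
 * For RUC/WUC events the receive logs hold the completion TLP, so dword 1 is
 * overlaid with pcie_cpl_t to pick out the completion status (UR vs. CA),
 * while the transmit logs identify the original request used for the fault
 * address lookup.
 */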
/* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */
PX_ERPT_SEND_DEC(pciex_rx_tx_oe)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);
	px_t *px_p = DIP_TO_STATE(rpdip);
	uint64_t rx_h1, rx_h2, tx_h1, tx_h2;
	uint16_t s_status = 0;	/* default if cpl status is neither UR nor CA */
	int sts;
	pcie_cpl_t *cpl;
	pf_pcie_adv_err_regs_t adv_reg;

	rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG);
	rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG);
	tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG);
	tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG);

	if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) ||
	    (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) {
		adv_reg.pcie_ue_hdr[0] = (uint32_t)(rx_h1 >> 32);
		adv_reg.pcie_ue_hdr[1] = (uint32_t)rx_h1;
		adv_reg.pcie_ue_hdr[2] = (uint32_t)(rx_h2 >> 32);
		adv_reg.pcie_ue_hdr[3] = (uint32_t)rx_h2;

		/* get completer bdf (fault bdf) from rx logs */
		cpl = (pcie_cpl_t *)&adv_reg.pcie_ue_hdr[1];

		/* Figure out if UR/CA from rx logs */
		if (cpl->status == PCIE_CPL_STS_UR)
			s_status = PCI_STAT_R_MAST_AB;
		else if (cpl->status == PCIE_CPL_STS_CA)
			s_status = PCI_STAT_R_TARG_AB;

		adv_reg.pcie_ue_hdr[0] = (uint32_t)(tx_h1 >> 32);
		adv_reg.pcie_ue_hdr[1] = (uint32_t)tx_h1;
		adv_reg.pcie_ue_hdr[2] = (uint32_t)(tx_h2 >> 32);
		adv_reg.pcie_ue_hdr[3] = (uint32_t)tx_h2;

		/* get fault addr from tx logs */
		sts = pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg);

		if (sts == DDI_SUCCESS)
			(void) px_rp_en_q(px_p, adv_reg.pcie_ue_tgt_bdf,
			    adv_reg.pcie_ue_tgt_addr, s_status);
	}

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
	    FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1,
	    FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2,
	    FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1,
	    FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2,
	    NULL);

	return (PX_NO_PANIC);
}

/* TLU Other Event - see io erpt doc, section 3.9 */
PX_ERPT_SEND_DEC(pciex_oe)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_TLU_OEELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE),
	    FIRE_TLU_OEIE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE),
	    FIRE_TLU_OEIS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_TLU_OEESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}