/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4u Fire Error Handling
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/pcie.h>
#include <sys/pcie_impl.h>
#include "px_obj.h"
#include <px_regs.h>
#include <px_csr.h>
#include <sys/membar.h>
#include <sys/machcpuvar.h>
#include <sys/platform_module.h>
#include "pcie_pwr.h"
#include "px_lib4u.h"
#include "px_err.h"
#include "px_err_impl.h"
#include "oberon_regs.h"

/*
 * Patchable per-register-group masks.  For each group "<grp>":
 *   px_<grp>_intr_mask  - bits for which an interrupt is enabled
 *   px_<grp>_log_mask   - bits for which error logging is enabled
 *   px_<grp>_count_mask - bits for which the per-bit error counter
 *                         (px_err_bit_desc_t.counter) is incremented
 * These globals are consumed via the MnT6X/MnT6 entries of
 * px_err_reg_tbl below and may be tuned (e.g. via /etc/system).
 */

/* TLU Uncorrectable Errors: everything enabled */
uint64_t px_tlu_ue_intr_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_log_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_count_mask = PX_ERR_EN_ALL;

/* TLU Correctable Errors: everything masked off */
uint64_t px_tlu_ce_intr_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_log_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_count_mask = PX_ERR_MASK_NONE;

/*
 * Do not enable Link Interrupts
 *
 * 0x80000000800 clears bit 11 and bit 43 (the same bit in the primary
 * and secondary halves of the register).
 * NOTE(review): presumably this is the link-event bit pair mentioned
 * above -- confirm against the TLU Other Event register layout.
 */
uint64_t px_tlu_oe_intr_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_log_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_count_mask = PX_ERR_EN_ALL;

uint64_t px_mmu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_log_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_count_mask = PX_ERR_EN_ALL;

uint64_t px_imu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_imu_log_mask = PX_ERR_EN_ALL;
uint64_t px_imu_count_mask = PX_ERR_EN_ALL;

/*
 * Only the IHB_PE bits are enabled for the ILU; equivalent to:
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
 */
uint64_t px_ilu_intr_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_log_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_count_mask = PX_ERR_EN_ALL;

/* UBC: Oberon host-bridge block (see CHP_O in px_err_reg_tbl) */
uint64_t px_ubc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_log_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_count_mask = PX_ERR_EN_ALL;

/* JBC: Fire host-bridge block (see CHP_F in px_err_reg_tbl) */
uint64_t px_jbc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_log_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_count_mask = PX_ERR_EN_ALL;

/*
 * LPU Intr Registers are reverse encoding from the registers above.
 * 1 = disable
 * 0 = enable
 *
 * Log and Count are however still the same.
 */
uint64_t px_lpul_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpul_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpul_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpup_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpup_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpup_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpur_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpur_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpur_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpux_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpux_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpux_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpus_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpus_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpus_count_mask = PX_ERR_EN_ALL;

uint64_t px_lpug_intr_mask = LPU_INTR_DISABLE;
uint64_t px_lpug_log_mask = PX_ERR_EN_ALL;
uint64_t px_lpug_count_mask = PX_ERR_EN_ALL;

/*
 * JBC error bit table
 *
 * Each *_BIT_DESC(bit, hdl, erpt) macro below expands to TWO
 * px_err_bit_desc_t initializers: one for the primary (_P) copy of the
 * status bit and one for the secondary (_S) copy, both sharing the same
 * error handler (hdl) and ereport sender (erpt).  The bare "0" in each
 * entry is the initial per-bit error counter.
 */
#define	JBC_BIT_DESC(bit, hdl, erpt) \
	JBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit) }, \
	{ JBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit)
px_err_bit_desc_t px_err_jbc_tbl[] = {
	/* JBC FATAL */
	{ JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(APE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) },
	{ JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) },

	/* JBC MERGE */
	{ JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) },
	{ JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) },

	/* JBC Jbusint IN */
	{ JBC_BIT_DESC(UE_ASYN, panic, jbc_in) },
	{ JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) },
	{ JBC_BIT_DESC(JTE, panic, jbc_in) },
	{ JBC_BIT_DESC(JBE, panic, jbc_in) },
	{ JBC_BIT_DESC(JUE, panic, jbc_in) },
	{ JBC_BIT_DESC(ICISE, panic, jbc_in) },
	{ JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) },
	{ JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) },
	{ JBC_BIT_DESC(ILL_BMW, panic, jbc_in) },
	{ JBC_BIT_DESC(ILL_BMR, panic, jbc_in) },
	{ JBC_BIT_DESC(BJC, panic, jbc_in) },

	/* JBC Jbusint Out */
	{ JBC_BIT_DESC(IJP, panic, jbc_out) },

	/*
	 * JBC Dmcint ODCD
	 *
	 * Error bits which can be set via a bad PCItool access go through
	 * jbc_safe_acc instead.
	 */
	{ JBC_BIT_DESC(PIO_UNMAP_RD, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) },
	/*
	 * NOTE(review): PIO_CPE also appears above under "JBC FATAL" with
	 * the same hw_reset handler -- confirm the duplicate entry is
	 * intentional.
	 */
	{ JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) },

	/* JBC Dmcint IDC */
	{ JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) },
	{ JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) },

	/* JBC CSR */
	{ JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) }
};

/* Number of entries in px_err_jbc_tbl (each BIT_DESC contributes two) */
#define	px_err_jbc_keys \
	(sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * UBC error bit table
 */
#define	UBC_BIT_DESC(bit, hdl, erpt) \
	UBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit) }, \
	{ UBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit)
px_err_bit_desc_t px_err_ubc_tbl[] = {
	/* UBC FATAL */
	{ UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) },
	{ UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) }
};

#define	px_err_ubc_keys \
	(sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t))

/* Suffixes appended to UBC ereport class names to qualify the EID */
char *ubc_class_eid_qualifier[] = {
	"-mem",
	"-channel",
	"-cpu",
	"-path"
};

/*
 * DMC error bit tables
 */
#define	IMU_BIT_DESC(bit, hdl, erpt) \
	IMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ IMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_imu_tbl[] = {
	/* DMC IMU RDS */
	{ IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) },
	{ IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) },
	{ IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) },
	{ IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) },

	/* DMC IMU SCS */
	{ IMU_BIT_DESC(EQ_NOT_EN, panic, imu_scs) },

	/* DMC IMU */
	{ IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) }
};

#define	px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t))

/* mmu errors */
#define	MMU_BIT_DESC(bit, hdl, erpt) \
	MMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ MMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_mmu_tbl[] = {
	/* DMC MMU TFAR/TFSR */
	{ MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) },

	/* DMC MMU */
	{ MMU_BIT_DESC(TTC_CAE, panic, mmu) }
};
#define	px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * PEC error bit tables
 */
#define	ILU_BIT_DESC(bit, hdl, erpt) \
	ILU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ ILU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_ilu_tbl[] = {
	/* PEC ILU none */
	{ ILU_BIT_DESC(IHB_PE, panic, pec_ilu) }
};
#define	px_err_ilu_keys \
	(sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * PEC UE errors implementation is incomplete pending PCIE generic
 * fabric rules.  Must handle both PRIMARY and SECONDARY errors.
 */
/* pec ue errors */
#define	TLU_UC_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
/*
 * Oberon (OB) variant: the primary entry uses the Oberon ereport class.
 * NOTE(review): the secondary (_S) entry here uses PX_ERR_PEC_CLASS
 * while TLU_OE_OB_BIT_DESC below uses PX_ERR_PEC_OB_CLASS for both
 * halves -- confirm this asymmetry is intended.
 */
#define	TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ue_tbl[] = {
	/* PCI-E Receive Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) },

	/* PCI-E Transmit Uncorrectable Errors */
	/* NOTE(review): ECRC sends the pciex_rx_ue ereport -- verify */
	{ TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) },
	{ TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) },

	/* PCI-E Rx/Tx Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) },
	{ TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) },

	/* Other PCI-E Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) },
	{ TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) },
	{ TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) },

	/* Not used */
	{ TLU_UC_BIT_DESC(CA, pciex_ue, do_not) }
};
#define	px_err_tlu_ue_keys \
	(sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * PEC CE errors implementation is incomplete pending PCIE generic
 * fabric rules.
 */
/* pec ce errors */
#define	TLU_CE_BIT_DESC(bit, hdl, erpt) \
	TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ce_tbl[] = {
	/* PCI-E Correctable Errors */
	{ TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(BTP, pciex_ce, pciex_ce) },
	{ TLU_CE_BIT_DESC(RE, pciex_ce, pciex_ce) }
};
#define	px_err_tlu_ce_keys \
	(sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t))


/* pec oe errors */
#define	TLU_OE_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
/* Oberon (OB) variant: both halves use the Oberon ereport class */
#define	TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit)
px_err_bit_desc_t px_err_tlu_oe_tbl[] = {
	/* TLU Other Event Status (receive only) */
	{ TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) },

	/* TLU Other Event Status (rx + tx) */
	{ TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) },

	/* TLU Other Event */
	{ TLU_OE_BIT_DESC(IIP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EDP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EHP, panic, pciex_oe) },
	{ TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) },
	{ TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERU, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERO, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EMP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EPE, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(ERP, panic, pciex_oe) },
	{ TLU_OE_BIT_DESC(EIP, panic, pciex_oe) }
};

#define	px_err_tlu_oe_keys \
	(sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * All the following tables below are for LPU Interrupts.  These interrupts
 * are *NOT* error interrupts, but event status interrupts.
 *
 * These events are probably of most interest to:
 * o Hotplug
 * o Power Management
 * o etc...
 *
 * There are also a few events that would be interesting for FMA.
 * Again none of the registers below state that an error has occurred
 * or that data has been lost.  If anything, they give status that an
 * error is *about* to occur.  examples
 * o INT_SKP_ERR - indicates clock between fire and child is too far
 *		   off and is most unlikely able to compensate
 * o INT_TX_PAR_ERR - A parity error occurred in ONE lane.  This is
 *		      HW recoverable, but will likely end up as a future
 *		      fabric error as well.
 *
 * For now, we don't care about any of these errors and should be ignore,
 * but cleared.
 */

/*
 * The LPU *_BIT_DESC macros deliberately ignore their hdl/erpt arguments:
 * LPU events get no handler, no ereport, and an empty class name -- they
 * are only acknowledged (cleared) by the common code.
 */

/* LPU Link Interrupt Table */
#define	LPUL_BIT_DESC(bit, hdl, erpt) \
	LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpul_tbl[] = {
	{ LPUL_BIT_DESC(LINK_ERR_ACT, NULL, NULL) }
};
#define	px_err_lpul_keys \
	(sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Physical Interrupt Table */
#define	LPUP_BIT_DESC(bit, hdl, erpt) \
	LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpup_tbl[] = {
	{ LPUP_BIT_DESC(PHY_LAYER_ERR, NULL, NULL) }
};
#define	px_err_lpup_keys \
	(sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Receive Interrupt Table */
#define	LPUR_BIT_DESC(bit, hdl, erpt) \
	LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpur_tbl[] = {
	{ LPUR_BIT_DESC(RCV_PHY, NULL, NULL) }
};
#define	px_err_lpur_keys \
	(sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Transmit Interrupt Table */
#define	LPUX_BIT_DESC(bit, hdl, erpt) \
	LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpux_tbl[] = {
	{ LPUX_BIT_DESC(UNMSK, NULL, NULL) }
};
#define	px_err_lpux_keys \
	(sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU LTSSM Interrupt Table */
#define	LPUS_BIT_DESC(bit, hdl, erpt) \
	LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpus_tbl[] = {
	{ LPUS_BIT_DESC(ANY, NULL, NULL) }
};
#define	px_err_lpus_keys \
	(sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Gigablaze Glue Interrupt Table */
#define	LPUG_BIT_DESC(bit, hdl, erpt) \
	LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpug_tbl[] = {
	{ LPUG_BIT_DESC(GLOBL_UNMSK, NULL, NULL) }
};
#define	px_err_lpug_keys \
	(sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * Mask and Tables
 *
 * MnT6X/MnT6 expand to six px_err_reg_desc_t fields: the three tunable
 * mask pointers (intr/log/count), the bit-description table, its entry
 * count, and the register bank (XBC for host-bridge blocks, CSR for the
 * PCIe core).  The trailing 0 initializes the remaining field.
 */
#define	MnT6X(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_XBC, \
	0

#define	MnT6(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_CSR, \
	0

/*
 * LPU Registers Addresses
 * The four address fields are: log enable (none for LPU), interrupt
 * mask, status, and clear (LPU uses the same reg for status and clear).
 */
#define	LR4(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS

/* LPU Registers Addresses with Irregularities */
#define	LR4_FIXME(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS

/* TLU Registers Addresses */
#define	TR4(pre) \
	TLU_ ## pre ## _LOG_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_STATUS, \
	TLU_ ## pre ## _STATUS_CLEAR

/* Registers Addresses for JBC, UBC, MMU, IMU and ILU */
#define	R4(pre) \
	pre ## _ERROR_LOG_ENABLE, \
	pre ## _INTERRUPT_ENABLE, \
	pre ## _INTERRUPT_STATUS, \
	pre ## _ERROR_STATUS_CLEAR

/* Bits in chip_mask, set according to type. */
#define	CHP_O	BITMASK(PX_CHIP_OBERON)
#define	CHP_F	BITMASK(PX_CHIP_FIRE)
#define	CHP_FO	(CHP_F | CHP_O)

/*
 * Register error handling tables.
 * The ID Field (first field) is identified by an enum px_err_id_t.
 * It is located in px_err.h
 */
static const
px_err_reg_desc_t px_err_reg_tbl[] = {
	{ CHP_F,  MnT6X(jbc),	R4(JBC),		  "JBC Error"},
	{ CHP_O,  MnT6X(ubc),	R4(UBC),		  "UBC Error"},
	{ CHP_FO, MnT6(mmu),	R4(MMU),		  "MMU Error"},
	{ CHP_FO, MnT6(imu),	R4(IMU),		  "IMU Error"},
	{ CHP_FO, MnT6(tlu_ue),	TR4(UNCORRECTABLE_ERROR), "TLU UE"},
	{ CHP_FO, MnT6(tlu_ce),	TR4(CORRECTABLE_ERROR),	  "TLU CE"},
	{ CHP_FO, MnT6(tlu_oe),	TR4(OTHER_EVENT),	  "TLU OE"},
	{ CHP_FO, MnT6(ilu),	R4(ILU),		  "ILU Error"},
	{ CHP_F,  MnT6(lpul),	LR4(LINK_LAYER),	  "LPU Link Layer"},
	{ CHP_F,  MnT6(lpup),	LR4_FIXME(PHY),		  "LPU Phy Layer"},
	{ CHP_F,  MnT6(lpur),	LR4(RECEIVE_PHY),	  "LPU RX Phy Layer"},
	{ CHP_F,  MnT6(lpux),	LR4(TRANSMIT_PHY),	  "LPU TX Phy Layer"},
	{ CHP_F,  MnT6(lpus),	LR4(LTSSM),		  "LPU LTSSM"},
	{ CHP_F,  MnT6(lpug),	LR4(GIGABLAZE_GLUE),	  "LPU GigaBlaze Glue"},
};

#define	PX_ERR_REG_KEYS	(sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0]))

/* Snapshot of every register group's status, indexed by px_err_id_t */
typedef struct px_err_ss {
	uint64_t err_status[PX_ERR_REG_KEYS];
} px_err_ss_t;

static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block);
static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr,
    px_err_ss_t *ss);
static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
    int err, int caller);

/*
 * px_err_cb_intr:
 * Interrupt handler for the JBC/UBC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr
 * o unlock
 * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_cb_intr(caddr_t arg)
{
	px_fault_t	*px_fault_p = (px_fault_t *)arg;
	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
	px_t		*px_p = DIP_TO_STATE(rpdip);
	int		err;
	ddi_fm_error_t	derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	/* Hold the FM mutex across snapshot/ereport/clear */
	mutex_enter(&px_p->px_fm_mutex);
	px_p->px_fm_mutex_owner = curthread;

	/* Host-bridge (JBC/UBC) block only */
	err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST);

	/* Re-arm the interrupt before dropping the lock */
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	px_p->px_fm_mutex_owner = NULL;
	mutex_exit(&px_p->px_fm_mutex);

	/* Panics here if err demands it; otherwise returns */
	px_err_panic(err, PX_HB, PX_NO_ERROR);

	return (DDI_INTR_CLAIMED);
}

/*
 * px_err_dmc_pec_intr:
 * Interrupt handler for the DMC/PEC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr(leaf, with out cb)
 * o pcie_scan_fabric (leaf)
 * o unlock
 * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_dmc_pec_intr(caddr_t arg)
{
	px_fault_t	*px_fault_p = (px_fault_t *)arg;
	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
	px_t		*px_p = DIP_TO_STATE(rpdip);
	int		rc_err, fab_err = PF_NO_PANIC;
	ddi_fm_error_t	derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	mutex_enter(&px_p->px_fm_mutex);
	px_p->px_fm_mutex_owner = curthread;

	/* send ereport/handle/clear fire registers */
	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);

	/* Check all child devices for errors (skipped while draining) */
	if (!px_lib_is_in_drain_state(px_p)) {
		fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p,
		    &px_p->px_dq_tail);
	}

	/* Set the interrupt state to idle */
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	px_p->px_fm_mutex_owner = NULL;
	mutex_exit(&px_p->px_fm_mutex);

	/* Combine root-complex and fabric severities; may not return */
	px_err_panic(rc_err, PX_RC, fab_err);

	return (DDI_INTR_CLAIMED);
}

/*
 * Proper csr_base is responsibility of the caller. (Called from px_lib_dev_init
 * via px_err_reg_setup_all for pcie error registers; called from
 * px_cb_add_intr for jbc/ubc from px_cb_attach.)
 *
 * Note: reg_id is passed in instead of reg_desc since this function is called
 * from px_lib4u.c, which doesn't know about the structure of the table.
 */
void
px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
	uint64_t 		intr_mask = *reg_desc_p->intr_mask_p;
	uint64_t 		log_mask = *reg_desc_p->log_mask_p;

	/* Enable logs if it exists (LPU groups have no log register) */
	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, log_mask);

	/*
	 * For readability you in code you set 1 to enable an interrupt.
	 * But in Fire it's backwards.  You set 1 to *disable* an intr.
	 * Reverse the user tunable intr mask field.
	 *
	 * Disable All Errors
	 * Clear All Errors
	 * Enable Errors
	 *
	 * NOTE(review): the write below applies intr_mask as-is; any
	 * reversal must already be encoded in the tunable (cf. the
	 * LPU_INTR_DISABLE masks above) -- confirm.
	 */
	CSR_XS(csr_base, reg_desc_p->enable_addr, 0);
	CSR_XS(csr_base, reg_desc_p->clear_addr, -1);
	CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask);
	DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->enable_addr));
	DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->status_addr));
	DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->clear_addr));
	if (reg_desc_p->log_addr != NULL) {
		DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg,
		    CSR_XR(csr_base, reg_desc_p->log_addr));
	}
}

/*
 * Disable a register group's error interrupts (and logging, if present).
 * LPU groups use reverse polarity, so they are disabled by writing all
 * ones; everything else is disabled by writing zero.
 */
void
px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
	uint64_t		val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0;

	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, val);
	CSR_XS(csr_base, reg_desc_p->enable_addr, val);
}

/*
 * Set up pcie error registers.
 */
void
px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable)
{
	px_err_id_t		reg_id;
	const px_err_reg_desc_t	*reg_desc_p;
	void (*px_err_reg_func)(px_err_id_t, caddr_t);

	/*
	 * JBC or XBC are enabled during adding of common block interrupts,
	 * not done here.  Only PX_REG_CSR (PCIe core) groups matching this
	 * chip are touched.
	 */
	px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable);
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		reg_desc_p = &px_err_reg_tbl[reg_id];
		if ((reg_desc_p->chip_mask & chip_mask) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			px_err_reg_func(reg_id, csr_base);
	}
}

/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * o Snap shot current fire registers
 * o check for safe access
 * o send ereport and clear snap shot registers
 * o create and queue RC info for later use in fabric scan.
 *   o RUC/WUC, PTLP, MMU Errors(CA), UR
 * o check severity of snap shot registers
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL
 * @param block		PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL
 * @return err		PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED
 */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	px_err_ss_t		ss = {0};
	int			err;

	/* Caller must hold the FM mutex */
	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* check for safe access */
	px_err_safeacc_check(px_p, derr);

	/* snap shot the current fire registers */
	px_err_snapshot(px_p, &ss, block);

	/* send ereports/handle/clear registers */
	err = px_err_erpt_and_clr(px_p, derr, &ss);

	/* check for error severity */
	err = px_err_check_severity(px_p, derr, err, caller);

	/* Mark the On Trap Handle if an error occurred */
	if (err != PX_NO_ERROR) {
		px_pec_t	*pec_p = px_p->px_pec_p;
		on_trap_data_t	*otd = pec_p->pec_ontrap_data;

		if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS))
			otd->ot_trap |= OT_DATA_ACCESS;
	}

	return (err);
}

/*
 * Static function
 */

/*
 * px_err_snapshot:
 * Take a current snap shot of all the fire error registers.  This includes
 * JBC/UBC, DMC, and PEC depending on the block flag
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param ss		pre-allocated memory to store the snap shot.
 * @param chk_cb	boolean on whether to store jbc/ubc register.
 */
static void
px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block)
{
	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t	xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
	caddr_t	pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
	caddr_t	csr_base;
	uint8_t	chip_mask = 1 << PX_CHIP_TYPE(pxu_p);
	const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl;
	px_err_id_t reg_id;

	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) {
		/* Skip groups that don't exist on this chip */
		if (!(reg_desc_p->chip_mask & chip_mask))
			continue;

		/* Pick the register bank requested by "block" */
		if ((block & PX_FM_BLOCK_HOST) &&
		    (reg_desc_p->reg_bank == PX_REG_XBC))
			csr_base = xbc_csr_base;
		else if ((block & PX_FM_BLOCK_PCIE) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			csr_base = pec_csr_base;
		else {
			/* Not requested: record "no errors" */
			ss_p->err_status[reg_id] = 0;
			continue;
		}

		ss_p->err_status[reg_id] = CSR_XR(csr_base,
		    reg_desc_p->status_addr);
	}
}

/*
 * px_err_erpt_and_clr:
 * This function does the following thing to all the fire registers based
 * on an earlier snap shot.
 * o Send ereport
 * o Handle the error
 * o Clear the error
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param ss_p		pre-allocated memory to store the snap shot.
 */
static int
px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p)
{
	dev_info_t		*rpdip = px_p->px_dip;
	pxu_t			*pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t			csr_base;
	const px_err_reg_desc_t	*err_reg_tbl;
	px_err_bit_desc_t	*err_bit_tbl;
	px_err_bit_desc_t	*err_bit_desc;

	uint64_t		*count_mask;
	uint64_t		clear_addr;
	uint64_t		ss_reg;

	int			(*err_handler)();
	int			(*erpt_handler)();
	int			reg_id, key;
	int			err = PX_NO_ERROR;
	int			biterr = 0;

	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* send ereport/handle/clear for every register group */
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		/* Get the correct register description table */
		err_reg_tbl = &px_err_reg_tbl[reg_id];

		/* Only look at enabled groups. */
		if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p))))
			continue;

		/* Get the correct CSR BASE */
		csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank];

		/* If there are no errors in this register, continue */
		ss_reg = ss_p->err_status[reg_id];
		if (!ss_reg)
			continue;

		/* Get pointers to masks and register addresses */
		count_mask = err_reg_tbl->count_mask_p;
		clear_addr = err_reg_tbl->clear_addr;

		/* Get the register BIT description table */
		err_bit_tbl = err_reg_tbl->err_bit_tbl;

		/* For each known bit in the register send erpt and handle */
		for (key = 0; key < err_reg_tbl->err_bit_keys; key++) {
			/*
			 * If the ss_reg is set for this bit,
			 * send ereport and handle
			 */
			err_bit_desc = &err_bit_tbl[key];
			if (!BIT_TST(ss_reg, err_bit_desc->bit))
				continue;

			/* Increment the counter if necessary */
			if (BIT_TST(*count_mask, err_bit_desc->bit)) {
				err_bit_desc->counter++;
			}

			/* Error Handle for this bit */
			err_handler = err_bit_desc->err_handler;
			if (err_handler) {
				biterr = err_handler(rpdip, csr_base, derr,
				    err_reg_tbl, err_bit_desc);
				/* Accumulate severity as an OR of flags */
				err |= biterr;
			}

			/*
			 * Send the ereport if it's an UNEXPECTED err.
			 * This is the only place where PX_EXPECTED is
			 * utilized.
			 */
			erpt_handler = err_bit_desc->erpt_handler;
			if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) ||
			    (biterr == PX_EXPECTED))
				continue;

			if (erpt_handler)
				(void) erpt_handler(rpdip, csr_base, ss_reg,
				    derr, err_bit_desc->bit,
				    err_bit_desc->class_name);
		}

		/* Clear the register and error */
		CSR_XS(csr_base, clear_addr, ss_reg);
	}

	return (err);
}

/*
 * px_err_check_severity:
 * Check the severity of the fire error based on an earlier snapshot
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param err		fire register error status
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL
 */
static int
px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller)
{
	px_pec_t 	*pec_p = px_p->px_pec_p;
	boolean_t	is_safeacc = B_FALSE;

	/*
	 * Nothing to do if called with no error.
	 * The err could have already been set to PX_NO_PANIC, which means the
	 * system doesn't need to panic, but PEEK/POKE still failed.
	 */
	if (err == PX_NO_ERROR)
		return (err);

	/* Cautious access error handling */
	switch (derr->fme_flag) {
	case DDI_FM_ERR_EXPECTED:
		if (caller == PX_TRAP_CALL) {
			/*
			 * for ddi_caut_get treat all events as nonfatal
			 * The trampoline will set err_ena = 0,
			 * err_status = NONFATAL.
			 */
			derr->fme_status = DDI_FM_NONFATAL;
			is_safeacc = B_TRUE;
		} else {
			/*
			 * For ddi_caut_put treat all events as nonfatal.  Here
			 * we have the handle and can call
			 * ndi_fm_acc_err_set().
			 */
			derr->fme_status = DDI_FM_NONFATAL;
			ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
			is_safeacc = B_TRUE;
		}
		break;
	case DDI_FM_ERR_PEEK:
	case DDI_FM_ERR_POKE:
		/*
		 * For ddi_peek/poke treat all events as nonfatal.
		 */
		is_safeacc = B_TRUE;
		break;
	default:
		is_safeacc = B_FALSE;
	}

	/* re-adjust error status from safe access, forgive all errors */
	if (is_safeacc)
		return (PX_NO_PANIC);

	return (err);
}

/* predefined convenience functions */

/*
 * Log one error bit via DBG: which bit, its class, the register it lives
 * in, its hit count, and the severity string supplied by the caller.
 */
/* ARGSUSED */
void
px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr,
	px_err_bit_desc_t *err_bit_descr, char *msg)
{
	DBG(DBG_ERR_INTR, rpdip,
	    "Bit %d, %s, at %s(0x%x) has occured %d times with a severity "
	    "of \"%s\"\n",
	    err_bit_descr->bit, err_bit_descr->class_name,
	    err_reg_descr->msg, err_reg_descr->status_addr,
	    err_bit_descr->counter, msg);
}

/* Handler: error requires a hardware reset */
/* ARGSUSED */
int
px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base,
	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
	px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_HW_RESET) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr,
		    "HW RESET");
	}

	return (PX_HW_RESET);
}

/* Handler: error is fatal; caller will panic the system */
/* ARGSUSED */
int
px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base,
	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
	px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_PANIC) {
		px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC");
	}

	return (PX_PANIC);
}

/* ARGSUSED */
int
px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base,
	ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
	px_err_bit_desc_t *err_bit_descr)
{
	if (px_log & PX_PROTECTED) {
		px_err_log_handle(rpdip, err_reg_descr, 
1111 "PROTECTED"); 1112 } 1113 1114 return (PX_PROTECTED); 1115 } 1116 1117 /* ARGSUSED */ 1118 int 1119 px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, 1120 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1121 px_err_bit_desc_t *err_bit_descr) 1122 { 1123 if (px_log & PX_NO_PANIC) { 1124 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1125 "NO PANIC"); 1126 } 1127 1128 return (PX_NO_PANIC); 1129 } 1130 1131 /* ARGSUSED */ 1132 int 1133 px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, 1134 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1135 px_err_bit_desc_t *err_bit_descr) 1136 { 1137 if (px_log & PX_NO_ERROR) { 1138 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1139 "NO ERROR"); 1140 } 1141 1142 return (PX_NO_ERROR); 1143 } 1144 1145 /* ARGSUSED */ 1146 PX_ERPT_SEND_DEC(do_not) 1147 { 1148 return (PX_NO_ERROR); 1149 } 1150 1151 /* 1152 * Search the px_cb_list_t embedded in the px_cb_t for the 1153 * px_t of the specified Leaf (leaf_id). Return its associated dip. 
1154 */ 1155 static dev_info_t * 1156 px_err_search_cb(px_cb_t *px_cb_p, uint_t leaf_id) 1157 { 1158 int i; 1159 px_cb_list_t *pxl_elemp; 1160 1161 for (i = px_cb_p->attachcnt, pxl_elemp = px_cb_p->pxl; i > 0; 1162 i--, pxl_elemp = pxl_elemp->next) { 1163 if ((((pxu_t *)pxl_elemp->pxp->px_plat_p)->portid & 1164 OBERON_PORT_ID_LEAF_MASK) == leaf_id) { 1165 return (pxl_elemp->pxp->px_dip); 1166 } 1167 } 1168 return (NULL); 1169 } 1170 1171 /* UBC FATAL - see io erpt doc, section 1.1 */ 1172 /* ARGSUSED */ 1173 PX_ERPT_SEND_DEC(ubc_fatal) 1174 { 1175 char buf[FM_MAX_CLASS]; 1176 uint64_t memory_ue_log, marked; 1177 char unum[FM_MAX_CLASS]; 1178 int unum_length; 1179 uint64_t device_id = 0; 1180 uint8_t cpu_version = 0; 1181 nvlist_t *resource = NULL; 1182 uint64_t ubc_intr_status; 1183 px_t *px_p; 1184 px_cb_t *px_cb_p; 1185 dev_info_t *actual_dip; 1186 1187 unum[0] = '\0'; 1188 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1189 1190 memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG); 1191 marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) & 1192 UBC_MEMORY_UE_LOG_MARKED_MASK; 1193 1194 if ((strstr(class_name, "ubc.piowtue") != NULL) || 1195 (strstr(class_name, "ubc.piowbeue") != NULL) || 1196 (strstr(class_name, "ubc.piorbeue") != NULL) || 1197 (strstr(class_name, "ubc.dmarduea") != NULL) || 1198 (strstr(class_name, "ubc.dmardueb") != NULL)) { 1199 int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) & 1200 UBC_MEMORY_UE_LOG_EID_MASK; 1201 (void) strncat(buf, ubc_class_eid_qualifier[eid], 1202 FM_MAX_CLASS); 1203 1204 if (eid == UBC_EID_MEM) { 1205 uint64_t phys_addr = memory_ue_log & 1206 MMU_OBERON_PADDR_MASK; 1207 uint64_t offset = (uint64_t)-1; 1208 1209 resource = fm_nvlist_create(NULL); 1210 if (&plat_get_mem_unum) { 1211 if ((plat_get_mem_unum(0, 1212 phys_addr, 0, B_TRUE, 0, unum, 1213 FM_MAX_CLASS, &unum_length)) != 0) 1214 unum[0] = '\0'; 1215 } 1216 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1217 NULL, unum, NULL, offset); 1218 1219 } else 
if (eid == UBC_EID_CPU) { 1220 int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK); 1221 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1222 1223 resource = fm_nvlist_create(NULL); 1224 cpu_version = cpunodes[cpuid].version; 1225 device_id = cpunodes[cpuid].device_id; 1226 (void) snprintf(sbuf, sizeof (sbuf), "%lX", 1227 device_id); 1228 (void) fm_fmri_cpu_set(resource, 1229 FM_CPU_SCHEME_VERSION, NULL, cpuid, 1230 &cpu_version, sbuf); 1231 } 1232 } 1233 1234 /* 1235 * For most of the errors represented in the UBC Interrupt Status 1236 * register, one can compute the dip of the actual Leaf that was 1237 * involved in the error. To do this, find the px_cb_t structure 1238 * that is shared between a pair of Leaves (eg, LeafA and LeafB). 1239 * 1240 * If any of the error bits for LeafA are set in the hardware 1241 * register, search the list of px_t's rooted in the px_cb_t for 1242 * the one corresponding to LeafA. If error bits for LeafB are set, 1243 * search the list for LeafB's px_t. The px_t references its 1244 * associated dip. 
1245 */ 1246 px_p = DIP_TO_STATE(rpdip); 1247 px_cb_p = ((pxu_t *)px_p->px_plat_p)->px_cb_p; 1248 1249 /* read hardware register */ 1250 ubc_intr_status = CSR_XR(csr_base, UBC_INTERRUPT_STATUS); 1251 1252 if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFA) != 0) { 1253 /* then Leaf A is involved in the error */ 1254 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_A); 1255 ASSERT(actual_dip != NULL); 1256 rpdip = actual_dip; 1257 } else if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFB) != 0) { 1258 /* then Leaf B is involved in the error */ 1259 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_B); 1260 ASSERT(actual_dip != NULL); 1261 rpdip = actual_dip; 1262 } /* else error cannot be associated with a Leaf */ 1263 1264 if (resource) { 1265 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1266 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1267 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE, 1268 OBERON_UBC_ELE, DATA_TYPE_UINT64, 1269 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE), 1270 OBERON_UBC_IE, DATA_TYPE_UINT64, 1271 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE), 1272 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status, 1273 OBERON_UBC_ESS, DATA_TYPE_UINT64, 1274 CSR_XR(csr_base, UBC_ERROR_STATUS_SET), 1275 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log, 1276 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum, 1277 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id, 1278 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version, 1279 OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource, 1280 NULL); 1281 fm_nvlist_destroy(resource, FM_NVA_FREE); 1282 } else { 1283 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1284 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1285 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE, 1286 OBERON_UBC_ELE, DATA_TYPE_UINT64, 1287 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE), 1288 OBERON_UBC_IE, DATA_TYPE_UINT64, 1289 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE), 1290 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status, 1291 OBERON_UBC_ESS, DATA_TYPE_UINT64, 1292 CSR_XR(csr_base, 
UBC_ERROR_STATUS_SET), 1293 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log, 1294 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum, 1295 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id, 1296 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version, 1297 NULL); 1298 } 1299 1300 return (PX_NO_PANIC); 1301 } 1302 1303 /* JBC FATAL */ 1304 PX_ERPT_SEND_DEC(jbc_fatal) 1305 { 1306 char buf[FM_MAX_CLASS]; 1307 boolean_t pri = PX_ERR_IS_PRI(bit); 1308 1309 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1310 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1311 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1312 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1313 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1314 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1315 FIRE_JBC_IE, DATA_TYPE_UINT64, 1316 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1317 FIRE_JBC_IS, DATA_TYPE_UINT64, 1318 ss_reg, 1319 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1320 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1321 FIRE_JBC_FEL1, DATA_TYPE_UINT64, 1322 CSR_XR(csr_base, FATAL_ERROR_LOG_1), 1323 FIRE_JBC_FEL2, DATA_TYPE_UINT64, 1324 CSR_XR(csr_base, FATAL_ERROR_LOG_2), 1325 NULL); 1326 1327 return (PX_NO_PANIC); 1328 } 1329 1330 /* JBC MERGE */ 1331 PX_ERPT_SEND_DEC(jbc_merge) 1332 { 1333 char buf[FM_MAX_CLASS]; 1334 boolean_t pri = PX_ERR_IS_PRI(bit); 1335 1336 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1337 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1338 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1339 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1340 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1341 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1342 FIRE_JBC_IE, DATA_TYPE_UINT64, 1343 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1344 FIRE_JBC_IS, DATA_TYPE_UINT64, 1345 ss_reg, 1346 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1347 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1348 FIRE_JBC_MTEL, DATA_TYPE_UINT64, 1349 CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG), 1350 NULL); 1351 1352 return (PX_NO_PANIC); 1353 } 1354 1355 /* 1356 * JBC Merge buffer retryable errors: 1357 * Merge 
buffer parity error (rd_buf): PIO or DMA 1358 * Merge buffer parity error (wr_buf): PIO or DMA 1359 */ 1360 /* ARGSUSED */ 1361 int 1362 px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base, 1363 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1364 px_err_bit_desc_t *err_bit_descr) 1365 { 1366 /* 1367 * Holder function to attempt error recovery. When the features 1368 * are in place, look up the address of the transaction in: 1369 * 1370 * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); 1371 * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1372 * 1373 * If the error is a secondary error, there is no log information 1374 * just panic as it is unknown which address has been affected. 1375 * 1376 * Remember the address is pretranslation and might be hard to look 1377 * up the appropriate driver based on the PA. 1378 */ 1379 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1380 err_bit_descr)); 1381 } 1382 1383 /* JBC Jbusint IN */ 1384 PX_ERPT_SEND_DEC(jbc_in) 1385 { 1386 char buf[FM_MAX_CLASS]; 1387 boolean_t pri = PX_ERR_IS_PRI(bit); 1388 1389 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1390 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1391 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1392 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1393 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1394 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1395 FIRE_JBC_IE, DATA_TYPE_UINT64, 1396 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1397 FIRE_JBC_IS, DATA_TYPE_UINT64, 1398 ss_reg, 1399 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1400 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1401 FIRE_JBC_JITEL1, DATA_TYPE_UINT64, 1402 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG), 1403 FIRE_JBC_JITEL2, DATA_TYPE_UINT64, 1404 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2), 1405 NULL); 1406 1407 return (PX_NO_PANIC); 1408 } 1409 1410 /* 1411 * JBC Jbusint IN retryable errors 1412 * Log Reg[42:0]. 
1413 * Write Data Parity Error: PIO Writes 1414 * Read Data Parity Error: DMA Reads 1415 */ 1416 int 1417 px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base, 1418 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1419 px_err_bit_desc_t *err_bit_descr) 1420 { 1421 /* 1422 * Holder function to attempt error recovery. When the features 1423 * are in place, look up the address of the transaction in: 1424 * 1425 * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); 1426 * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1427 * 1428 * If the error is a secondary error, there is no log information 1429 * just panic as it is unknown which address has been affected. 1430 * 1431 * Remember the address is pretranslation and might be hard to look 1432 * up the appropriate driver based on the PA. 1433 */ 1434 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1435 err_bit_descr)); 1436 } 1437 1438 1439 /* JBC Jbusint Out */ 1440 PX_ERPT_SEND_DEC(jbc_out) 1441 { 1442 char buf[FM_MAX_CLASS]; 1443 boolean_t pri = PX_ERR_IS_PRI(bit); 1444 1445 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1446 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1447 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1448 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1449 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1450 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1451 FIRE_JBC_IE, DATA_TYPE_UINT64, 1452 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1453 FIRE_JBC_IS, DATA_TYPE_UINT64, 1454 ss_reg, 1455 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1456 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1457 FIRE_JBC_JOTEL1, DATA_TYPE_UINT64, 1458 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG), 1459 FIRE_JBC_JOTEL2, DATA_TYPE_UINT64, 1460 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2), 1461 NULL); 1462 1463 return (PX_NO_PANIC); 1464 } 1465 1466 /* JBC Dmcint ODCD */ 1467 PX_ERPT_SEND_DEC(jbc_odcd) 1468 { 1469 char buf[FM_MAX_CLASS]; 1470 boolean_t pri = PX_ERR_IS_PRI(bit); 1471 1472 (void) 
snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1473 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1474 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1475 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1476 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1477 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1478 FIRE_JBC_IE, DATA_TYPE_UINT64, 1479 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1480 FIRE_JBC_IS, DATA_TYPE_UINT64, 1481 ss_reg, 1482 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1483 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1484 FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64, 1485 CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG), 1486 NULL); 1487 1488 return (PX_NO_PANIC); 1489 } 1490 1491 /* 1492 * JBC Dmcint ODCO nonfatal errer handling - 1493 * PIO data parity error: PIO 1494 */ 1495 /* ARGSUSED */ 1496 int 1497 px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base, 1498 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1499 px_err_bit_desc_t *err_bit_descr) 1500 { 1501 /* 1502 * Holder function to attempt error recovery. When the features 1503 * are in place, look up the address of the transaction in: 1504 * 1505 * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); 1506 * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; 1507 * 1508 * If the error is a secondary error, there is no log information 1509 * just panic as it is unknown which address has been affected. 1510 * 1511 * Remember the address is pretranslation and might be hard to look 1512 * up the appropriate driver based on the PA. 1513 */ 1514 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1515 err_bit_descr)); 1516 } 1517 1518 /* Does address in DMCINT error log register match address of pcitool access? 
*/ 1519 static boolean_t 1520 px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base) 1521 { 1522 px_t *px_p = DIP_TO_STATE(rpdip); 1523 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1524 caddr_t pcitool_addr = pxu_p->pcitool_addr; 1525 caddr_t errlog_addr = 1526 (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS); 1527 1528 return (pcitool_addr == errlog_addr); 1529 } 1530 1531 /* 1532 * JBC Dmcint ODCD errer handling for errors which are forgivable during a safe 1533 * access. (This will be most likely be a PCItool access.) If not a safe 1534 * access context, treat like jbc_dmcint_odcd. 1535 * Unmapped PIO read error: pio:read:M:nonfatal 1536 * Unmapped PIO write error: pio:write:M:nonfatal 1537 * Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal 1538 * Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal 1539 */ 1540 /* ARGSUSED */ 1541 int 1542 px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, 1543 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1544 px_err_bit_desc_t *err_bit_descr) 1545 { 1546 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1547 1548 if (!pri) 1549 return (px_err_panic_handle(rpdip, csr_base, derr, 1550 err_reg_descr, err_bit_descr)); 1551 /* 1552 * Got an error which is forgivable during a PCItool access. 1553 * 1554 * Don't do handler check since the error may otherwise be unfairly 1555 * attributed to a device. Just return. 1556 * 1557 * Note: There is a hole here in that a legitimate error can come in 1558 * while a PCItool access is in play and be forgiven. This is possible 1559 * though not likely. 
1560 */ 1561 if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) && 1562 (px_jbc_pcitool_addr_match(rpdip, csr_base))) 1563 return (px_err_protected_handle(rpdip, csr_base, derr, 1564 err_reg_descr, err_bit_descr)); 1565 1566 return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr, 1567 err_reg_descr, err_bit_descr)); 1568 } 1569 1570 /* JBC Dmcint IDC */ 1571 PX_ERPT_SEND_DEC(jbc_idc) 1572 { 1573 char buf[FM_MAX_CLASS]; 1574 boolean_t pri = PX_ERR_IS_PRI(bit); 1575 1576 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1577 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1578 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1579 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1580 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1581 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1582 FIRE_JBC_IE, DATA_TYPE_UINT64, 1583 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1584 FIRE_JBC_IS, DATA_TYPE_UINT64, 1585 ss_reg, 1586 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1587 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1588 FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64, 1589 CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG), 1590 NULL); 1591 1592 return (PX_NO_PANIC); 1593 } 1594 1595 /* JBC CSR */ 1596 PX_ERPT_SEND_DEC(jbc_csr) 1597 { 1598 char buf[FM_MAX_CLASS]; 1599 boolean_t pri = PX_ERR_IS_PRI(bit); 1600 1601 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1602 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1603 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1604 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1605 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1606 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1607 FIRE_JBC_IE, DATA_TYPE_UINT64, 1608 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1609 FIRE_JBC_IS, DATA_TYPE_UINT64, 1610 ss_reg, 1611 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1612 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1613 "jbc-error-reg", DATA_TYPE_UINT64, 1614 CSR_XR(csr_base, CSR_ERROR_LOG), 1615 NULL); 1616 1617 return (PX_NO_PANIC); 1618 } 1619 1620 /* DMC IMU RDS */ 1621 PX_ERPT_SEND_DEC(imu_rds) 1622 { 1623 char buf[FM_MAX_CLASS]; 1624 boolean_t pri = 
PX_ERR_IS_PRI(bit); 1625 1626 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1627 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1628 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1629 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1630 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1631 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1632 FIRE_IMU_IE, DATA_TYPE_UINT64, 1633 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1634 FIRE_IMU_IS, DATA_TYPE_UINT64, 1635 ss_reg, 1636 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1637 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1638 FIRE_IMU_RDS, DATA_TYPE_UINT64, 1639 CSR_XR(csr_base, IMU_RDS_ERROR_LOG), 1640 NULL); 1641 1642 return (PX_NO_PANIC); 1643 } 1644 1645 /* handle EQ overflow */ 1646 /* ARGSUSED */ 1647 int 1648 px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, 1649 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1650 px_err_bit_desc_t *err_bit_descr) 1651 { 1652 px_t *px_p = DIP_TO_STATE(rpdip); 1653 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1654 int err = px_err_check_eq(rpdip); 1655 1656 if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) { 1657 return (px_err_panic_handle(rpdip, csr_base, derr, 1658 err_reg_descr, err_bit_descr)); 1659 } else { 1660 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1661 err_reg_descr, err_bit_descr)); 1662 } 1663 } 1664 1665 /* DMC IMU SCS */ 1666 PX_ERPT_SEND_DEC(imu_scs) 1667 { 1668 char buf[FM_MAX_CLASS]; 1669 boolean_t pri = PX_ERR_IS_PRI(bit); 1670 1671 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1672 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1673 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1674 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1675 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1676 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1677 FIRE_IMU_IE, DATA_TYPE_UINT64, 1678 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1679 FIRE_IMU_IS, DATA_TYPE_UINT64, 1680 ss_reg, 1681 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1682 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1683 FIRE_IMU_SCS, DATA_TYPE_UINT64, 1684 
CSR_XR(csr_base, IMU_SCS_ERROR_LOG), 1685 NULL); 1686 1687 return (PX_NO_PANIC); 1688 } 1689 1690 /* DMC IMU */ 1691 PX_ERPT_SEND_DEC(imu) 1692 { 1693 char buf[FM_MAX_CLASS]; 1694 boolean_t pri = PX_ERR_IS_PRI(bit); 1695 1696 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1697 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1698 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1699 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1700 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1701 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1702 FIRE_IMU_IE, DATA_TYPE_UINT64, 1703 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1704 FIRE_IMU_IS, DATA_TYPE_UINT64, 1705 ss_reg, 1706 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1707 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1708 NULL); 1709 1710 return (PX_NO_PANIC); 1711 } 1712 1713 /* DMC MMU TFAR/TFSR */ 1714 PX_ERPT_SEND_DEC(mmu_tfar_tfsr) 1715 { 1716 char buf[FM_MAX_CLASS]; 1717 boolean_t pri = PX_ERR_IS_PRI(bit); 1718 px_t *px_p = DIP_TO_STATE(rpdip); 1719 pcie_req_id_t fault_bdf = 0; 1720 uint16_t s_status = 0; 1721 1722 if (pri) { 1723 fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS) 1724 & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK << 1725 MMU_TRANSLATION_FAULT_STATUS_ID); 1726 s_status = PCI_STAT_S_TARG_AB; 1727 1728 /* Only PIO Fault Addresses are valid, this is DMA */ 1729 (void) px_rp_en_q(px_p, fault_bdf, NULL, s_status); 1730 } 1731 1732 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1733 1734 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1735 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1736 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1737 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1738 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1739 FIRE_MMU_IE, DATA_TYPE_UINT64, 1740 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1741 FIRE_MMU_IS, DATA_TYPE_UINT64, 1742 ss_reg, 1743 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1744 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1745 FIRE_MMU_TFAR, DATA_TYPE_UINT64, 1746 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS), 1747 FIRE_MMU_TFSR, 
DATA_TYPE_UINT64, 1748 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS), 1749 NULL); 1750 1751 return (PX_NO_PANIC); 1752 } 1753 1754 /* DMC MMU */ 1755 PX_ERPT_SEND_DEC(mmu) 1756 { 1757 char buf[FM_MAX_CLASS]; 1758 boolean_t pri = PX_ERR_IS_PRI(bit); 1759 1760 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1761 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1762 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1763 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1764 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1765 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1766 FIRE_MMU_IE, DATA_TYPE_UINT64, 1767 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1768 FIRE_MMU_IS, DATA_TYPE_UINT64, 1769 ss_reg, 1770 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1771 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1772 NULL); 1773 1774 return (PX_NO_PANIC); 1775 } 1776 1777 /* 1778 * IMU function to handle all Received but Not Enabled errors. 1779 * 1780 * These errors are due to transactions modes in which the PX driver was not 1781 * setup to be able to do. If possible, inform the driver that their DMA has 1782 * failed by marking their DMA handle as failed, but do not panic the system. 1783 * Most likely the address is not valid, as Fire wasn't setup to handle them in 1784 * the first place. 1785 * 1786 * These errors are not retryable, unless the PX mode has changed, otherwise the 1787 * same error will occur again. 
1788 */ 1789 int 1790 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, 1791 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1792 px_err_bit_desc_t *err_bit_descr) 1793 { 1794 pcie_req_id_t bdf; 1795 1796 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1797 goto done; 1798 1799 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1800 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, 1801 bdf); 1802 1803 done: 1804 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1805 err_bit_descr)); 1806 } 1807 1808 /* 1809 * IMU function to handle all invalid address errors. 1810 * 1811 * These errors are due to transactions in which the address is not recognized. 1812 * If possible, inform the driver that all DMAs have failed by marking their DMA 1813 * handles. Fire should not panic the system, it'll be up to the driver to 1814 * panic. The address logged is invalid. 1815 * 1816 * These errors are not retryable since retrying the same transaction with the 1817 * same invalid address will result in the same error. 1818 */ 1819 /* ARGSUSED */ 1820 int 1821 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, 1822 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1823 px_err_bit_desc_t *err_bit_descr) 1824 { 1825 pcie_req_id_t bdf; 1826 1827 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1828 goto done; 1829 1830 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1831 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, 1832 bdf); 1833 1834 done: 1835 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1836 err_bit_descr)); 1837 } 1838 1839 /* 1840 * IMU function to handle normal transactions that encounter a parity error. 1841 * 1842 * These errors are due to transactions that enouter a parity error. If 1843 * possible, inform the driver that their DMA have failed and that they should 1844 * retry. If Fire is unable to contact the leaf driver, panic the system. 
1845 * Otherwise, it'll be up to the device to determine is this is a panicable 1846 * error. 1847 */ 1848 /* ARGSUSED */ 1849 int 1850 px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, 1851 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1852 px_err_bit_desc_t *err_bit_descr) 1853 { 1854 uint64_t mmu_tfa; 1855 pcie_req_id_t bdf; 1856 int status = PF_HDL_NOTFOUND; 1857 1858 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1859 goto done; 1860 1861 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1862 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1863 status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, 1864 (uint32_t)mmu_tfa, bdf); 1865 1866 done: 1867 if (status == PF_HDL_NOTFOUND) 1868 return (px_err_panic_handle(rpdip, csr_base, derr, 1869 err_reg_descr, err_bit_descr)); 1870 else 1871 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1872 err_reg_descr, err_bit_descr)); 1873 } 1874 1875 /* 1876 * wuc/ruc event - Mark the handle of the failed PIO access. 
Return "no_panic" 1877 */ 1878 /* ARGSUSED */ 1879 int 1880 px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, 1881 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1882 px_err_bit_desc_t *err_bit_descr) 1883 { 1884 px_t *px_p = DIP_TO_STATE(rpdip); 1885 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1886 uint64_t data; 1887 uint32_t addr, hdr; 1888 pcie_tlp_hdr_t *tlp; 1889 int sts = PF_HDL_NOTFOUND; 1890 1891 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1892 goto done; 1893 1894 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 1895 hdr = (uint32_t)(data >> 32); 1896 tlp = (pcie_tlp_hdr_t *)&hdr; 1897 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 1898 addr = (uint32_t)(data >> 32); 1899 1900 switch (tlp->type) { 1901 case PCIE_TLP_TYPE_IO: 1902 case PCIE_TLP_TYPE_MEM: 1903 case PCIE_TLP_TYPE_MEMLK: 1904 sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_PIO_ADDR, 1905 addr, NULL); 1906 break; 1907 case PCIE_TLP_TYPE_CFG0: 1908 case PCIE_TLP_TYPE_CFG1: 1909 sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_CFG_ADDR, 1910 addr, (addr >> 16)); 1911 break; 1912 } 1913 1914 done: 1915 if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR)) 1916 return (px_err_protected_handle(rpdip, csr_base, derr, 1917 err_reg_descr, err_bit_descr)); 1918 1919 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1920 err_reg_descr, err_bit_descr)); 1921 } 1922 1923 /* 1924 * TLU LUP event - if caused by power management activity, then it is expected. 1925 * In all other cases, it is an error. 1926 */ 1927 /* ARGSUSED */ 1928 int 1929 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, 1930 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1931 px_err_bit_desc_t *err_bit_descr) 1932 { 1933 px_t *px_p = DIP_TO_STATE(rpdip); 1934 1935 /* 1936 * power management code is currently the only segment that sets 1937 * px_lup_pending to indicate its expectation for a healthy LUP 1938 * event. 
For all other occasions, LUP event should be flaged as 1939 * error condition. 1940 */ 1941 return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? 1942 PX_NO_PANIC : PX_EXPECTED); 1943 } 1944 1945 /* 1946 * TLU LDN event - if caused by power management activity, then it is expected. 1947 * In all other cases, it is an error. 1948 */ 1949 /* ARGSUSED */ 1950 int 1951 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base, 1952 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1953 px_err_bit_desc_t *err_bit_descr) 1954 { 1955 px_t *px_p = DIP_TO_STATE(rpdip); 1956 return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED : 1957 PX_NO_PANIC); 1958 } 1959 1960 /* PEC ILU none - see io erpt doc, section 3.1 */ 1961 PX_ERPT_SEND_DEC(pec_ilu) 1962 { 1963 char buf[FM_MAX_CLASS]; 1964 boolean_t pri = PX_ERR_IS_PRI(bit); 1965 1966 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1967 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1968 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1969 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1970 FIRE_ILU_ELE, DATA_TYPE_UINT64, 1971 CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE), 1972 FIRE_ILU_IE, DATA_TYPE_UINT64, 1973 CSR_XR(csr_base, ILU_INTERRUPT_ENABLE), 1974 FIRE_ILU_IS, DATA_TYPE_UINT64, 1975 ss_reg, 1976 FIRE_ILU_ESS, DATA_TYPE_UINT64, 1977 CSR_XR(csr_base, ILU_ERROR_STATUS_SET), 1978 NULL); 1979 1980 return (PX_NO_PANIC); 1981 } 1982 1983 /* PCIEX UE Errors */ 1984 /* ARGSUSED */ 1985 int 1986 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base, 1987 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1988 px_err_bit_desc_t *err_bit_descr) 1989 { 1990 px_err_pcie_t regs = {0}; 1991 uint32_t err_bit; 1992 int err; 1993 uint64_t log; 1994 1995 if (err_bit_descr->bit < 32) { 1996 err_bit = (uint32_t)BITMASK(err_bit_descr->bit); 1997 regs.ue_reg = err_bit; 1998 regs.primary_ue = err_bit; 1999 2000 /* 2001 * Log the Received Log for PTLP and UR. The PTLP most likely 2002 * is a poisoned completion. 
The original transaction will be 2003 * logged inthe Transmit Log. 2004 */ 2005 if (err_bit & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR)) { 2006 log = CSR_XR(csr_base, 2007 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG); 2008 regs.rx_hdr1 = (uint32_t)(log >> 32); 2009 regs.rx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); 2010 2011 log = CSR_XR(csr_base, 2012 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG); 2013 regs.rx_hdr3 = (uint32_t)(log >> 32); 2014 regs.rx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); 2015 } 2016 2017 if (err_bit & (PCIE_AER_UCE_PTLP)) { 2018 log = CSR_XR(csr_base, 2019 TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG); 2020 regs.tx_hdr1 = (uint32_t)(log >> 32); 2021 regs.tx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); 2022 2023 log = CSR_XR(csr_base, 2024 TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG); 2025 regs.tx_hdr3 = (uint32_t)(log >> 32); 2026 regs.tx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); 2027 } 2028 } else { 2029 regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2030 } 2031 2032 err = px_err_check_pcie(rpdip, derr, ®s); 2033 2034 if (err & PX_PANIC) { 2035 return (px_err_panic_handle(rpdip, csr_base, derr, 2036 err_reg_descr, err_bit_descr)); 2037 } else { 2038 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2039 err_reg_descr, err_bit_descr)); 2040 } 2041 } 2042 2043 /* PCI-E Uncorrectable Errors */ 2044 PX_ERPT_SEND_DEC(pciex_rx_ue) 2045 { 2046 char buf[FM_MAX_CLASS]; 2047 boolean_t pri = PX_ERR_IS_PRI(bit); 2048 2049 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2050 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2051 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2052 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2053 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2054 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2055 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2056 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2057 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2058 ss_reg, 2059 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2060 CSR_XR(csr_base, 
TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2061 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2062 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2063 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2064 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2065 NULL); 2066 2067 return (PX_NO_PANIC); 2068 } 2069 2070 /* PCI-E Uncorrectable Errors */ 2071 PX_ERPT_SEND_DEC(pciex_tx_ue) 2072 { 2073 char buf[FM_MAX_CLASS]; 2074 boolean_t pri = PX_ERR_IS_PRI(bit); 2075 2076 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2077 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2078 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2079 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2080 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2081 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2082 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2083 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2084 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2085 ss_reg, 2086 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2087 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2088 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2089 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2090 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2091 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2092 NULL); 2093 2094 return (PX_NO_PANIC); 2095 } 2096 2097 /* PCI-E Uncorrectable Errors */ 2098 PX_ERPT_SEND_DEC(pciex_rx_tx_ue) 2099 { 2100 char buf[FM_MAX_CLASS]; 2101 boolean_t pri = PX_ERR_IS_PRI(bit); 2102 2103 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2104 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2105 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2106 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2107 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2108 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2109 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2110 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2111 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2112 ss_reg, 2113 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2114 CSR_XR(csr_base, 
TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2115 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2116 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2117 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2118 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2119 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2120 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2121 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2122 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2123 NULL); 2124 2125 return (PX_NO_PANIC); 2126 } 2127 2128 /* PCI-E Uncorrectable Errors */ 2129 PX_ERPT_SEND_DEC(pciex_ue) 2130 { 2131 char buf[FM_MAX_CLASS]; 2132 boolean_t pri = PX_ERR_IS_PRI(bit); 2133 2134 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2135 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2136 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2137 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2138 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2139 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2140 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2141 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2142 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2143 ss_reg, 2144 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2145 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2146 NULL); 2147 2148 return (PX_NO_PANIC); 2149 } 2150 2151 /* PCIEX UE Errors */ 2152 /* ARGSUSED */ 2153 int 2154 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base, 2155 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 2156 px_err_bit_desc_t *err_bit_descr) 2157 { 2158 px_err_pcie_t regs = {0}; 2159 int err; 2160 2161 if (err_bit_descr->bit < 32) 2162 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit); 2163 else 2164 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2165 2166 err = px_err_check_pcie(rpdip, derr, ®s); 2167 2168 if (err & PX_PANIC) { 2169 return (px_err_panic_handle(rpdip, csr_base, derr, 2170 err_reg_descr, err_bit_descr)); 2171 } else { 2172 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2173 
err_reg_descr, err_bit_descr)); 2174 } 2175 } 2176 2177 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ 2178 PX_ERPT_SEND_DEC(pciex_ce) 2179 { 2180 char buf[FM_MAX_CLASS]; 2181 boolean_t pri = PX_ERR_IS_PRI(bit); 2182 2183 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2184 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2185 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2186 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2187 FIRE_TLU_CELE, DATA_TYPE_UINT64, 2188 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE), 2189 FIRE_TLU_CIE, DATA_TYPE_UINT64, 2190 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE), 2191 FIRE_TLU_CIS, DATA_TYPE_UINT64, 2192 ss_reg, 2193 FIRE_TLU_CESS, DATA_TYPE_UINT64, 2194 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), 2195 NULL); 2196 2197 return (PX_NO_PANIC); 2198 } 2199 2200 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ 2201 PX_ERPT_SEND_DEC(pciex_rx_oe) 2202 { 2203 char buf[FM_MAX_CLASS]; 2204 boolean_t pri = PX_ERR_IS_PRI(bit); 2205 2206 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2207 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2208 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2209 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2210 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2211 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2212 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2213 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2214 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2215 ss_reg, 2216 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2217 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2218 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2219 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), 2220 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2221 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), 2222 NULL); 2223 2224 return (PX_NO_PANIC); 2225 } 2226 2227 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ 2228 PX_ERPT_SEND_DEC(pciex_rx_tx_oe) 2229 { 2230 char buf[FM_MAX_CLASS]; 2231 boolean_t pri = 
PX_ERR_IS_PRI(bit); 2232 px_t *px_p = DIP_TO_STATE(rpdip); 2233 uint32_t trans_type, fault_addr = 0; 2234 uint64_t rx_h1, rx_h2, tx_h1, tx_h2; 2235 uint16_t s_status; 2236 int sts; 2237 pcie_req_id_t fault_bdf = 0; 2238 pcie_cpl_t *cpl; 2239 pf_data_t pf_data = {0}; 2240 2241 rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG); 2242 rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG); 2243 tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 2244 tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 2245 2246 if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) || 2247 (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) { 2248 pf_data.aer_h0 = (uint32_t)(rx_h1 >> 32); 2249 pf_data.aer_h1 = (uint32_t)rx_h1; 2250 pf_data.aer_h2 = (uint32_t)(rx_h2 >> 32); 2251 pf_data.aer_h3 = (uint32_t)rx_h2; 2252 2253 /* get completer bdf (fault bdf) from rx logs */ 2254 cpl = (pcie_cpl_t *)&pf_data.aer_h1; 2255 fault_bdf = cpl->cid; 2256 2257 /* Figure out if UR/CA from rx logs */ 2258 if (cpl->status == PCIE_CPL_STS_UR) 2259 s_status = PCI_STAT_R_MAST_AB; 2260 else if (cpl->status == PCIE_CPL_STS_CA) 2261 s_status = PCI_STAT_R_TARG_AB; 2262 2263 2264 pf_data.aer_h0 = (uint32_t)(tx_h1 >> 32); 2265 pf_data.aer_h1 = (uint32_t)tx_h1; 2266 pf_data.aer_h2 = (uint32_t)(tx_h2 >> 32); 2267 pf_data.aer_h3 = (uint32_t)tx_h2; 2268 2269 /* get fault addr from tx logs */ 2270 sts = pf_tlp_decode(rpdip, &pf_data, 0, &fault_addr, 2271 &trans_type); 2272 2273 if (sts == DDI_SUCCESS) 2274 (void) px_rp_en_q(px_p, fault_bdf, fault_addr, 2275 s_status); 2276 } 2277 2278 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2279 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2280 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2281 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2282 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2283 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2284 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2285 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2286 FIRE_TLU_OEIS, 
DATA_TYPE_UINT64, 2287 ss_reg, 2288 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2289 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2290 FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1, 2291 FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2, 2292 FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1, 2293 FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2, 2294 NULL); 2295 2296 return (PX_NO_PANIC); 2297 } 2298 2299 /* TLU Other Event - see io erpt doc, section 3.9 */ 2300 PX_ERPT_SEND_DEC(pciex_oe) 2301 { 2302 char buf[FM_MAX_CLASS]; 2303 boolean_t pri = PX_ERR_IS_PRI(bit); 2304 2305 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2306 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2307 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2308 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2309 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2310 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2311 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2312 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2313 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2314 ss_reg, 2315 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2316 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2317 NULL); 2318 2319 return (PX_NO_PANIC); 2320 } 2321