/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * sun4u Fire Error Handling
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/pcie.h>
#include <sys/pcie_impl.h>
#include "px_obj.h"
#include <px_regs.h>
#include <px_csr.h>
#include <sys/membar.h>
#include <sys/machcpuvar.h>
#include <sys/platform_module.h>
#include "pcie_pwr.h"
#include "px_lib4u.h"
#include "px_err.h"
#include "px_err_impl.h"
#include "oberon_regs.h"

/*
 * Per-register-group mask tunables.  Each group has three masks, which are
 * bound to the matching px_err_reg_tbl entry by the MnT6()/MnT6X() macros:
 *
 *	px_<grp>_intr_mask	written to the group's interrupt enable
 *				register by px_err_reg_enable()
 *	px_<grp>_log_mask	written to the group's log enable register
 *				(when one exists) by px_err_reg_enable()
 *	px_<grp>_count_mask	selects which status bits bump the per-bit
 *				counter in px_err_erpt_and_clr()
 */

/* TLU uncorrectable errors */
uint64_t px_tlu_ue_intr_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_log_mask = PX_ERR_EN_ALL;
uint64_t px_tlu_ue_count_mask = PX_ERR_EN_ALL;

/* TLU correctable errors */
uint64_t px_tlu_ce_intr_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_log_mask = PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_count_mask = PX_ERR_MASK_NONE;

/*
 * Do not enable Link Interrupts
 */
uint64_t px_tlu_oe_intr_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_log_mask = PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_count_mask = PX_ERR_EN_ALL;

/* MMU */
uint64_t px_mmu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_log_mask = PX_ERR_EN_ALL;
uint64_t px_mmu_count_mask = PX_ERR_EN_ALL;

/* IMU */
uint64_t px_imu_intr_mask = PX_ERR_EN_ALL;
uint64_t px_imu_log_mask = PX_ERR_EN_ALL;
uint64_t px_imu_count_mask = PX_ERR_EN_ALL;

/*
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
 */
uint64_t px_ilu_intr_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_log_mask = (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_count_mask = PX_ERR_EN_ALL;

/* UBC (the px_err_reg_tbl entry is flagged for the Oberon chip) */
uint64_t px_ubc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_log_mask = PX_ERR_EN_ALL;
uint64_t px_ubc_count_mask = PX_ERR_EN_ALL;

/* JBC (the px_err_reg_tbl entry is flagged for the Fire chip) */
uint64_t px_jbc_intr_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_log_mask = PX_ERR_EN_ALL;
uint64_t px_jbc_count_mask = PX_ERR_EN_ALL;

/*
 * LPU Intr Registers are reverse encoding from the registers above.
 * 1 = disable
 * 0 = enable
 *
 * Log and Count are however still the same.
97 */ 98 uint64_t px_lpul_intr_mask = LPU_INTR_DISABLE; 99 uint64_t px_lpul_log_mask = PX_ERR_EN_ALL; 100 uint64_t px_lpul_count_mask = PX_ERR_EN_ALL; 101 102 uint64_t px_lpup_intr_mask = LPU_INTR_DISABLE; 103 uint64_t px_lpup_log_mask = PX_ERR_EN_ALL; 104 uint64_t px_lpup_count_mask = PX_ERR_EN_ALL; 105 106 uint64_t px_lpur_intr_mask = LPU_INTR_DISABLE; 107 uint64_t px_lpur_log_mask = PX_ERR_EN_ALL; 108 uint64_t px_lpur_count_mask = PX_ERR_EN_ALL; 109 110 uint64_t px_lpux_intr_mask = LPU_INTR_DISABLE; 111 uint64_t px_lpux_log_mask = PX_ERR_EN_ALL; 112 uint64_t px_lpux_count_mask = PX_ERR_EN_ALL; 113 114 uint64_t px_lpus_intr_mask = LPU_INTR_DISABLE; 115 uint64_t px_lpus_log_mask = PX_ERR_EN_ALL; 116 uint64_t px_lpus_count_mask = PX_ERR_EN_ALL; 117 118 uint64_t px_lpug_intr_mask = LPU_INTR_DISABLE; 119 uint64_t px_lpug_log_mask = PX_ERR_EN_ALL; 120 uint64_t px_lpug_count_mask = PX_ERR_EN_ALL; 121 122 /* 123 * JBC error bit table 124 */ 125 #define JBC_BIT_DESC(bit, hdl, erpt) \ 126 JBC_INTERRUPT_STATUS_ ## bit ## _P, \ 127 0, \ 128 PX_ERR_BIT_HANDLE(hdl), \ 129 PX_ERPT_SEND(erpt), \ 130 PX_ERR_JBC_CLASS(bit) }, \ 131 { JBC_INTERRUPT_STATUS_ ## bit ## _S, \ 132 0, \ 133 PX_ERR_BIT_HANDLE(hdl), \ 134 PX_ERPT_SEND(erpt), \ 135 PX_ERR_JBC_CLASS(bit) 136 px_err_bit_desc_t px_err_jbc_tbl[] = { 137 /* JBC FATAL */ 138 { JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) }, 139 { JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) }, 140 { JBC_BIT_DESC(APE, hw_reset, jbc_fatal) }, 141 { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) }, 142 { JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) }, 143 { JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) }, 144 { JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) }, 145 146 /* JBC MERGE */ 147 { JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) }, 148 { JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) }, 149 150 /* JBC Jbusint IN */ 151 { JBC_BIT_DESC(UE_ASYN, panic, jbc_in) }, 152 { JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) }, 153 { JBC_BIT_DESC(JTE, panic, jbc_in) }, 154 { JBC_BIT_DESC(JBE, 
panic, jbc_in) }, 155 { JBC_BIT_DESC(JUE, panic, jbc_in) }, 156 { JBC_BIT_DESC(ICISE, panic, jbc_in) }, 157 { JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) }, 158 { JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) }, 159 { JBC_BIT_DESC(ILL_BMW, panic, jbc_in) }, 160 { JBC_BIT_DESC(ILL_BMR, panic, jbc_in) }, 161 { JBC_BIT_DESC(BJC, panic, jbc_in) }, 162 163 /* JBC Jbusint Out */ 164 { JBC_BIT_DESC(IJP, panic, jbc_out) }, 165 166 /* 167 * JBC Dmcint ODCD 168 * 169 * Error bits which can be set via a bad PCItool access go through 170 * jbc_safe_acc instead. 171 */ 172 { JBC_BIT_DESC(PIO_UNMAP_RD, jbc_safe_acc, jbc_odcd) }, 173 { JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) }, 174 { JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) }, 175 { JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) }, 176 { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) }, 177 { JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) }, 178 179 /* JBC Dmcint IDC */ 180 { JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) }, 181 { JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) }, 182 183 /* JBC CSR */ 184 { JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) } 185 }; 186 187 #define px_err_jbc_keys \ 188 (sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t)) 189 190 /* 191 * UBC error bit table 192 */ 193 #define UBC_BIT_DESC(bit, hdl, erpt) \ 194 UBC_INTERRUPT_STATUS_ ## bit ## _P, \ 195 0, \ 196 PX_ERR_BIT_HANDLE(hdl), \ 197 PX_ERPT_SEND(erpt), \ 198 PX_ERR_UBC_CLASS(bit) }, \ 199 { UBC_INTERRUPT_STATUS_ ## bit ## _S, \ 200 0, \ 201 PX_ERR_BIT_HANDLE(hdl), \ 202 PX_ERPT_SEND(erpt), \ 203 PX_ERR_UBC_CLASS(bit) 204 px_err_bit_desc_t px_err_ubc_tbl[] = { 205 /* UBC FATAL */ 206 { UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) }, 207 { UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) }, 208 { UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) }, 209 { UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) }, 210 { UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) }, 211 { UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) }, 212 { UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) }, 213 { 
UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) }, 214 { UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) }, 215 { UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) }, 216 { UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) } 217 }; 218 219 #define px_err_ubc_keys \ 220 (sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t)) 221 222 223 char *ubc_class_eid_qualifier[] = { 224 "-mem", 225 "-channel", 226 "-cpu", 227 "-path" 228 }; 229 230 231 /* 232 * DMC error bit tables 233 */ 234 #define IMU_BIT_DESC(bit, hdl, erpt) \ 235 IMU_INTERRUPT_STATUS_ ## bit ## _P, \ 236 0, \ 237 PX_ERR_BIT_HANDLE(hdl), \ 238 PX_ERPT_SEND(erpt), \ 239 PX_ERR_DMC_CLASS(bit) }, \ 240 { IMU_INTERRUPT_STATUS_ ## bit ## _S, \ 241 0, \ 242 PX_ERR_BIT_HANDLE(hdl), \ 243 PX_ERPT_SEND(erpt), \ 244 PX_ERR_DMC_CLASS(bit) 245 px_err_bit_desc_t px_err_imu_tbl[] = { 246 /* DMC IMU RDS */ 247 { IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) }, 248 { IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) }, 249 { IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) }, 250 { IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) }, 251 { IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) }, 252 { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) }, 253 { IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) }, 254 { IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) }, 255 256 /* DMC IMU SCS */ 257 { IMU_BIT_DESC(EQ_NOT_EN, panic, imu_scs) }, 258 259 /* DMC IMU */ 260 { IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) } 261 }; 262 263 #define px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t)) 264 265 /* mmu errors */ 266 #define MMU_BIT_DESC(bit, hdl, erpt) \ 267 MMU_INTERRUPT_STATUS_ ## bit ## _P, \ 268 0, \ 269 PX_ERR_BIT_HANDLE(hdl), \ 270 PX_ERPT_SEND(erpt), \ 271 PX_ERR_DMC_CLASS(bit) }, \ 272 { MMU_INTERRUPT_STATUS_ ## bit ## _S, \ 273 0, \ 274 PX_ERR_BIT_HANDLE(hdl), \ 275 PX_ERPT_SEND(erpt), \ 276 PX_ERR_DMC_CLASS(bit) 277 px_err_bit_desc_t px_err_mmu_tbl[] = { 278 /* DMC MMU TFAR/TFSR */ 279 { MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) }, 280 { 
MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) }, 281 { MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) }, 282 { MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) }, 283 { MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) }, 284 { MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) }, 285 { MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) }, 286 { MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) }, 287 { MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) }, 288 { MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) }, 289 { MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) }, 290 291 /* DMC MMU */ 292 { MMU_BIT_DESC(TTC_CAE, panic, mmu) } 293 }; 294 #define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t)) 295 296 297 /* 298 * PEC error bit tables 299 */ 300 #define ILU_BIT_DESC(bit, hdl, erpt) \ 301 ILU_INTERRUPT_STATUS_ ## bit ## _P, \ 302 0, \ 303 PX_ERR_BIT_HANDLE(hdl), \ 304 PX_ERPT_SEND(erpt), \ 305 PX_ERR_PEC_CLASS(bit) }, \ 306 { ILU_INTERRUPT_STATUS_ ## bit ## _S, \ 307 0, \ 308 PX_ERR_BIT_HANDLE(hdl), \ 309 PX_ERPT_SEND(erpt), \ 310 PX_ERR_PEC_CLASS(bit) 311 px_err_bit_desc_t px_err_ilu_tbl[] = { 312 /* PEC ILU none */ 313 { ILU_BIT_DESC(IHB_PE, panic, pec_ilu) } 314 }; 315 #define px_err_ilu_keys \ 316 (sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t)) 317 318 /* 319 * PEC UE errors implementation is incomplete pending PCIE generic 320 * fabric rules. Must handle both PRIMARY and SECONDARY errors. 
321 */ 322 /* pec ue errors */ 323 #define TLU_UC_BIT_DESC(bit, hdl, erpt) \ 324 TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \ 325 0, \ 326 PX_ERR_BIT_HANDLE(hdl), \ 327 PX_ERPT_SEND(erpt), \ 328 PX_ERR_PEC_CLASS(bit) }, \ 329 { TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \ 330 0, \ 331 PX_ERR_BIT_HANDLE(hdl), \ 332 PX_ERPT_SEND(erpt), \ 333 PX_ERR_PEC_CLASS(bit) 334 #define TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \ 335 TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \ 336 0, \ 337 PX_ERR_BIT_HANDLE(hdl), \ 338 PX_ERPT_SEND(erpt), \ 339 PX_ERR_PEC_OB_CLASS(bit) }, \ 340 { TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \ 341 0, \ 342 PX_ERR_BIT_HANDLE(hdl), \ 343 PX_ERPT_SEND(erpt), \ 344 PX_ERR_PEC_OB_CLASS(bit) 345 px_err_bit_desc_t px_err_tlu_ue_tbl[] = { 346 /* PCI-E Receive Uncorrectable Errors */ 347 { TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) }, 348 { TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) }, 349 350 /* PCI-E Transmit Uncorrectable Errors */ 351 { TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) }, 352 { TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) }, 353 { TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) }, 354 355 /* PCI-E Rx/Tx Uncorrectable Errors */ 356 { TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) }, 357 { TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) }, 358 359 /* Other PCI-E Uncorrectable Errors */ 360 { TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) }, 361 { TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) }, 362 { TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) }, 363 364 /* Not used */ 365 { TLU_UC_BIT_DESC(CA, pciex_ue, do_not) } 366 }; 367 #define px_err_tlu_ue_keys \ 368 (sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t)) 369 370 371 /* 372 * PEC CE errors implementation is incomplete pending PCIE generic 373 * fabric rules. 
374 */ 375 /* pec ce errors */ 376 #define TLU_CE_BIT_DESC(bit, hdl, erpt) \ 377 TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \ 378 0, \ 379 PX_ERR_BIT_HANDLE(hdl), \ 380 PX_ERPT_SEND(erpt), \ 381 PX_ERR_PEC_CLASS(bit) }, \ 382 { TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \ 383 0, \ 384 PX_ERR_BIT_HANDLE(hdl), \ 385 PX_ERPT_SEND(erpt), \ 386 PX_ERR_PEC_CLASS(bit) 387 px_err_bit_desc_t px_err_tlu_ce_tbl[] = { 388 /* PCI-E Correctable Errors */ 389 { TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) }, 390 { TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) }, 391 { TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) }, 392 { TLU_CE_BIT_DESC(BTP, pciex_ce, pciex_ce) }, 393 { TLU_CE_BIT_DESC(RE, pciex_ce, pciex_ce) } 394 }; 395 #define px_err_tlu_ce_keys \ 396 (sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t)) 397 398 399 /* pec oe errors */ 400 #define TLU_OE_BIT_DESC(bit, hdl, erpt) \ 401 TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \ 402 0, \ 403 PX_ERR_BIT_HANDLE(hdl), \ 404 PX_ERPT_SEND(erpt), \ 405 PX_ERR_PEC_CLASS(bit) }, \ 406 { TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \ 407 0, \ 408 PX_ERR_BIT_HANDLE(hdl), \ 409 PX_ERPT_SEND(erpt), \ 410 PX_ERR_PEC_CLASS(bit) 411 #define TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \ 412 TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \ 413 0, \ 414 PX_ERR_BIT_HANDLE(hdl), \ 415 PX_ERPT_SEND(erpt), \ 416 PX_ERR_PEC_OB_CLASS(bit) }, \ 417 { TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \ 418 0, \ 419 PX_ERR_BIT_HANDLE(hdl), \ 420 PX_ERPT_SEND(erpt), \ 421 PX_ERR_PEC_OB_CLASS(bit) 422 px_err_bit_desc_t px_err_tlu_oe_tbl[] = { 423 /* TLU Other Event Status (receive only) */ 424 { TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) }, 425 426 /* TLU Other Event Status (rx + tx) */ 427 { TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) }, 428 { TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) }, 429 { TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) }, 430 431 /* TLU Other Event */ 432 { TLU_OE_BIT_DESC(IIP, panic, pciex_oe) }, 433 { TLU_OE_BIT_DESC(EDP, panic, 
pciex_oe) }, 434 { TLU_OE_BIT_DESC(EHP, panic, pciex_oe) }, 435 { TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) }, 436 { TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) }, 437 { TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) }, 438 { TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) }, 439 { TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) }, 440 { TLU_OE_BIT_DESC(ERU, panic, pciex_oe) }, 441 { TLU_OE_BIT_DESC(ERO, panic, pciex_oe) }, 442 { TLU_OE_BIT_DESC(EMP, panic, pciex_oe) }, 443 { TLU_OE_BIT_DESC(EPE, panic, pciex_oe) }, 444 { TLU_OE_BIT_DESC(ERP, panic, pciex_oe) }, 445 { TLU_OE_BIT_DESC(EIP, panic, pciex_oe) } 446 }; 447 448 #define px_err_tlu_oe_keys \ 449 (sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t)) 450 451 452 /* 453 * All the following tables below are for LPU Interrupts. These interrupts 454 * are *NOT* error interrupts, but event status interrupts. 455 * 456 * These events are probably of most interest to: 457 * o Hotplug 458 * o Power Management 459 * o etc... 460 * 461 * There are also a few events that would be interresting for FMA. 462 * Again none of the regiseters below state that an error has occured 463 * or that data has been lost. If anything, they give status that an 464 * error is *about* to occur. examples 465 * o INT_SKP_ERR - indicates clock between fire and child is too far 466 * off and is most unlikely able to compensate 467 * o INT_TX_PAR_ERR - A parity error occured in ONE lane. This is 468 * HW recoverable, but will like end up as a future 469 * fabric error as well. 470 * 471 * For now, we don't care about any of these errors and should be ignore, 472 * but cleared. 
473 */ 474 475 /* LPU Link Interrupt Table */ 476 #define LPUL_BIT_DESC(bit, hdl, erpt) \ 477 LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \ 478 0, \ 479 NULL, \ 480 NULL, \ 481 "" 482 px_err_bit_desc_t px_err_lpul_tbl[] = { 483 { LPUL_BIT_DESC(LINK_ERR_ACT, NULL, NULL) } 484 }; 485 #define px_err_lpul_keys \ 486 (sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t)) 487 488 /* LPU Physical Interrupt Table */ 489 #define LPUP_BIT_DESC(bit, hdl, erpt) \ 490 LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \ 491 0, \ 492 NULL, \ 493 NULL, \ 494 "" 495 px_err_bit_desc_t px_err_lpup_tbl[] = { 496 { LPUP_BIT_DESC(PHY_LAYER_ERR, NULL, NULL) } 497 }; 498 #define px_err_lpup_keys \ 499 (sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t)) 500 501 /* LPU Receive Interrupt Table */ 502 #define LPUR_BIT_DESC(bit, hdl, erpt) \ 503 LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \ 504 0, \ 505 NULL, \ 506 NULL, \ 507 "" 508 px_err_bit_desc_t px_err_lpur_tbl[] = { 509 { LPUR_BIT_DESC(RCV_PHY, NULL, NULL) } 510 }; 511 #define px_err_lpur_keys \ 512 (sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t)) 513 514 /* LPU Transmit Interrupt Table */ 515 #define LPUX_BIT_DESC(bit, hdl, erpt) \ 516 LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \ 517 0, \ 518 NULL, \ 519 NULL, \ 520 "" 521 px_err_bit_desc_t px_err_lpux_tbl[] = { 522 { LPUX_BIT_DESC(UNMSK, NULL, NULL) } 523 }; 524 #define px_err_lpux_keys \ 525 (sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t)) 526 527 /* LPU LTSSM Interrupt Table */ 528 #define LPUS_BIT_DESC(bit, hdl, erpt) \ 529 LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \ 530 0, \ 531 NULL, \ 532 NULL, \ 533 "" 534 px_err_bit_desc_t px_err_lpus_tbl[] = { 535 { LPUS_BIT_DESC(ANY, NULL, NULL) } 536 }; 537 #define px_err_lpus_keys \ 538 (sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t)) 539 540 /* LPU Gigablaze Glue Interrupt Table */ 541 #define LPUG_BIT_DESC(bit, hdl, erpt) \ 542 LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \ 
543 0, \ 544 NULL, \ 545 NULL, \ 546 "" 547 px_err_bit_desc_t px_err_lpug_tbl[] = { 548 { LPUG_BIT_DESC(GLOBL_UNMSK, NULL, NULL) } 549 }; 550 #define px_err_lpug_keys \ 551 (sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t)) 552 553 554 /* Mask and Tables */ 555 #define MnT6X(pre) \ 556 &px_ ## pre ## _intr_mask, \ 557 &px_ ## pre ## _log_mask, \ 558 &px_ ## pre ## _count_mask, \ 559 px_err_ ## pre ## _tbl, \ 560 px_err_ ## pre ## _keys, \ 561 PX_REG_XBC, \ 562 0 563 564 #define MnT6(pre) \ 565 &px_ ## pre ## _intr_mask, \ 566 &px_ ## pre ## _log_mask, \ 567 &px_ ## pre ## _count_mask, \ 568 px_err_ ## pre ## _tbl, \ 569 px_err_ ## pre ## _keys, \ 570 PX_REG_CSR, \ 571 0 572 573 /* LPU Registers Addresses */ 574 #define LR4(pre) \ 575 NULL, \ 576 LPU_ ## pre ## _INTERRUPT_MASK, \ 577 LPU_ ## pre ## _INTERRUPT_AND_STATUS, \ 578 LPU_ ## pre ## _INTERRUPT_AND_STATUS 579 580 /* LPU Registers Addresses with Irregularities */ 581 #define LR4_FIXME(pre) \ 582 NULL, \ 583 LPU_ ## pre ## _INTERRUPT_MASK, \ 584 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \ 585 LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS 586 587 /* TLU Registers Addresses */ 588 #define TR4(pre) \ 589 TLU_ ## pre ## _LOG_ENABLE, \ 590 TLU_ ## pre ## _INTERRUPT_ENABLE, \ 591 TLU_ ## pre ## _INTERRUPT_STATUS, \ 592 TLU_ ## pre ## _STATUS_CLEAR 593 594 /* Registers Addresses for JBC, UBC, MMU, IMU and ILU */ 595 #define R4(pre) \ 596 pre ## _ERROR_LOG_ENABLE, \ 597 pre ## _INTERRUPT_ENABLE, \ 598 pre ## _INTERRUPT_STATUS, \ 599 pre ## _ERROR_STATUS_CLEAR 600 601 /* Bits in chip_mask, set according to type. */ 602 #define CHP_O BITMASK(PX_CHIP_OBERON) 603 #define CHP_F BITMASK(PX_CHIP_FIRE) 604 #define CHP_FO (CHP_F | CHP_O) 605 606 /* 607 * Register error handling tables. 608 * The ID Field (first field) is identified by an enum px_err_id_t. 
609 * It is located in px_err.h 610 */ 611 static const 612 px_err_reg_desc_t px_err_reg_tbl[] = { 613 { CHP_F, MnT6X(jbc), R4(JBC), "JBC Error"}, 614 { CHP_O, MnT6X(ubc), R4(UBC), "UBC Error"}, 615 { CHP_FO, MnT6(mmu), R4(MMU), "MMU Error"}, 616 { CHP_FO, MnT6(imu), R4(IMU), "IMU Error"}, 617 { CHP_FO, MnT6(tlu_ue), TR4(UNCORRECTABLE_ERROR), "TLU UE"}, 618 { CHP_FO, MnT6(tlu_ce), TR4(CORRECTABLE_ERROR), "TLU CE"}, 619 { CHP_FO, MnT6(tlu_oe), TR4(OTHER_EVENT), "TLU OE"}, 620 { CHP_FO, MnT6(ilu), R4(ILU), "ILU Error"}, 621 { CHP_F, MnT6(lpul), LR4(LINK_LAYER), "LPU Link Layer"}, 622 { CHP_F, MnT6(lpup), LR4_FIXME(PHY), "LPU Phy Layer"}, 623 { CHP_F, MnT6(lpur), LR4(RECEIVE_PHY), "LPU RX Phy Layer"}, 624 { CHP_F, MnT6(lpux), LR4(TRANSMIT_PHY), "LPU TX Phy Layer"}, 625 { CHP_F, MnT6(lpus), LR4(LTSSM), "LPU LTSSM"}, 626 { CHP_F, MnT6(lpug), LR4(GIGABLAZE_GLUE), "LPU GigaBlaze Glue"}, 627 }; 628 629 #define PX_ERR_REG_KEYS (sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0])) 630 631 typedef struct px_err_ss { 632 uint64_t err_status[PX_ERR_REG_KEYS]; 633 } px_err_ss_t; 634 635 static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block); 636 static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, 637 px_err_ss_t *ss); 638 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, 639 int err, int caller); 640 641 /* 642 * px_err_cb_intr: 643 * Interrupt handler for the JBC/UBC block. 644 * o lock 645 * o create derr 646 * o px_err_cmn_intr 647 * o unlock 648 * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED) 649 */ 650 uint_t 651 px_err_cb_intr(caddr_t arg) 652 { 653 px_fault_t *px_fault_p = (px_fault_t *)arg; 654 dev_info_t *rpdip = px_fault_p->px_fh_dip; 655 px_t *px_p = DIP_TO_STATE(rpdip); 656 int err; 657 ddi_fm_error_t derr; 658 659 /* Create the derr */ 660 bzero(&derr, sizeof (ddi_fm_error_t)); 661 derr.fme_version = DDI_FME_VERSION; 662 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 663 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 664 665 if (px_fm_enter(px_p) != DDI_SUCCESS) 666 goto done; 667 668 err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST); 669 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, 670 INTR_IDLE_STATE); 671 672 px_err_panic(err, PX_HB, PX_NO_ERROR, B_TRUE); 673 px_fm_exit(px_p); 674 px_err_panic(err, PX_HB, PX_NO_ERROR, B_FALSE); 675 676 done: 677 return (DDI_INTR_CLAIMED); 678 } 679 680 /* 681 * px_err_dmc_pec_intr: 682 * Interrupt handler for the DMC/PEC block. 683 * o lock 684 * o create derr 685 * o px_err_cmn_intr(leaf, with out cb) 686 * o pcie_scan_fabric (leaf) 687 * o unlock 688 * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED) 689 */ 690 uint_t 691 px_err_dmc_pec_intr(caddr_t arg) 692 { 693 px_fault_t *px_fault_p = (px_fault_t *)arg; 694 dev_info_t *rpdip = px_fault_p->px_fh_dip; 695 px_t *px_p = DIP_TO_STATE(rpdip); 696 int rc_err, fab_err; 697 ddi_fm_error_t derr; 698 699 /* Create the derr */ 700 bzero(&derr, sizeof (ddi_fm_error_t)); 701 derr.fme_version = DDI_FME_VERSION; 702 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 703 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 704 705 if (px_fm_enter(px_p) != DDI_SUCCESS) 706 goto done; 707 708 /* send ereport/handle/clear fire registers */ 709 rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); 710 711 /* Check all child devices for errors */ 712 fab_err = px_scan_fabric(px_p, rpdip, &derr); 713 714 /* Set the interrupt state to idle */ 715 (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, 716 INTR_IDLE_STATE); 717 718 px_err_panic(rc_err, PX_RC, fab_err, B_TRUE); 719 px_fm_exit(px_p); 720 px_err_panic(rc_err, PX_RC, fab_err, B_FALSE); 721 722 done: 723 return (DDI_INTR_CLAIMED); 724 } 725 726 /* 727 * Proper csr_base is responsibility of the caller. (Called from px_lib_dev_init 728 * via px_err_reg_setup_all for pcie error registers; called from 729 * px_cb_add_intr for jbc/ubc from px_cb_attach.) 730 * 731 * Note: reg_id is passed in instead of reg_desc since this function is called 732 * from px_lib4u.c, which doesn't know about the structure of the table. 733 */ 734 void 735 px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base) 736 { 737 const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id]; 738 uint64_t intr_mask = *reg_desc_p->intr_mask_p; 739 uint64_t log_mask = *reg_desc_p->log_mask_p; 740 741 /* Enable logs if it exists */ 742 if (reg_desc_p->log_addr != NULL) 743 CSR_XS(csr_base, reg_desc_p->log_addr, log_mask); 744 745 /* 746 * For readability you in code you set 1 to enable an interrupt. 747 * But in Fire it's backwards. You set 1 to *disable* an intr. 
748 * Reverse the user tunable intr mask field. 749 * 750 * Disable All Errors 751 * Clear All Errors 752 * Enable Errors 753 */ 754 CSR_XS(csr_base, reg_desc_p->enable_addr, 0); 755 CSR_XS(csr_base, reg_desc_p->clear_addr, -1); 756 CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask); 757 DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg, 758 CSR_XR(csr_base, reg_desc_p->enable_addr)); 759 DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg, 760 CSR_XR(csr_base, reg_desc_p->status_addr)); 761 DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg, 762 CSR_XR(csr_base, reg_desc_p->clear_addr)); 763 if (reg_desc_p->log_addr != NULL) { 764 DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg, 765 CSR_XR(csr_base, reg_desc_p->log_addr)); 766 } 767 } 768 769 void 770 px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base) 771 { 772 const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id]; 773 uint64_t val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0; 774 775 if (reg_desc_p->log_addr != NULL) 776 CSR_XS(csr_base, reg_desc_p->log_addr, val); 777 CSR_XS(csr_base, reg_desc_p->enable_addr, val); 778 } 779 780 /* 781 * Set up pcie error registers. 782 */ 783 void 784 px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable) 785 { 786 px_err_id_t reg_id; 787 const px_err_reg_desc_t *reg_desc_p; 788 void (*px_err_reg_func)(px_err_id_t, caddr_t); 789 790 /* 791 * JBC or XBC are enabled during adding of common block interrupts, 792 * not done here. 793 */ 794 px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable); 795 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) { 796 reg_desc_p = &px_err_reg_tbl[reg_id]; 797 if ((reg_desc_p->chip_mask & chip_mask) && 798 (reg_desc_p->reg_bank == PX_REG_CSR)) 799 px_err_reg_func(reg_id, csr_base); 800 } 801 } 802 803 /* 804 * px_err_cmn_intr: 805 * Common function called by trap, mondo and fabric intr. 
806 * o Snap shot current fire registers 807 * o check for safe access 808 * o send ereport and clear snap shot registers 809 * o create and queue RC info for later use in fabric scan. 810 * o RUC/WUC, PTLP, MMU Errors(CA), UR 811 * o check severity of snap shot registers 812 * 813 * @param px_p leaf in which to check access 814 * @param derr fm err data structure to be updated 815 * @param caller PX_TRAP_CALL | PX_INTR_CALL 816 * @param block PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL 817 * @return err PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED 818 */ 819 int 820 px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) 821 { 822 px_err_ss_t ss = {0}; 823 int err; 824 825 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 826 827 /* check for safe access */ 828 px_err_safeacc_check(px_p, derr); 829 830 /* snap shot the current fire registers */ 831 px_err_snapshot(px_p, &ss, block); 832 833 /* send ereports/handle/clear registers */ 834 err = px_err_erpt_and_clr(px_p, derr, &ss); 835 836 /* check for error severity */ 837 err = px_err_check_severity(px_p, derr, err, caller); 838 839 /* Mark the On Trap Handle if an error occured */ 840 if (err != PX_NO_ERROR) { 841 px_pec_t *pec_p = px_p->px_pec_p; 842 on_trap_data_t *otd = pec_p->pec_ontrap_data; 843 844 if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS)) 845 otd->ot_trap |= OT_DATA_ACCESS; 846 } 847 848 return (err); 849 } 850 851 /* 852 * Static function 853 */ 854 855 /* 856 * px_err_snapshot: 857 * Take a current snap shot of all the fire error registers. This includes 858 * JBC/UBC, DMC, and PEC depending on the block flag 859 * 860 * @param px_p leaf in which to take the snap shot. 861 * @param ss pre-allocated memory to store the snap shot. 862 * @param chk_cb boolean on whether to store jbc/ubc register. 
863 */ 864 static void 865 px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block) 866 { 867 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 868 caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; 869 caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; 870 caddr_t csr_base; 871 uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p); 872 const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl; 873 px_err_id_t reg_id; 874 875 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) { 876 if (!(reg_desc_p->chip_mask & chip_mask)) 877 continue; 878 879 if ((block & PX_FM_BLOCK_HOST) && 880 (reg_desc_p->reg_bank == PX_REG_XBC)) 881 csr_base = xbc_csr_base; 882 else if ((block & PX_FM_BLOCK_PCIE) && 883 (reg_desc_p->reg_bank == PX_REG_CSR)) 884 csr_base = pec_csr_base; 885 else { 886 ss_p->err_status[reg_id] = 0; 887 continue; 888 } 889 890 ss_p->err_status[reg_id] = CSR_XR(csr_base, 891 reg_desc_p->status_addr); 892 } 893 } 894 895 /* 896 * px_err_erpt_and_clr: 897 * This function does the following thing to all the fire registers based 898 * on an earlier snap shot. 899 * o Send ereport 900 * o Handle the error 901 * o Clear the error 902 * 903 * @param px_p leaf in which to take the snap shot. 904 * @param derr fm err in which the ereport is to be based on 905 * @param ss_p pre-allocated memory to store the snap shot. 
906 */ 907 static int 908 px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p) 909 { 910 dev_info_t *rpdip = px_p->px_dip; 911 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 912 caddr_t csr_base; 913 const px_err_reg_desc_t *err_reg_tbl; 914 px_err_bit_desc_t *err_bit_tbl; 915 px_err_bit_desc_t *err_bit_desc; 916 917 uint64_t *count_mask; 918 uint64_t clear_addr; 919 uint64_t ss_reg; 920 921 int (*err_handler)(); 922 int (*erpt_handler)(); 923 int reg_id, key; 924 int err = PX_NO_ERROR; 925 int biterr = 0; 926 927 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 928 929 /* send erport/handle/clear JBC errors */ 930 for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) { 931 /* Get the correct register description table */ 932 err_reg_tbl = &px_err_reg_tbl[reg_id]; 933 934 /* Only look at enabled groups. */ 935 if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p)))) 936 continue; 937 938 /* Get the correct CSR BASE */ 939 csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank]; 940 941 /* If there are no errors in this register, continue */ 942 ss_reg = ss_p->err_status[reg_id]; 943 if (!ss_reg) 944 continue; 945 946 /* Get pointers to masks and register addresses */ 947 count_mask = err_reg_tbl->count_mask_p; 948 clear_addr = err_reg_tbl->clear_addr; 949 950 /* Get the register BIT description table */ 951 err_bit_tbl = err_reg_tbl->err_bit_tbl; 952 953 /* For each known bit in the register send erpt and handle */ 954 for (key = 0; key < err_reg_tbl->err_bit_keys; key++) { 955 /* 956 * If the ss_reg is set for this bit, 957 * send ereport and handle 958 */ 959 err_bit_desc = &err_bit_tbl[key]; 960 if (!BIT_TST(ss_reg, err_bit_desc->bit)) 961 continue; 962 963 /* Increment the counter if necessary */ 964 if (BIT_TST(*count_mask, err_bit_desc->bit)) { 965 err_bit_desc->counter++; 966 } 967 968 /* Error Handle for this bit */ 969 err_handler = err_bit_desc->err_handler; 970 if (err_handler) { 971 biterr = err_handler(rpdip, csr_base, derr, 972 
err_reg_tbl, err_bit_desc); 973 err |= biterr; 974 } 975 976 /* 977 * Send the ereport if it's an UNEXPECTED err. 978 * This is the only place where PX_EXPECTED is utilized. 979 */ 980 erpt_handler = err_bit_desc->erpt_handler; 981 if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) || 982 (biterr == PX_EXPECTED)) 983 continue; 984 985 if (erpt_handler) 986 (void) erpt_handler(rpdip, csr_base, ss_reg, 987 derr, err_bit_desc->bit, 988 err_bit_desc->class_name); 989 } 990 991 /* Clear the register and error */ 992 CSR_XS(csr_base, clear_addr, ss_reg); 993 } 994 995 return (err); 996 } 997 998 /* 999 * px_err_check_severity: 1000 * Check the severity of the fire error based on an earlier snapshot 1001 * 1002 * @param px_p leaf in which to take the snap shot. 1003 * @param derr fm err in which the ereport is to be based on 1004 * @param err fire register error status 1005 * @param caller PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL 1006 */ 1007 static int 1008 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller) 1009 { 1010 px_pec_t *pec_p = px_p->px_pec_p; 1011 boolean_t is_safeacc = B_FALSE; 1012 1013 /* 1014 * Nothing to do if called with no error. 1015 * The err could have already been set to PX_NO_PANIC, which means the 1016 * system doesn't need to panic, but PEEK/POKE still failed. 1017 */ 1018 if (err == PX_NO_ERROR) 1019 return (err); 1020 1021 /* Cautious access error handling */ 1022 switch (derr->fme_flag) { 1023 case DDI_FM_ERR_EXPECTED: 1024 if (caller == PX_TRAP_CALL) { 1025 /* 1026 * for ddi_caut_get treat all events as nonfatal 1027 * The trampoline will set err_ena = 0, 1028 * err_status = NONFATAL. 1029 */ 1030 derr->fme_status = DDI_FM_NONFATAL; 1031 is_safeacc = B_TRUE; 1032 } else { 1033 /* 1034 * For ddi_caut_put treat all events as nonfatal. Here 1035 * we have the handle and can call ndi_fm_acc_err_set(). 
1036 */ 1037 derr->fme_status = DDI_FM_NONFATAL; 1038 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 1039 is_safeacc = B_TRUE; 1040 } 1041 break; 1042 case DDI_FM_ERR_PEEK: 1043 case DDI_FM_ERR_POKE: 1044 /* 1045 * For ddi_peek/poke treat all events as nonfatal. 1046 */ 1047 is_safeacc = B_TRUE; 1048 break; 1049 default: 1050 is_safeacc = B_FALSE; 1051 } 1052 1053 /* re-adjust error status from safe access, forgive all errors */ 1054 if (is_safeacc) 1055 return (PX_NO_PANIC); 1056 1057 return (err); 1058 } 1059 1060 /* predefined convenience functions */ 1061 /* ARGSUSED */ 1062 void 1063 px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, 1064 px_err_bit_desc_t *err_bit_descr, char *msg) 1065 { 1066 DBG(DBG_ERR_INTR, rpdip, 1067 "Bit %d, %s, at %s(0x%x) has occured %d times with a severity " 1068 "of \"%s\"\n", 1069 err_bit_descr->bit, err_bit_descr->class_name, 1070 err_reg_descr->msg, err_reg_descr->status_addr, 1071 err_bit_descr->counter, msg); 1072 } 1073 1074 /* ARGSUSED */ 1075 int 1076 px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, 1077 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1078 px_err_bit_desc_t *err_bit_descr) 1079 { 1080 if (px_log & PX_HW_RESET) { 1081 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1082 "HW RESET"); 1083 } 1084 1085 return (PX_HW_RESET); 1086 } 1087 1088 /* ARGSUSED */ 1089 int 1090 px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, 1091 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1092 px_err_bit_desc_t *err_bit_descr) 1093 { 1094 if (px_log & PX_PANIC) { 1095 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC"); 1096 } 1097 1098 return (PX_PANIC); 1099 } 1100 1101 /* ARGSUSED */ 1102 int 1103 px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, 1104 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1105 px_err_bit_desc_t *err_bit_descr) 1106 { 1107 if (px_log & PX_PROTECTED) { 1108 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 
1109 "PROTECTED"); 1110 } 1111 1112 return (PX_PROTECTED); 1113 } 1114 1115 /* ARGSUSED */ 1116 int 1117 px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, 1118 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1119 px_err_bit_desc_t *err_bit_descr) 1120 { 1121 if (px_log & PX_NO_PANIC) { 1122 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1123 "NO PANIC"); 1124 } 1125 1126 return (PX_NO_PANIC); 1127 } 1128 1129 /* ARGSUSED */ 1130 int 1131 px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, 1132 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1133 px_err_bit_desc_t *err_bit_descr) 1134 { 1135 if (px_log & PX_NO_ERROR) { 1136 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1137 "NO ERROR"); 1138 } 1139 1140 return (PX_NO_ERROR); 1141 } 1142 1143 /* ARGSUSED */ 1144 PX_ERPT_SEND_DEC(do_not) 1145 { 1146 return (PX_NO_ERROR); 1147 } 1148 1149 /* 1150 * Search the px_cb_list_t embedded in the px_cb_t for the 1151 * px_t of the specified Leaf (leaf_id). Return its associated dip. 
1152 */ 1153 static dev_info_t * 1154 px_err_search_cb(px_cb_t *px_cb_p, uint_t leaf_id) 1155 { 1156 int i; 1157 px_cb_list_t *pxl_elemp; 1158 1159 for (i = px_cb_p->attachcnt, pxl_elemp = px_cb_p->pxl; i > 0; 1160 i--, pxl_elemp = pxl_elemp->next) { 1161 if ((((pxu_t *)pxl_elemp->pxp->px_plat_p)->portid & 1162 OBERON_PORT_ID_LEAF_MASK) == leaf_id) { 1163 return (pxl_elemp->pxp->px_dip); 1164 } 1165 } 1166 return (NULL); 1167 } 1168 1169 /* UBC FATAL - see io erpt doc, section 1.1 */ 1170 /* ARGSUSED */ 1171 PX_ERPT_SEND_DEC(ubc_fatal) 1172 { 1173 char buf[FM_MAX_CLASS]; 1174 uint64_t memory_ue_log, marked; 1175 char unum[FM_MAX_CLASS]; 1176 int unum_length; 1177 uint64_t device_id = 0; 1178 uint8_t cpu_version = 0; 1179 nvlist_t *resource = NULL; 1180 uint64_t ubc_intr_status; 1181 px_t *px_p; 1182 px_cb_t *px_cb_p; 1183 dev_info_t *actual_dip; 1184 1185 unum[0] = '\0'; 1186 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1187 1188 memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG); 1189 marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) & 1190 UBC_MEMORY_UE_LOG_MARKED_MASK; 1191 1192 if ((strstr(class_name, "ubc.piowtue") != NULL) || 1193 (strstr(class_name, "ubc.piowbeue") != NULL) || 1194 (strstr(class_name, "ubc.piorbeue") != NULL) || 1195 (strstr(class_name, "ubc.dmarduea") != NULL) || 1196 (strstr(class_name, "ubc.dmardueb") != NULL)) { 1197 int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) & 1198 UBC_MEMORY_UE_LOG_EID_MASK; 1199 (void) strncat(buf, ubc_class_eid_qualifier[eid], 1200 FM_MAX_CLASS); 1201 1202 if (eid == UBC_EID_MEM) { 1203 uint64_t phys_addr = memory_ue_log & 1204 MMU_OBERON_PADDR_MASK; 1205 uint64_t offset = (uint64_t)-1; 1206 1207 resource = fm_nvlist_create(NULL); 1208 if (&plat_get_mem_unum) { 1209 if ((plat_get_mem_unum(0, 1210 phys_addr, 0, B_TRUE, 0, unum, 1211 FM_MAX_CLASS, &unum_length)) != 0) 1212 unum[0] = '\0'; 1213 } 1214 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1215 NULL, unum, NULL, offset); 1216 1217 } else 
if (eid == UBC_EID_CPU) { 1218 int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK); 1219 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1220 1221 resource = fm_nvlist_create(NULL); 1222 cpu_version = cpunodes[cpuid].version; 1223 device_id = cpunodes[cpuid].device_id; 1224 (void) snprintf(sbuf, sizeof (sbuf), "%lX", 1225 device_id); 1226 (void) fm_fmri_cpu_set(resource, 1227 FM_CPU_SCHEME_VERSION, NULL, cpuid, 1228 &cpu_version, sbuf); 1229 } 1230 } 1231 1232 /* 1233 * For most of the errors represented in the UBC Interrupt Status 1234 * register, one can compute the dip of the actual Leaf that was 1235 * involved in the error. To do this, find the px_cb_t structure 1236 * that is shared between a pair of Leaves (eg, LeafA and LeafB). 1237 * 1238 * If any of the error bits for LeafA are set in the hardware 1239 * register, search the list of px_t's rooted in the px_cb_t for 1240 * the one corresponding to LeafA. If error bits for LeafB are set, 1241 * search the list for LeafB's px_t. The px_t references its 1242 * associated dip. 
1243 */ 1244 px_p = DIP_TO_STATE(rpdip); 1245 px_cb_p = ((pxu_t *)px_p->px_plat_p)->px_cb_p; 1246 1247 /* read hardware register */ 1248 ubc_intr_status = CSR_XR(csr_base, UBC_INTERRUPT_STATUS); 1249 1250 if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFA) != 0) { 1251 /* then Leaf A is involved in the error */ 1252 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_A); 1253 ASSERT(actual_dip != NULL); 1254 rpdip = actual_dip; 1255 } else if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFB) != 0) { 1256 /* then Leaf B is involved in the error */ 1257 actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_B); 1258 ASSERT(actual_dip != NULL); 1259 rpdip = actual_dip; 1260 } /* else error cannot be associated with a Leaf */ 1261 1262 if (resource) { 1263 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1264 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1265 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE, 1266 OBERON_UBC_ELE, DATA_TYPE_UINT64, 1267 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE), 1268 OBERON_UBC_IE, DATA_TYPE_UINT64, 1269 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE), 1270 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status, 1271 OBERON_UBC_ESS, DATA_TYPE_UINT64, 1272 CSR_XR(csr_base, UBC_ERROR_STATUS_SET), 1273 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log, 1274 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum, 1275 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id, 1276 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version, 1277 OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource, 1278 NULL); 1279 fm_nvlist_destroy(resource, FM_NVA_FREE); 1280 } else { 1281 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1282 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1283 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE, 1284 OBERON_UBC_ELE, DATA_TYPE_UINT64, 1285 CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE), 1286 OBERON_UBC_IE, DATA_TYPE_UINT64, 1287 CSR_XR(csr_base, UBC_INTERRUPT_ENABLE), 1288 OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status, 1289 OBERON_UBC_ESS, DATA_TYPE_UINT64, 1290 CSR_XR(csr_base, 
UBC_ERROR_STATUS_SET), 1291 OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log, 1292 OBERON_UBC_UNUM, DATA_TYPE_STRING, unum, 1293 OBERON_UBC_DID, DATA_TYPE_UINT64, device_id, 1294 OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version, 1295 NULL); 1296 } 1297 1298 return (PX_NO_PANIC); 1299 } 1300 1301 /* JBC FATAL */ 1302 PX_ERPT_SEND_DEC(jbc_fatal) 1303 { 1304 char buf[FM_MAX_CLASS]; 1305 boolean_t pri = PX_ERR_IS_PRI(bit); 1306 1307 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1308 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1309 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1310 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1311 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1312 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1313 FIRE_JBC_IE, DATA_TYPE_UINT64, 1314 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1315 FIRE_JBC_IS, DATA_TYPE_UINT64, 1316 ss_reg, 1317 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1318 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1319 FIRE_JBC_FEL1, DATA_TYPE_UINT64, 1320 CSR_XR(csr_base, FATAL_ERROR_LOG_1), 1321 FIRE_JBC_FEL2, DATA_TYPE_UINT64, 1322 CSR_XR(csr_base, FATAL_ERROR_LOG_2), 1323 NULL); 1324 1325 return (PX_NO_PANIC); 1326 } 1327 1328 /* JBC MERGE */ 1329 PX_ERPT_SEND_DEC(jbc_merge) 1330 { 1331 char buf[FM_MAX_CLASS]; 1332 boolean_t pri = PX_ERR_IS_PRI(bit); 1333 1334 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1335 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1336 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1337 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1338 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1339 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1340 FIRE_JBC_IE, DATA_TYPE_UINT64, 1341 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1342 FIRE_JBC_IS, DATA_TYPE_UINT64, 1343 ss_reg, 1344 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1345 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1346 FIRE_JBC_MTEL, DATA_TYPE_UINT64, 1347 CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG), 1348 NULL); 1349 1350 return (PX_NO_PANIC); 1351 } 1352 1353 /* 1354 * JBC Merge buffer retryable errors: 1355 * Merge 
buffer parity error (rd_buf): PIO or DMA 1356 * Merge buffer parity error (wr_buf): PIO or DMA 1357 */ 1358 /* ARGSUSED */ 1359 int 1360 px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base, 1361 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1362 px_err_bit_desc_t *err_bit_descr) 1363 { 1364 /* 1365 * Holder function to attempt error recovery. When the features 1366 * are in place, look up the address of the transaction in: 1367 * 1368 * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); 1369 * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1370 * 1371 * If the error is a secondary error, there is no log information 1372 * just panic as it is unknown which address has been affected. 1373 * 1374 * Remember the address is pretranslation and might be hard to look 1375 * up the appropriate driver based on the PA. 1376 */ 1377 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1378 err_bit_descr)); 1379 } 1380 1381 /* JBC Jbusint IN */ 1382 PX_ERPT_SEND_DEC(jbc_in) 1383 { 1384 char buf[FM_MAX_CLASS]; 1385 boolean_t pri = PX_ERR_IS_PRI(bit); 1386 1387 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1388 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1389 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1390 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1391 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1392 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1393 FIRE_JBC_IE, DATA_TYPE_UINT64, 1394 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1395 FIRE_JBC_IS, DATA_TYPE_UINT64, 1396 ss_reg, 1397 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1398 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1399 FIRE_JBC_JITEL1, DATA_TYPE_UINT64, 1400 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG), 1401 FIRE_JBC_JITEL2, DATA_TYPE_UINT64, 1402 CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2), 1403 NULL); 1404 1405 return (PX_NO_PANIC); 1406 } 1407 1408 /* 1409 * JBC Jbusint IN retryable errors 1410 * Log Reg[42:0]. 
1411 * Write Data Parity Error: PIO Writes 1412 * Read Data Parity Error: DMA Reads 1413 */ 1414 int 1415 px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base, 1416 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1417 px_err_bit_desc_t *err_bit_descr) 1418 { 1419 /* 1420 * Holder function to attempt error recovery. When the features 1421 * are in place, look up the address of the transaction in: 1422 * 1423 * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); 1424 * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; 1425 * 1426 * If the error is a secondary error, there is no log information 1427 * just panic as it is unknown which address has been affected. 1428 * 1429 * Remember the address is pretranslation and might be hard to look 1430 * up the appropriate driver based on the PA. 1431 */ 1432 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1433 err_bit_descr)); 1434 } 1435 1436 1437 /* JBC Jbusint Out */ 1438 PX_ERPT_SEND_DEC(jbc_out) 1439 { 1440 char buf[FM_MAX_CLASS]; 1441 boolean_t pri = PX_ERR_IS_PRI(bit); 1442 1443 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1444 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1445 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1446 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1447 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1448 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1449 FIRE_JBC_IE, DATA_TYPE_UINT64, 1450 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1451 FIRE_JBC_IS, DATA_TYPE_UINT64, 1452 ss_reg, 1453 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1454 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1455 FIRE_JBC_JOTEL1, DATA_TYPE_UINT64, 1456 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG), 1457 FIRE_JBC_JOTEL2, DATA_TYPE_UINT64, 1458 CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2), 1459 NULL); 1460 1461 return (PX_NO_PANIC); 1462 } 1463 1464 /* JBC Dmcint ODCD */ 1465 PX_ERPT_SEND_DEC(jbc_odcd) 1466 { 1467 char buf[FM_MAX_CLASS]; 1468 boolean_t pri = PX_ERR_IS_PRI(bit); 1469 1470 (void) 
snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1471 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1472 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1473 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1474 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1475 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1476 FIRE_JBC_IE, DATA_TYPE_UINT64, 1477 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1478 FIRE_JBC_IS, DATA_TYPE_UINT64, 1479 ss_reg, 1480 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1481 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1482 FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64, 1483 CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG), 1484 NULL); 1485 1486 return (PX_NO_PANIC); 1487 } 1488 1489 /* 1490 * JBC Dmcint ODCO nonfatal errer handling - 1491 * PIO data parity error: PIO 1492 */ 1493 /* ARGSUSED */ 1494 int 1495 px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base, 1496 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1497 px_err_bit_desc_t *err_bit_descr) 1498 { 1499 /* 1500 * Holder function to attempt error recovery. When the features 1501 * are in place, look up the address of the transaction in: 1502 * 1503 * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); 1504 * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; 1505 * 1506 * If the error is a secondary error, there is no log information 1507 * just panic as it is unknown which address has been affected. 1508 * 1509 * Remember the address is pretranslation and might be hard to look 1510 * up the appropriate driver based on the PA. 1511 */ 1512 return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1513 err_bit_descr)); 1514 } 1515 1516 /* Does address in DMCINT error log register match address of pcitool access? 
*/ 1517 static boolean_t 1518 px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base) 1519 { 1520 px_t *px_p = DIP_TO_STATE(rpdip); 1521 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1522 caddr_t pcitool_addr = pxu_p->pcitool_addr; 1523 caddr_t errlog_addr = 1524 (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS); 1525 1526 return (pcitool_addr == errlog_addr); 1527 } 1528 1529 /* 1530 * JBC Dmcint ODCD errer handling for errors which are forgivable during a safe 1531 * access. (This will be most likely be a PCItool access.) If not a safe 1532 * access context, treat like jbc_dmcint_odcd. 1533 * Unmapped PIO read error: pio:read:M:nonfatal 1534 * Unmapped PIO write error: pio:write:M:nonfatal 1535 * Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal 1536 * Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal 1537 */ 1538 /* ARGSUSED */ 1539 int 1540 px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, 1541 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1542 px_err_bit_desc_t *err_bit_descr) 1543 { 1544 boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); 1545 1546 if (!pri) 1547 return (px_err_panic_handle(rpdip, csr_base, derr, 1548 err_reg_descr, err_bit_descr)); 1549 /* 1550 * Got an error which is forgivable during a PCItool access. 1551 * 1552 * Don't do handler check since the error may otherwise be unfairly 1553 * attributed to a device. Just return. 1554 * 1555 * Note: There is a hole here in that a legitimate error can come in 1556 * while a PCItool access is in play and be forgiven. This is possible 1557 * though not likely. 
1558 */ 1559 if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) && 1560 (px_jbc_pcitool_addr_match(rpdip, csr_base))) 1561 return (px_err_protected_handle(rpdip, csr_base, derr, 1562 err_reg_descr, err_bit_descr)); 1563 1564 return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr, 1565 err_reg_descr, err_bit_descr)); 1566 } 1567 1568 /* JBC Dmcint IDC */ 1569 PX_ERPT_SEND_DEC(jbc_idc) 1570 { 1571 char buf[FM_MAX_CLASS]; 1572 boolean_t pri = PX_ERR_IS_PRI(bit); 1573 1574 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1575 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1576 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1577 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1578 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1579 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1580 FIRE_JBC_IE, DATA_TYPE_UINT64, 1581 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1582 FIRE_JBC_IS, DATA_TYPE_UINT64, 1583 ss_reg, 1584 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1585 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1586 FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64, 1587 CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG), 1588 NULL); 1589 1590 return (PX_NO_PANIC); 1591 } 1592 1593 /* JBC CSR */ 1594 PX_ERPT_SEND_DEC(jbc_csr) 1595 { 1596 char buf[FM_MAX_CLASS]; 1597 boolean_t pri = PX_ERR_IS_PRI(bit); 1598 1599 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1600 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1601 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1602 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1603 FIRE_JBC_ELE, DATA_TYPE_UINT64, 1604 CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE), 1605 FIRE_JBC_IE, DATA_TYPE_UINT64, 1606 CSR_XR(csr_base, JBC_INTERRUPT_ENABLE), 1607 FIRE_JBC_IS, DATA_TYPE_UINT64, 1608 ss_reg, 1609 FIRE_JBC_ESS, DATA_TYPE_UINT64, 1610 CSR_XR(csr_base, JBC_ERROR_STATUS_SET), 1611 "jbc-error-reg", DATA_TYPE_UINT64, 1612 CSR_XR(csr_base, CSR_ERROR_LOG), 1613 NULL); 1614 1615 return (PX_NO_PANIC); 1616 } 1617 1618 /* DMC IMU RDS */ 1619 PX_ERPT_SEND_DEC(imu_rds) 1620 { 1621 char buf[FM_MAX_CLASS]; 1622 boolean_t pri = 
PX_ERR_IS_PRI(bit); 1623 1624 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1625 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1626 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1627 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1628 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1629 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1630 FIRE_IMU_IE, DATA_TYPE_UINT64, 1631 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1632 FIRE_IMU_IS, DATA_TYPE_UINT64, 1633 ss_reg, 1634 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1635 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1636 FIRE_IMU_RDS, DATA_TYPE_UINT64, 1637 CSR_XR(csr_base, IMU_RDS_ERROR_LOG), 1638 NULL); 1639 1640 return (PX_NO_PANIC); 1641 } 1642 1643 /* handle EQ overflow */ 1644 /* ARGSUSED */ 1645 int 1646 px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, 1647 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1648 px_err_bit_desc_t *err_bit_descr) 1649 { 1650 px_t *px_p = DIP_TO_STATE(rpdip); 1651 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1652 int err = px_err_check_eq(rpdip); 1653 1654 if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) { 1655 return (px_err_panic_handle(rpdip, csr_base, derr, 1656 err_reg_descr, err_bit_descr)); 1657 } else { 1658 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1659 err_reg_descr, err_bit_descr)); 1660 } 1661 } 1662 1663 /* DMC IMU SCS */ 1664 PX_ERPT_SEND_DEC(imu_scs) 1665 { 1666 char buf[FM_MAX_CLASS]; 1667 boolean_t pri = PX_ERR_IS_PRI(bit); 1668 1669 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1670 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1671 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1672 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1673 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1674 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1675 FIRE_IMU_IE, DATA_TYPE_UINT64, 1676 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1677 FIRE_IMU_IS, DATA_TYPE_UINT64, 1678 ss_reg, 1679 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1680 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1681 FIRE_IMU_SCS, DATA_TYPE_UINT64, 1682 
CSR_XR(csr_base, IMU_SCS_ERROR_LOG), 1683 NULL); 1684 1685 return (PX_NO_PANIC); 1686 } 1687 1688 /* DMC IMU */ 1689 PX_ERPT_SEND_DEC(imu) 1690 { 1691 char buf[FM_MAX_CLASS]; 1692 boolean_t pri = PX_ERR_IS_PRI(bit); 1693 1694 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1695 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1696 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1697 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1698 FIRE_IMU_ELE, DATA_TYPE_UINT64, 1699 CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE), 1700 FIRE_IMU_IE, DATA_TYPE_UINT64, 1701 CSR_XR(csr_base, IMU_INTERRUPT_ENABLE), 1702 FIRE_IMU_IS, DATA_TYPE_UINT64, 1703 ss_reg, 1704 FIRE_IMU_ESS, DATA_TYPE_UINT64, 1705 CSR_XR(csr_base, IMU_ERROR_STATUS_SET), 1706 NULL); 1707 1708 return (PX_NO_PANIC); 1709 } 1710 1711 /* DMC MMU TFAR/TFSR */ 1712 PX_ERPT_SEND_DEC(mmu_tfar_tfsr) 1713 { 1714 char buf[FM_MAX_CLASS]; 1715 boolean_t pri = PX_ERR_IS_PRI(bit); 1716 px_t *px_p = DIP_TO_STATE(rpdip); 1717 pcie_req_id_t fault_bdf = 0; 1718 uint16_t s_status = 0; 1719 1720 if (pri) { 1721 fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS) 1722 & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK << 1723 MMU_TRANSLATION_FAULT_STATUS_ID); 1724 s_status = PCI_STAT_S_TARG_AB; 1725 1726 /* Only PIO Fault Addresses are valid, this is DMA */ 1727 (void) px_rp_en_q(px_p, fault_bdf, NULL, s_status); 1728 } 1729 1730 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1731 1732 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1733 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1734 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1735 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1736 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1737 FIRE_MMU_IE, DATA_TYPE_UINT64, 1738 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1739 FIRE_MMU_IS, DATA_TYPE_UINT64, 1740 ss_reg, 1741 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1742 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1743 FIRE_MMU_TFAR, DATA_TYPE_UINT64, 1744 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS), 1745 FIRE_MMU_TFSR, 
DATA_TYPE_UINT64, 1746 CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS), 1747 NULL); 1748 1749 return (PX_NO_PANIC); 1750 } 1751 1752 /* DMC MMU */ 1753 PX_ERPT_SEND_DEC(mmu) 1754 { 1755 char buf[FM_MAX_CLASS]; 1756 boolean_t pri = PX_ERR_IS_PRI(bit); 1757 1758 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1759 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1760 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1761 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1762 FIRE_MMU_ELE, DATA_TYPE_UINT64, 1763 CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE), 1764 FIRE_MMU_IE, DATA_TYPE_UINT64, 1765 CSR_XR(csr_base, MMU_INTERRUPT_ENABLE), 1766 FIRE_MMU_IS, DATA_TYPE_UINT64, 1767 ss_reg, 1768 FIRE_MMU_ESS, DATA_TYPE_UINT64, 1769 CSR_XR(csr_base, MMU_ERROR_STATUS_SET), 1770 NULL); 1771 1772 return (PX_NO_PANIC); 1773 } 1774 1775 /* 1776 * IMU function to handle all Received but Not Enabled errors. 1777 * 1778 * These errors are due to transactions modes in which the PX driver was not 1779 * setup to be able to do. If possible, inform the driver that their DMA has 1780 * failed by marking their DMA handle as failed, but do not panic the system. 1781 * Most likely the address is not valid, as Fire wasn't setup to handle them in 1782 * the first place. 1783 * 1784 * These errors are not retryable, unless the PX mode has changed, otherwise the 1785 * same error will occur again. 
1786 */ 1787 int 1788 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, 1789 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1790 px_err_bit_desc_t *err_bit_descr) 1791 { 1792 pcie_req_id_t bdf; 1793 1794 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1795 goto done; 1796 1797 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1798 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, 1799 bdf); 1800 1801 done: 1802 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1803 err_bit_descr)); 1804 } 1805 1806 /* 1807 * IMU function to handle all invalid address errors. 1808 * 1809 * These errors are due to transactions in which the address is not recognized. 1810 * If possible, inform the driver that all DMAs have failed by marking their DMA 1811 * handles. Fire should not panic the system, it'll be up to the driver to 1812 * panic. The address logged is invalid. 1813 * 1814 * These errors are not retryable since retrying the same transaction with the 1815 * same invalid address will result in the same error. 1816 */ 1817 /* ARGSUSED */ 1818 int 1819 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, 1820 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1821 px_err_bit_desc_t *err_bit_descr) 1822 { 1823 pcie_req_id_t bdf; 1824 1825 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1826 goto done; 1827 1828 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1829 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, 1830 bdf); 1831 1832 done: 1833 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1834 err_bit_descr)); 1835 } 1836 1837 /* 1838 * IMU function to handle normal transactions that encounter a parity error. 1839 * 1840 * These errors are due to transactions that enouter a parity error. If 1841 * possible, inform the driver that their DMA have failed and that they should 1842 * retry. If Fire is unable to contact the leaf driver, panic the system. 
1843 * Otherwise, it'll be up to the device to determine is this is a panicable 1844 * error. 1845 */ 1846 /* ARGSUSED */ 1847 int 1848 px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, 1849 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1850 px_err_bit_desc_t *err_bit_descr) 1851 { 1852 uint64_t mmu_tfa; 1853 pcie_req_id_t bdf; 1854 int status = PF_HDL_NOTFOUND; 1855 1856 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1857 goto done; 1858 1859 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1860 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1861 status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 1862 (uint32_t)mmu_tfa, bdf); 1863 1864 done: 1865 if (status == PF_HDL_NOTFOUND) 1866 return (px_err_panic_handle(rpdip, csr_base, derr, 1867 err_reg_descr, err_bit_descr)); 1868 else 1869 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1870 err_reg_descr, err_bit_descr)); 1871 } 1872 1873 /* 1874 * wuc/ruc event - Mark the handle of the failed PIO access. 
Return "no_panic" 1875 */ 1876 /* ARGSUSED */ 1877 int 1878 px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, 1879 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1880 px_err_bit_desc_t *err_bit_descr) 1881 { 1882 px_t *px_p = DIP_TO_STATE(rpdip); 1883 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1884 uint64_t data; 1885 pf_pcie_adv_err_regs_t adv_reg; 1886 int sts; 1887 1888 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1889 goto done; 1890 1891 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 1892 adv_reg.pcie_ue_hdr[0] = (uint32_t)(data >> 32); 1893 adv_reg.pcie_ue_hdr[1] = (uint32_t)(data & 0xFFFFFFFF); 1894 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 1895 adv_reg.pcie_ue_hdr[2] = (uint32_t)(data >> 32); 1896 adv_reg.pcie_ue_hdr[3] = (uint32_t)(data & 0xFFFFFFFF); 1897 1898 pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg); 1899 sts = pf_hdl_lookup(rpdip, derr->fme_ena, adv_reg.pcie_ue_tgt_trans, 1900 adv_reg.pcie_ue_tgt_addr, adv_reg.pcie_ue_tgt_bdf); 1901 done: 1902 if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR)) 1903 return (px_err_protected_handle(rpdip, csr_base, derr, 1904 err_reg_descr, err_bit_descr)); 1905 1906 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1907 err_reg_descr, err_bit_descr)); 1908 } 1909 1910 /* 1911 * TLU LUP event - if caused by power management activity, then it is expected. 1912 * In all other cases, it is an error. 1913 */ 1914 /* ARGSUSED */ 1915 int 1916 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, 1917 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1918 px_err_bit_desc_t *err_bit_descr) 1919 { 1920 px_t *px_p = DIP_TO_STATE(rpdip); 1921 1922 /* 1923 * power management code is currently the only segment that sets 1924 * px_lup_pending to indicate its expectation for a healthy LUP 1925 * event. For all other occasions, LUP event should be flaged as 1926 * error condition. 1927 */ 1928 return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? 
1929 PX_NO_PANIC : PX_EXPECTED); 1930 } 1931 1932 /* 1933 * TLU LDN event - if caused by power management activity, then it is expected. 1934 * In all other cases, it is an error. 1935 */ 1936 /* ARGSUSED */ 1937 int 1938 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base, 1939 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1940 px_err_bit_desc_t *err_bit_descr) 1941 { 1942 px_t *px_p = DIP_TO_STATE(rpdip); 1943 return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED : 1944 PX_NO_PANIC); 1945 } 1946 1947 /* PEC ILU none - see io erpt doc, section 3.1 */ 1948 PX_ERPT_SEND_DEC(pec_ilu) 1949 { 1950 char buf[FM_MAX_CLASS]; 1951 boolean_t pri = PX_ERR_IS_PRI(bit); 1952 1953 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1954 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1955 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1956 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1957 FIRE_ILU_ELE, DATA_TYPE_UINT64, 1958 CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE), 1959 FIRE_ILU_IE, DATA_TYPE_UINT64, 1960 CSR_XR(csr_base, ILU_INTERRUPT_ENABLE), 1961 FIRE_ILU_IS, DATA_TYPE_UINT64, 1962 ss_reg, 1963 FIRE_ILU_ESS, DATA_TYPE_UINT64, 1964 CSR_XR(csr_base, ILU_ERROR_STATUS_SET), 1965 NULL); 1966 1967 return (PX_NO_PANIC); 1968 } 1969 1970 /* PCIEX UE Errors */ 1971 /* ARGSUSED */ 1972 int 1973 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base, 1974 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1975 px_err_bit_desc_t *err_bit_descr) 1976 { 1977 px_err_pcie_t regs = {0}; 1978 uint32_t err_bit; 1979 int err; 1980 uint64_t log; 1981 1982 if (err_bit_descr->bit < 32) { 1983 err_bit = (uint32_t)BITMASK(err_bit_descr->bit); 1984 regs.ue_reg = err_bit; 1985 regs.primary_ue = err_bit; 1986 1987 /* 1988 * Log the Received Log for PTLP, UR and UC. 
1989 */ 1990 if ((PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR | PCIE_AER_UCE_UC) & 1991 err_bit) { 1992 log = CSR_XR(csr_base, 1993 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG); 1994 regs.rx_hdr1 = (uint32_t)(log >> 32); 1995 regs.rx_hdr2 = (uint32_t)(log & 0xFFFFFFFF); 1996 1997 log = CSR_XR(csr_base, 1998 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG); 1999 regs.rx_hdr3 = (uint32_t)(log >> 32); 2000 regs.rx_hdr4 = (uint32_t)(log & 0xFFFFFFFF); 2001 } 2002 } else { 2003 regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2004 } 2005 2006 err = px_err_check_pcie(rpdip, derr, ®s); 2007 2008 if (err & PX_PANIC) { 2009 return (px_err_panic_handle(rpdip, csr_base, derr, 2010 err_reg_descr, err_bit_descr)); 2011 } else { 2012 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2013 err_reg_descr, err_bit_descr)); 2014 } 2015 } 2016 2017 /* PCI-E Uncorrectable Errors */ 2018 PX_ERPT_SEND_DEC(pciex_rx_ue) 2019 { 2020 char buf[FM_MAX_CLASS]; 2021 boolean_t pri = PX_ERR_IS_PRI(bit); 2022 2023 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2024 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2025 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2026 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2027 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2028 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2029 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2030 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2031 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2032 ss_reg, 2033 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2034 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2035 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2036 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2037 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2038 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2039 NULL); 2040 2041 return (PX_NO_PANIC); 2042 } 2043 2044 /* PCI-E Uncorrectable Errors */ 2045 PX_ERPT_SEND_DEC(pciex_tx_ue) 2046 { 2047 char buf[FM_MAX_CLASS]; 2048 boolean_t pri = PX_ERR_IS_PRI(bit); 2049 2050 (void) 
snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2051 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2052 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2053 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2054 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2055 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2056 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2057 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2058 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2059 ss_reg, 2060 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2061 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2062 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2063 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2064 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2065 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2066 NULL); 2067 2068 return (PX_NO_PANIC); 2069 } 2070 2071 /* PCI-E Uncorrectable Errors */ 2072 PX_ERPT_SEND_DEC(pciex_rx_tx_ue) 2073 { 2074 char buf[FM_MAX_CLASS]; 2075 boolean_t pri = PX_ERR_IS_PRI(bit); 2076 2077 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2078 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2079 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2080 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2081 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2082 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2083 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2084 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2085 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2086 ss_reg, 2087 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2088 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2089 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2090 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2091 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2092 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2093 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2094 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2095 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2096 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2097 NULL); 2098 2099 return 
(PX_NO_PANIC); 2100 } 2101 2102 /* PCI-E Uncorrectable Errors */ 2103 PX_ERPT_SEND_DEC(pciex_ue) 2104 { 2105 char buf[FM_MAX_CLASS]; 2106 boolean_t pri = PX_ERR_IS_PRI(bit); 2107 2108 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2109 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2110 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2111 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2112 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2113 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2114 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2115 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2116 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2117 ss_reg, 2118 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2119 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2120 NULL); 2121 2122 return (PX_NO_PANIC); 2123 } 2124 2125 /* PCIEX UE Errors */ 2126 /* ARGSUSED */ 2127 int 2128 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base, 2129 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 2130 px_err_bit_desc_t *err_bit_descr) 2131 { 2132 px_err_pcie_t regs = {0}; 2133 int err; 2134 2135 if (err_bit_descr->bit < 32) 2136 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit); 2137 else 2138 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2139 2140 err = px_err_check_pcie(rpdip, derr, ®s); 2141 2142 if (err & PX_PANIC) { 2143 return (px_err_panic_handle(rpdip, csr_base, derr, 2144 err_reg_descr, err_bit_descr)); 2145 } else { 2146 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2147 err_reg_descr, err_bit_descr)); 2148 } 2149 } 2150 2151 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ 2152 PX_ERPT_SEND_DEC(pciex_ce) 2153 { 2154 char buf[FM_MAX_CLASS]; 2155 boolean_t pri = PX_ERR_IS_PRI(bit); 2156 2157 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2158 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2159 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2160 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2161 FIRE_TLU_CELE, DATA_TYPE_UINT64, 2162 CSR_XR(csr_base, 
TLU_CORRECTABLE_ERROR_LOG_ENABLE), 2163 FIRE_TLU_CIE, DATA_TYPE_UINT64, 2164 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE), 2165 FIRE_TLU_CIS, DATA_TYPE_UINT64, 2166 ss_reg, 2167 FIRE_TLU_CESS, DATA_TYPE_UINT64, 2168 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), 2169 NULL); 2170 2171 return (PX_NO_PANIC); 2172 } 2173 2174 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ 2175 PX_ERPT_SEND_DEC(pciex_rx_oe) 2176 { 2177 char buf[FM_MAX_CLASS]; 2178 boolean_t pri = PX_ERR_IS_PRI(bit); 2179 2180 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2181 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2182 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2183 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2184 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2185 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2186 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2187 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2188 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2189 ss_reg, 2190 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2191 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2192 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2193 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), 2194 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2195 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), 2196 NULL); 2197 2198 return (PX_NO_PANIC); 2199 } 2200 2201 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ 2202 PX_ERPT_SEND_DEC(pciex_rx_tx_oe) 2203 { 2204 char buf[FM_MAX_CLASS]; 2205 boolean_t pri = PX_ERR_IS_PRI(bit); 2206 px_t *px_p = DIP_TO_STATE(rpdip); 2207 uint64_t rx_h1, rx_h2, tx_h1, tx_h2; 2208 uint16_t s_status; 2209 int sts; 2210 pcie_cpl_t *cpl; 2211 pf_pcie_adv_err_regs_t adv_reg; 2212 2213 rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG); 2214 rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG); 2215 tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 2216 tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 2217 2218 if ((bit == 
TLU_OTHER_EVENT_STATUS_SET_RUC_P) || 2219 (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) { 2220 adv_reg.pcie_ue_hdr[0] = (uint32_t)(rx_h1 >> 32); 2221 adv_reg.pcie_ue_hdr[1] = (uint32_t)rx_h1; 2222 adv_reg.pcie_ue_hdr[2] = (uint32_t)(rx_h2 >> 32); 2223 adv_reg.pcie_ue_hdr[3] = (uint32_t)rx_h2; 2224 2225 /* get completer bdf (fault bdf) from rx logs */ 2226 cpl = (pcie_cpl_t *)&adv_reg.pcie_ue_hdr[1]; 2227 2228 /* Figure out if UR/CA from rx logs */ 2229 if (cpl->status == PCIE_CPL_STS_UR) 2230 s_status = PCI_STAT_R_MAST_AB; 2231 else if (cpl->status == PCIE_CPL_STS_CA) 2232 s_status = PCI_STAT_R_TARG_AB; 2233 2234 adv_reg.pcie_ue_hdr[0] = (uint32_t)(tx_h1 >> 32); 2235 adv_reg.pcie_ue_hdr[1] = (uint32_t)tx_h1; 2236 adv_reg.pcie_ue_hdr[2] = (uint32_t)(tx_h2 >> 32); 2237 adv_reg.pcie_ue_hdr[3] = (uint32_t)tx_h2; 2238 2239 /* get fault addr from tx logs */ 2240 sts = pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg); 2241 2242 if (sts == DDI_SUCCESS) 2243 (void) px_rp_en_q(px_p, adv_reg.pcie_ue_tgt_bdf, 2244 adv_reg.pcie_ue_tgt_addr, s_status); 2245 } 2246 2247 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2248 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2249 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2250 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2251 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2252 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2253 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2254 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2255 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2256 ss_reg, 2257 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2258 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2259 FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1, 2260 FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2, 2261 FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1, 2262 FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2, 2263 NULL); 2264 2265 return (PX_NO_PANIC); 2266 } 2267 2268 /* TLU Other Event - see io erpt doc, section 3.9 */ 2269 PX_ERPT_SEND_DEC(pciex_oe) 2270 { 2271 char buf[FM_MAX_CLASS]; 2272 boolean_t pri = 
PX_ERR_IS_PRI(bit); 2273 2274 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2275 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2276 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2277 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2278 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2279 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2280 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2281 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2282 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2283 ss_reg, 2284 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2285 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2286 NULL); 2287 2288 return (PX_NO_PANIC); 2289 } 2290