/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4u Fire Error Handling
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/pcie.h>
#include <sys/pcie_impl.h>
#include "px_obj.h"
#include <px_regs.h>
#include <px_csr.h>
#include <sys/membar.h>
#include <sys/machcpuvar.h>
#include <sys/platform_module.h>
#include "px_lib4u.h"
#include "px_err.h"
#include "px_err_impl.h"
#include "oberon_regs.h"

/*
 * Patchable per-register-group mask tunables.  For each error register
 * group there are three masks:
 *   *_intr_mask  - which bits have their interrupt enabled
 *   *_log_mask   - which bits have error logging enabled
 *   *_count_mask - which bits bump the per-bit error counter when seen
 */

/* TLU Uncorrectable Errors: everything enabled */
uint64_t px_tlu_ue_intr_mask	= PX_ERR_EN_ALL;
uint64_t px_tlu_ue_log_mask	= PX_ERR_EN_ALL;
uint64_t px_tlu_ue_count_mask	= PX_ERR_EN_ALL;

/* TLU Correctable Errors: everything masked off */
uint64_t px_tlu_ce_intr_mask	= PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_log_mask	= PX_ERR_MASK_NONE;
uint64_t px_tlu_ce_count_mask	= PX_ERR_MASK_NONE;

/*
 * Do not enable Link Interrupts.
 * NOTE(review): 0x80000000800 appears to clear the same link-interrupt bit
 * in both the primary and secondary halves of the Other Event register --
 * confirm against the TLU Other Event register layout in px_regs.h.
 */
uint64_t px_tlu_oe_intr_mask	= PX_ERR_EN_ALL & ~0x80000000800;
uint64_t px_tlu_oe_log_mask	= PX_ERR_EN_ALL & ~0x80000000800;

uint64_t px_tlu_oe_count_mask	= PX_ERR_EN_ALL;

uint64_t px_mmu_intr_mask	= PX_ERR_EN_ALL;
uint64_t px_mmu_log_mask	= PX_ERR_EN_ALL;
uint64_t px_mmu_count_mask	= PX_ERR_EN_ALL;

uint64_t px_imu_intr_mask	= PX_ERR_EN_ALL;
uint64_t px_imu_log_mask	= PX_ERR_EN_ALL;
uint64_t px_imu_count_mask	= PX_ERR_EN_ALL;

/*
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_S) |
 * (1ull << ILU_INTERRUPT_ENABLE_IHB_PE_P);
 */
uint64_t px_ilu_intr_mask	= (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_log_mask	= (((uint64_t)0x10 << 32) | 0x10);
uint64_t px_ilu_count_mask	= PX_ERR_EN_ALL;

uint64_t px_ubc_intr_mask	= PX_ERR_EN_ALL;
uint64_t px_ubc_log_mask	= PX_ERR_EN_ALL;
uint64_t px_ubc_count_mask	= PX_ERR_EN_ALL;

uint64_t px_jbc_intr_mask	= PX_ERR_EN_ALL;
uint64_t px_jbc_log_mask	= PX_ERR_EN_ALL;
uint64_t px_jbc_count_mask	= PX_ERR_EN_ALL;

/*
 * LPU Intr Registers are reverse encoding from the registers above.
 * 1 = disable
 * 0 = enable
 *
 * Log and Count are however still the same.
 */
uint64_t px_lpul_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpul_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpul_count_mask	= PX_ERR_EN_ALL;

uint64_t px_lpup_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpup_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpup_count_mask	= PX_ERR_EN_ALL;

uint64_t px_lpur_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpur_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpur_count_mask	= PX_ERR_EN_ALL;

uint64_t px_lpux_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpux_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpux_count_mask	= PX_ERR_EN_ALL;

uint64_t px_lpus_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpus_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpus_count_mask	= PX_ERR_EN_ALL;

uint64_t px_lpug_intr_mask	= LPU_INTR_DISABLE;
uint64_t px_lpug_log_mask	= PX_ERR_EN_ALL;
uint64_t px_lpug_count_mask	= PX_ERR_EN_ALL;

/*
 * JBC error bit table
 *
 * Note on the *_BIT_DESC macros below: each one deliberately expands to
 * TWO initializers -- one for the PRIMARY (_P) status bit and one for the
 * SECONDARY (_S) bit.  The unbalanced "}, {" inside the macro pairs up
 * with the braces at the table entry, so each "{ XXX_BIT_DESC(...) }"
 * line actually yields two px_err_bit_desc_t entries sharing the same
 * handler and ereport class.
 */
#define	JBC_BIT_DESC(bit, hdl, erpt) \
	JBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit) }, \
	{ JBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_JBC_CLASS(bit)
px_err_bit_desc_t px_err_jbc_tbl[] = {
	/* JBC FATAL */
	{ JBC_BIT_DESC(MB_PEA,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(CPE,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(APE,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(PIO_CPE,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEW,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(JTCEEI,	hw_reset,	jbc_fatal) },
	{ JBC_BIT_DESC(JTCEER,	hw_reset,	jbc_fatal) },

	/* JBC MERGE */
	{ JBC_BIT_DESC(MB_PER,	jbc_merge,	jbc_merge) },
	{ JBC_BIT_DESC(MB_PEW,	jbc_merge,	jbc_merge) },

	/* JBC Jbusint IN */
	{ JBC_BIT_DESC(UE_ASYN,	panic,		jbc_in) },
	{ JBC_BIT_DESC(CE_ASYN,	no_error,	jbc_in) },
	{ JBC_BIT_DESC(JTE,	panic,		jbc_in) },
	{ JBC_BIT_DESC(JBE,	panic,		jbc_in) },
	{ JBC_BIT_DESC(JUE,	panic,		jbc_in) },
	{ JBC_BIT_DESC(ICISE,	panic,		jbc_in) },
	{ JBC_BIT_DESC(WR_DPE,	jbc_jbusint_in,	jbc_in) },
	{ JBC_BIT_DESC(RD_DPE,	jbc_jbusint_in,	jbc_in) },
	{ JBC_BIT_DESC(ILL_BMW,	panic,		jbc_in) },
	{ JBC_BIT_DESC(ILL_BMR,	panic,		jbc_in) },
	{ JBC_BIT_DESC(BJC,	panic,		jbc_in) },

	/* JBC Jbusint Out */
	{ JBC_BIT_DESC(IJP,	panic,		jbc_out) },

	/*
	 * JBC Dmcint ODCD
	 *
	 * Error bits which can be set via a bad PCItool access go through
	 * jbc_safe_acc instead.
	 */
	{ JBC_BIT_DESC(PIO_UNMAP_RD,	jbc_safe_acc,	jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC_RD,	jbc_safe_acc,	jbc_odcd) },
	{ JBC_BIT_DESC(PIO_UNMAP,	jbc_safe_acc,	jbc_odcd) },
	{ JBC_BIT_DESC(PIO_DPE,		jbc_dmcint_odcd, jbc_odcd) },
	{ JBC_BIT_DESC(PIO_CPE,		hw_reset,	jbc_odcd) },
	{ JBC_BIT_DESC(ILL_ACC,		jbc_safe_acc,	jbc_odcd) },

	/* JBC Dmcint IDC */
	{ JBC_BIT_DESC(UNSOL_RD,	no_panic,	jbc_idc) },
	{ JBC_BIT_DESC(UNSOL_INTR,	no_panic,	jbc_idc) },

	/* JBC CSR */
	{ JBC_BIT_DESC(EBUS_TO,		panic,		jbc_csr) }
};

/* Entry count (counts both the _P and _S entries produced by the macro) */
#define	px_err_jbc_keys \
	(sizeof (px_err_jbc_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * UBC error bit table
 */
#define	UBC_BIT_DESC(bit, hdl, erpt) \
	UBC_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit) }, \
	{ UBC_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_UBC_CLASS(bit)
px_err_bit_desc_t px_err_ubc_tbl[] = {
	/* UBC FATAL */
	{ UBC_BIT_DESC(DMARDUEA,	no_panic,	ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEA,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXA,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXA,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(DMARDUEB,	no_panic,	ubc_fatal) },
	{ UBC_BIT_DESC(DMAWTUEB,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(MEMRDAXB,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(MEMWTAXB,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(PIOWTUE,		panic,		ubc_fatal) },
	{ UBC_BIT_DESC(PIOWBEUE,	panic,		ubc_fatal) },
	{ UBC_BIT_DESC(PIORBEUE,	panic,		ubc_fatal) }
};

#define	px_err_ubc_keys \
	(sizeof (px_err_ubc_tbl)) / (sizeof (px_err_bit_desc_t))


/* Suffixes used to qualify UBC error class names by the access target */
char *ubc_class_eid_qualifier[] = {
	"-mem",
	"-channel",
	"-cpu",
	"-path"
};


/*
 * DMC error bit tables
 */
#define	IMU_BIT_DESC(bit, hdl, erpt) \
	IMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ IMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_imu_tbl[] = {
	/* DMC IMU RDS */
	{ IMU_BIT_DESC(MSI_MAL_ERR,		panic,	imu_rds) },
	{ IMU_BIT_DESC(MSI_PAR_ERR,		panic,	imu_rds) },
	{ IMU_BIT_DESC(PMEACK_MES_NOT_EN,	panic,	imu_rds) },
	{ IMU_BIT_DESC(PMPME_MES_NOT_EN,	panic,	imu_rds) },
	{ IMU_BIT_DESC(FATAL_MES_NOT_EN,	panic,	imu_rds) },
	{ IMU_BIT_DESC(NONFATAL_MES_NOT_EN,	panic,	imu_rds) },
	{ IMU_BIT_DESC(COR_MES_NOT_EN,		panic,	imu_rds) },
	{ IMU_BIT_DESC(MSI_NOT_EN,		panic,	imu_rds) },

	/* DMC IMU SCS */
	{ IMU_BIT_DESC(EQ_NOT_EN,		panic,	imu_scs) },

	/* DMC IMU */
	{ IMU_BIT_DESC(EQ_OVER,		imu_eq_ovfl,	imu) }
};

#define	px_err_imu_keys (sizeof (px_err_imu_tbl)) / (sizeof (px_err_bit_desc_t))

/* mmu errors */
#define	MMU_BIT_DESC(bit, hdl, erpt) \
	MMU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit) }, \
	{ MMU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_DMC_CLASS(bit)
px_err_bit_desc_t px_err_mmu_tbl[] = {
	/* DMC MMU TFAR/TFSR */
	{ MMU_BIT_DESC(BYP_ERR,		mmu_rbne,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(BYP_OOR,		mmu_tfa,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_ERR,		panic,		mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TRN_OOR,		mmu_tfa,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_INV,		mmu_tfa,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTE_PRT,		mmu_tfa,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TTC_DPE,		mmu_parity,	mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DME,		panic,		mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_UDE,		panic,		mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_ERR,		panic,		mmu_tfar_tfsr) },
	{ MMU_BIT_DESC(TBW_DPE,		mmu_parity,	mmu_tfar_tfsr) },

	/* DMC MMU */
	{ MMU_BIT_DESC(TTC_CAE,		panic,		mmu) }
};
#define	px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * PEC error bit tables
 */
#define	ILU_BIT_DESC(bit, hdl, erpt) \
	ILU_INTERRUPT_STATUS_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ ILU_INTERRUPT_STATUS_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_ilu_tbl[] = {
	/* PEC ILU none */
	{ ILU_BIT_DESC(IHB_PE,		panic,		pec_ilu) }
};
#define	px_err_ilu_keys \
	(sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t))

/*
 * PEC UE errors implementation is incomplete pending PCIE generic
 * fabric rules.  Must handle both PRIMARY and SECONDARY errors.
 */
/* pec ue errors */
#define	TLU_UC_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
/* Same as TLU_UC_BIT_DESC but uses the Oberon (OB) class-name macro */
#define	TLU_UC_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_UNCORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ue_tbl[] = {
	/* PCI-E Receive Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(UR,		pciex_ue,	pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(UC,		pciex_ue,	pciex_rx_ue) },

	/* PCI-E Transmit Uncorrectable Errors */
	{ TLU_UC_OB_BIT_DESC(ECRC,	pciex_ue,	pciex_rx_ue) },
	{ TLU_UC_BIT_DESC(CTO,		pciex_ue,	pciex_tx_ue) },
	{ TLU_UC_BIT_DESC(ROF,		pciex_ue,	pciex_tx_ue) },

	/* PCI-E Rx/Tx Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(MFP,		pciex_ue,	pciex_rx_tx_ue) },
	{ TLU_UC_BIT_DESC(PP,		pciex_ue,	pciex_rx_tx_ue) },

	/* Other PCI-E Uncorrectable Errors */
	{ TLU_UC_BIT_DESC(FCP,		pciex_ue,	pciex_ue) },
	{ TLU_UC_BIT_DESC(DLP,		pciex_ue,	pciex_ue) },
	{ TLU_UC_BIT_DESC(TE,		pciex_ue,	pciex_ue) },

	/* Not used */
	{ TLU_UC_BIT_DESC(CA,		pciex_ue,	do_not) }
};
#define	px_err_tlu_ue_keys \
	(sizeof (px_err_tlu_ue_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * PEC CE errors implementation is incomplete pending PCIE generic
 * fabric rules.
 */
/* pec ce errors */
#define	TLU_CE_BIT_DESC(bit, hdl, erpt) \
	TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_CORRECTABLE_ERROR_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
px_err_bit_desc_t px_err_tlu_ce_tbl[] = {
	/* PCI-E Correctable Errors */
	{ TLU_CE_BIT_DESC(RTO,		pciex_ce,	pciex_ce) },
	{ TLU_CE_BIT_DESC(RNR,		pciex_ce,	pciex_ce) },
	{ TLU_CE_BIT_DESC(BDP,		pciex_ce,	pciex_ce) },
	{ TLU_CE_BIT_DESC(BTP,		pciex_ce,	pciex_ce) },
	{ TLU_CE_BIT_DESC(RE,		pciex_ce,	pciex_ce) }
};
#define	px_err_tlu_ce_keys \
	(sizeof (px_err_tlu_ce_tbl)) / (sizeof (px_err_bit_desc_t))


/* pec oe errors */
#define	TLU_OE_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_CLASS(bit)
#define	TLU_OE_OB_BIT_DESC(bit, hdl, erpt) \
	TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _P, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit) }, \
	{ TLU_OTHER_EVENT_STATUS_CLEAR_ ## bit ## _S, \
	0, \
	PX_ERR_BIT_HANDLE(hdl), \
	PX_ERPT_SEND(erpt), \
	PX_ERR_PEC_OB_CLASS(bit)
px_err_bit_desc_t px_err_tlu_oe_tbl[] = {
	/* TLU Other Event Status (receive only) */
	{ TLU_OE_BIT_DESC(MRC,		hw_reset,	pciex_rx_oe) },

	/* TLU Other Event Status (rx + tx) */
	{ TLU_OE_BIT_DESC(WUC,		wuc_ruc,	pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(RUC,		wuc_ruc,	pciex_rx_tx_oe) },
	{ TLU_OE_BIT_DESC(CRS,		no_panic,	pciex_rx_tx_oe) },

	/* TLU Other Event */
	{ TLU_OE_BIT_DESC(IIP,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(EDP,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(EHP,		panic,		pciex_oe) },
	{ TLU_OE_OB_BIT_DESC(TLUEITMO,	panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(LIN,		no_panic,	pciex_oe) },
	{ TLU_OE_BIT_DESC(LRS,		no_panic,	pciex_oe) },
	{ TLU_OE_BIT_DESC(LDN,		tlu_ldn,	pciex_oe) },
	{ TLU_OE_BIT_DESC(LUP,		tlu_lup,	pciex_oe) },
	{ TLU_OE_BIT_DESC(ERU,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(ERO,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(EMP,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(EPE,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(ERP,		panic,		pciex_oe) },
	{ TLU_OE_BIT_DESC(EIP,		panic,		pciex_oe) }
};

#define	px_err_tlu_oe_keys \
	(sizeof (px_err_tlu_oe_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * All the following tables below are for LPU Interrupts.  These interrupts
 * are *NOT* error interrupts, but event status interrupts.
 *
 * These events are probably of most interest to:
 * o Hotplug
 * o Power Management
 * o etc...
 *
 * There are also a few events that would be interesting for FMA.
 * Again none of the registers below state that an error has occurred
 * or that data has been lost.  If anything, they give status that an
 * error is *about* to occur.  Examples:
 * o INT_SKP_ERR - indicates clock between fire and child is too far
 *		   off and is most unlikely able to compensate
 * o INT_TX_PAR_ERR - A parity error occurred in ONE lane.  This is
 *		      HW recoverable, but will likely end up as a future
 *		      fabric error as well.
 *
 * For now, we don't care about any of these errors and they should be
 * ignored, but cleared.
 */

/* LPU Link Interrupt Table */
#define	LPUL_BIT_DESC(bit, hdl, erpt) \
	LPU_LINK_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpul_tbl[] = {
	{ LPUL_BIT_DESC(LINK_ERR_ACT,	NULL,	NULL) }
};
#define	px_err_lpul_keys \
	(sizeof (px_err_lpul_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Physical Interrupt Table */
#define	LPUP_BIT_DESC(bit, hdl, erpt) \
	LPU_PHY_LAYER_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpup_tbl[] = {
	{ LPUP_BIT_DESC(PHY_LAYER_ERR,	NULL,	NULL) }
};
#define	px_err_lpup_keys \
	(sizeof (px_err_lpup_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Receive Interrupt Table */
#define	LPUR_BIT_DESC(bit, hdl, erpt) \
	LPU_RECEIVE_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpur_tbl[] = {
	{ LPUR_BIT_DESC(RCV_PHY,	NULL,	NULL) }
};
#define	px_err_lpur_keys \
	(sizeof (px_err_lpur_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Transmit Interrupt Table */
#define	LPUX_BIT_DESC(bit, hdl, erpt) \
	LPU_TRANSMIT_PHY_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpux_tbl[] = {
	{ LPUX_BIT_DESC(UNMSK,		NULL,	NULL) }
};
#define	px_err_lpux_keys \
	(sizeof (px_err_lpux_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU LTSSM Interrupt Table */
#define	LPUS_BIT_DESC(bit, hdl, erpt) \
	LPU_LTSSM_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpus_tbl[] = {
	{ LPUS_BIT_DESC(ANY,		NULL,	NULL) }
};
#define	px_err_lpus_keys \
	(sizeof (px_err_lpus_tbl)) / (sizeof (px_err_bit_desc_t))

/* LPU Gigablaze Glue Interrupt Table */
#define	LPUG_BIT_DESC(bit, hdl, erpt) \
	LPU_GIGABLAZE_GLUE_INTERRUPT_AND_STATUS_INT_ ## bit, \
	0, \
	NULL, \
	NULL, \
	""
px_err_bit_desc_t px_err_lpug_tbl[] = {
	{ LPUG_BIT_DESC(GLOBL_UNMSK,	NULL,	NULL) }
};
#define	px_err_lpug_keys \
	(sizeof (px_err_lpug_tbl)) / (sizeof (px_err_bit_desc_t))


/*
 * Mask and Tables: expands to the intr/log/count mask pointers, the bit
 * table, its key count, the register bank and a zero pad for one
 * px_err_reg_desc_t entry.  MnT6X targets the XBC (JBC/UBC) bank,
 * MnT6 the PEC CSR bank.
 */
#define	MnT6X(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_XBC, \
	0

#define	MnT6(pre) \
	&px_ ## pre ## _intr_mask, \
	&px_ ## pre ## _log_mask, \
	&px_ ## pre ## _count_mask, \
	px_err_ ## pre ## _tbl, \
	px_err_ ## pre ## _keys, \
	PX_REG_CSR, \
	0

/* LPU Registers Addresses (no separate log enable register) */
#define	LR4(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _INTERRUPT_AND_STATUS

/* LPU Registers Addresses with Irregularities (PHY register naming) */
#define	LR4_FIXME(pre) \
	NULL, \
	LPU_ ## pre ## _INTERRUPT_MASK, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \
	LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS

/* TLU Registers Addresses */
#define	TR4(pre) \
	TLU_ ## pre ## _LOG_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_ENABLE, \
	TLU_ ## pre ## _INTERRUPT_STATUS, \
	TLU_ ## pre ## _STATUS_CLEAR

/* Registers Addresses for JBC, UBC, MMU, IMU and ILU */
#define	R4(pre) \
	pre ## _ERROR_LOG_ENABLE, \
	pre ## _INTERRUPT_ENABLE, \
	pre ## _INTERRUPT_STATUS, \
	pre ## _ERROR_STATUS_CLEAR

/* Bits in chip_mask, set according to type. */
#define	CHP_O	BITMASK(PX_CHIP_OBERON)
#define	CHP_F	BITMASK(PX_CHIP_FIRE)
#define	CHP_FO	(CHP_F | CHP_O)

/*
 * Register error handling tables.
 * The ID Field (first field) is identified by an enum px_err_id_t.
 * It is located in px_err.h
 */
static const
px_err_reg_desc_t px_err_reg_tbl[] = {
	{ CHP_F,  MnT6X(jbc),	R4(JBC),	"JBC Error"},
	{ CHP_O,  MnT6X(ubc),	R4(UBC),	"UBC Error"},
	{ CHP_FO, MnT6(mmu),	R4(MMU),	"MMU Error"},
	{ CHP_FO, MnT6(imu),	R4(IMU),	"IMU Error"},
	{ CHP_FO, MnT6(tlu_ue),	TR4(UNCORRECTABLE_ERROR),	"TLU UE"},
	{ CHP_FO, MnT6(tlu_ce),	TR4(CORRECTABLE_ERROR),		"TLU CE"},
	{ CHP_FO, MnT6(tlu_oe),	TR4(OTHER_EVENT),		"TLU OE"},
	{ CHP_FO, MnT6(ilu),	R4(ILU),	"ILU Error"},
	{ CHP_F,  MnT6(lpul),	LR4(LINK_LAYER),	"LPU Link Layer"},
	{ CHP_F,  MnT6(lpup),	LR4_FIXME(PHY),		"LPU Phy Layer"},
	{ CHP_F,  MnT6(lpur),	LR4(RECEIVE_PHY),	"LPU RX Phy Layer"},
	{ CHP_F,  MnT6(lpux),	LR4(TRANSMIT_PHY),	"LPU TX Phy Layer"},
	{ CHP_F,  MnT6(lpus),	LR4(LTSSM),		"LPU LTSSM"},
	{ CHP_F,  MnT6(lpug),	LR4(GIGABLAZE_GLUE),	"LPU GigaBlaze Glue"},
};

#define	PX_ERR_REG_KEYS	(sizeof (px_err_reg_tbl)) / (sizeof (px_err_reg_tbl[0]))

/* Snapshot of every status register, indexed in px_err_reg_tbl order */
typedef struct px_err_ss {
	uint64_t err_status[PX_ERR_REG_KEYS];
} px_err_ss_t;

static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block);
static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr,
    px_err_ss_t *ss);
static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
    int err, int caller);

/*
 * px_err_cb_intr:
 * Interrupt handler for the JBC/UBC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr
 * o unlock
 * o handle error: fatal?
 *	fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_cb_intr(caddr_t arg)
{
	px_fault_t	*px_fault_p = (px_fault_t *)arg;
	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
	px_t		*px_p = DIP_TO_STATE(rpdip);
	int		err;
	ddi_fm_error_t	derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST);
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	/*
	 * px_err_panic() is called twice: once while the fm mutex is still
	 * held (B_TRUE) and once after px_fm_exit() (B_FALSE).
	 * NOTE(review): the exact panic policy for each pass lives in
	 * px_err_panic() -- confirm there.
	 */
	px_err_panic(err, PX_HB, PX_NO_ERROR, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(err, PX_HB, PX_NO_ERROR, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}

/*
 * px_err_dmc_pec_intr:
 * Interrupt handler for the DMC/PEC block.
 * o lock
 * o create derr
 * o px_err_cmn_intr(leaf, with out cb)
 * o pcie_scan_fabric (leaf)
 * o unlock
 * o handle error: fatal?
 *	fm_panic() : return INTR_CLAIMED)
 */
uint_t
px_err_dmc_pec_intr(caddr_t arg)
{
	px_fault_t	*px_fault_p = (px_fault_t *)arg;
	dev_info_t	*rpdip = px_fault_p->px_fh_dip;
	px_t		*px_p = DIP_TO_STATE(rpdip);
	int		rc_err, fab_err;
	ddi_fm_error_t	derr;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	/* send ereport/handle/clear fire registers */
	rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);

	/* Check all child devices for errors */
	fab_err = px_scan_fabric(px_p, rpdip, &derr);

	/* Set the interrupt state to idle */
	(void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino,
	    INTR_IDLE_STATE);

	/*
	 * Two-pass panic decision: first while the fm mutex is held
	 * (B_TRUE), then after px_fm_exit() (B_FALSE) -- see px_err_panic().
	 */
	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}

/*
 * Proper csr_base is responsibility of the caller. (Called from px_lib_dev_init
 * via px_err_reg_setup_all for pcie error registers; called from
 * px_cb_add_intr for jbc/ubc from px_cb_attach.)
 *
 * Note: reg_id is passed in instead of reg_desc since this function is called
 * from px_lib4u.c, which doesn't know about the structure of the table.
 */
void
px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
	uint64_t 		intr_mask = *reg_desc_p->intr_mask_p;
	uint64_t 		log_mask = *reg_desc_p->log_mask_p;

	/* Enable logging if this group has a log enable register */
	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, log_mask);

	/*
	 * For readability, in code you set 1 to enable an interrupt.
	 * But in Fire's LPU registers it's backwards: you set 1 to
	 * *disable* an intr, so those tunables are reverse-encoded
	 * (see the LPU mask comment above).
	 *
	 * The sequence below is deliberate:
	 *   Disable All Errors
	 *   Clear All Errors
	 *   Enable Errors
	 */
	CSR_XS(csr_base, reg_desc_p->enable_addr, 0);
	CSR_XS(csr_base, reg_desc_p->clear_addr, -1);
	CSR_XS(csr_base, reg_desc_p->enable_addr, intr_mask);
	DBG(DBG_ATTACH, NULL, "%s Mask: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->enable_addr));
	DBG(DBG_ATTACH, NULL, "%s Status: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->status_addr));
	DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg,
	    CSR_XR(csr_base, reg_desc_p->clear_addr));
	if (reg_desc_p->log_addr != NULL) {
		DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg,
		    CSR_XR(csr_base, reg_desc_p->log_addr));
	}
}

void
px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base)
{
	const px_err_reg_desc_t	*reg_desc_p = &px_err_reg_tbl[reg_id];
	/*
	 * LPU intr registers are reverse-encoded (1 = disable), so all-ones
	 * disables them; the other groups are disabled by writing 0.
	 */
	uint64_t		val = (reg_id >= PX_ERR_LPU_LINK) ? -1 : 0;

	if (reg_desc_p->log_addr != NULL)
		CSR_XS(csr_base, reg_desc_p->log_addr, val);
	CSR_XS(csr_base, reg_desc_p->enable_addr, val);
}

/*
 * Set up pcie error registers.
 */
void
px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable)
{
	px_err_id_t		reg_id;
	const px_err_reg_desc_t	*reg_desc_p;
	void (*px_err_reg_func)(px_err_id_t, caddr_t);

	/*
	 * JBC or XBC are enabled during adding of common block interrupts,
	 * not done here; only PX_REG_CSR (PEC) groups are touched.
	 */
	px_err_reg_func = (enable ? px_err_reg_enable : px_err_reg_disable);
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		reg_desc_p = &px_err_reg_tbl[reg_id];
		if ((reg_desc_p->chip_mask & chip_mask) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			px_err_reg_func(reg_id, csr_base);
	}
}

/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * o Snap shot current fire registers
 * o check for safe access
 * o send ereport and clear snap shot registers
 * o create and queue RC info for later use in fabric scan.
 *   o RUC/WUC, PTLP, MMU Errors(CA), UR
 * o check severity of snap shot registers
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL
 * @param block		PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL
 * @return err		PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED
 */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	px_err_ss_t	ss = {0};
	int		err;

	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* check for safe access */
	px_err_safeacc_check(px_p, derr);

	/* snap shot the current fire registers */
	px_err_snapshot(px_p, &ss, block);

	/* send ereports/handle/clear registers */
	err = px_err_erpt_and_clr(px_p, derr, &ss);

	/* check for error severity */
	err = px_err_check_severity(px_p, derr, err, caller);

	/* Mark the On Trap Handle if an error occurred */
	if (err != PX_NO_ERROR) {
		px_pec_t	*pec_p = px_p->px_pec_p;
		on_trap_data_t	*otd = pec_p->pec_ontrap_data;

		if ((otd != NULL) && (otd->ot_prot & OT_DATA_ACCESS))
			otd->ot_trap |= OT_DATA_ACCESS;
	}

	return (err);
}

/*
 * Static functions
 */

/*
 * px_err_snapshot:
 * Take a current snap shot of all the fire error registers.  This includes
 * JBC/UBC, DMC, and PEC depending on the block flag.
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param ss_p		pre-allocated memory to store the snap shot.
 * @param block		which register banks to read (PX_FM_BLOCK_*);
 *			entries for banks not selected are zeroed.
 */
static void
px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block)
{
	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t	xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
	caddr_t	pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
	caddr_t	csr_base;
	uint8_t	chip_mask = 1 << PX_CHIP_TYPE(pxu_p);
	const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl;
	px_err_id_t reg_id;

	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) {
		/* Skip register groups that don't exist on this chip */
		if (!(reg_desc_p->chip_mask & chip_mask))
			continue;

		if ((block & PX_FM_BLOCK_HOST) &&
		    (reg_desc_p->reg_bank == PX_REG_XBC))
			csr_base = xbc_csr_base;
		else if ((block & PX_FM_BLOCK_PCIE) &&
		    (reg_desc_p->reg_bank == PX_REG_CSR))
			csr_base = pec_csr_base;
		else {
			ss_p->err_status[reg_id] = 0;
			continue;
		}

		ss_p->err_status[reg_id] = CSR_XR(csr_base,
		    reg_desc_p->status_addr);
	}
}

/*
 * px_err_erpt_and_clr:
 * This function does the following thing to all the fire registers based
 * on an earlier snap shot.
 * o Send ereport
 * o Handle the error
 * o Clear the error
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param ss_p		pre-allocated memory to store the snap shot.
 */
static int
px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p)
{
	dev_info_t		*rpdip = px_p->px_dip;
	pxu_t			*pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t			csr_base;
	const px_err_reg_desc_t	*err_reg_tbl;
	px_err_bit_desc_t	*err_bit_tbl;
	px_err_bit_desc_t	*err_bit_desc;

	uint64_t		*count_mask;
	uint64_t		clear_addr;
	uint64_t		ss_reg;

	int			(*err_handler)();
	int			(*erpt_handler)();
	int			reg_id, key;
	int			err = PX_NO_ERROR;
	int			biterr = 0;

	ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));

	/* send ereport/handle/clear errors for every register group */
	for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++) {
		/* Get the correct register description table */
		err_reg_tbl = &px_err_reg_tbl[reg_id];

		/* Only look at groups that exist on this chip type. */
		if (!(BIT_TST(err_reg_tbl->chip_mask, PX_CHIP_TYPE(pxu_p))))
			continue;

		/* Get the correct CSR BASE */
		csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank];

		/* If there are no errors in this register, continue */
		ss_reg = ss_p->err_status[reg_id];
		if (!ss_reg)
			continue;

		/* Get pointers to masks and register addresses */
		count_mask = err_reg_tbl->count_mask_p;
		clear_addr = err_reg_tbl->clear_addr;

		/* Get the register BIT description table */
		err_bit_tbl = err_reg_tbl->err_bit_tbl;

		/* For each known bit in the register send erpt and handle */
		for (key = 0; key < err_reg_tbl->err_bit_keys; key++) {
			/*
			 * If the ss_reg is set for this bit,
			 * send ereport and handle
			 */
			err_bit_desc = &err_bit_tbl[key];
			if (!BIT_TST(ss_reg, err_bit_desc->bit))
				continue;

			/* Increment the counter if necessary */
			if (BIT_TST(*count_mask, err_bit_desc->bit)) {
				err_bit_desc->counter++;
			}

			/* Error Handle for this bit */
			err_handler = err_bit_desc->err_handler;
			if (err_handler) {
				biterr = err_handler(rpdip, csr_base, derr,
				    err_reg_tbl, err_bit_desc);
				/* accumulate worst-case severity bits */
				err |= biterr;
			}

			/*
			 * Send the ereport if it's an UNEXPECTED err.
			 * This is the only place where PX_EXPECTED is
			 * utilized.
			 */
			erpt_handler = err_bit_desc->erpt_handler;
			if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) ||
			    (biterr == PX_EXPECTED))
				continue;

			if (erpt_handler)
				(void) erpt_handler(rpdip, csr_base, ss_reg,
				    derr, err_bit_desc->bit,
				    err_bit_desc->class_name);
		}

		/* Clear the register and error */
		CSR_XS(csr_base, clear_addr, ss_reg);
	}

	return (err);
}

/*
 * px_err_check_severity:
 * Check the severity of the fire error based on an earlier snapshot
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param err		fire register error status
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL | PX_LIB_CALL
 */
static int
px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller)
{
	px_pec_t 	*pec_p = px_p->px_pec_p;
	boolean_t	is_safeacc = B_FALSE;

	/*
	 * Nothing to do if called with no error.
	 * The err could have already been set to PX_NO_PANIC, which means the
	 * system doesn't need to panic, but PEEK/POKE still failed.
	 */
	if (err == PX_NO_ERROR)
		return (err);

	/* Cautious access error handling */
	switch (derr->fme_flag) {
	case DDI_FM_ERR_EXPECTED:
		if (caller == PX_TRAP_CALL) {
			/*
			 * for ddi_caut_get treat all events as nonfatal
			 * The trampoline will set err_ena = 0,
			 * err_status = NONFATAL.
			 */
			derr->fme_status = DDI_FM_NONFATAL;
			is_safeacc = B_TRUE;
		} else {
			/*
			 * For ddi_caut_put treat all events as nonfatal. Here
			 * we have the handle and can call ndi_fm_acc_err_set().
1034 */ 1035 derr->fme_status = DDI_FM_NONFATAL; 1036 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 1037 is_safeacc = B_TRUE; 1038 } 1039 break; 1040 case DDI_FM_ERR_PEEK: 1041 case DDI_FM_ERR_POKE: 1042 /* 1043 * For ddi_peek/poke treat all events as nonfatal. 1044 */ 1045 is_safeacc = B_TRUE; 1046 break; 1047 default: 1048 is_safeacc = B_FALSE; 1049 } 1050 1051 /* re-adjust error status from safe access, forgive all errors */ 1052 if (is_safeacc) 1053 return (PX_NO_PANIC); 1054 1055 return (err); 1056 } 1057 1058 /* predefined convenience functions */ 1059 /* ARGSUSED */ 1060 void 1061 px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, 1062 px_err_bit_desc_t *err_bit_descr, char *msg) 1063 { 1064 DBG(DBG_ERR_INTR, rpdip, 1065 "Bit %d, %s, at %s(0x%x) has occured %d times with a severity " 1066 "of \"%s\"\n", 1067 err_bit_descr->bit, err_bit_descr->class_name, 1068 err_reg_descr->msg, err_reg_descr->status_addr, 1069 err_bit_descr->counter, msg); 1070 } 1071 1072 /* ARGSUSED */ 1073 int 1074 px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, 1075 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1076 px_err_bit_desc_t *err_bit_descr) 1077 { 1078 if (px_log & PX_HW_RESET) { 1079 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1080 "HW RESET"); 1081 } 1082 1083 return (PX_HW_RESET); 1084 } 1085 1086 /* ARGSUSED */ 1087 int 1088 px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, 1089 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1090 px_err_bit_desc_t *err_bit_descr) 1091 { 1092 if (px_log & PX_PANIC) { 1093 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC"); 1094 } 1095 1096 return (PX_PANIC); 1097 } 1098 1099 /* ARGSUSED */ 1100 int 1101 px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, 1102 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1103 px_err_bit_desc_t *err_bit_descr) 1104 { 1105 if (px_log & PX_PROTECTED) { 1106 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 
1107 "PROTECTED"); 1108 } 1109 1110 return (PX_PROTECTED); 1111 } 1112 1113 /* ARGSUSED */ 1114 int 1115 px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, 1116 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1117 px_err_bit_desc_t *err_bit_descr) 1118 { 1119 if (px_log & PX_NO_PANIC) { 1120 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1121 "NO PANIC"); 1122 } 1123 1124 return (PX_NO_PANIC); 1125 } 1126 1127 /* ARGSUSED */ 1128 int 1129 px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, 1130 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1131 px_err_bit_desc_t *err_bit_descr) 1132 { 1133 if (px_log & PX_NO_ERROR) { 1134 px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, 1135 "NO ERROR"); 1136 } 1137 1138 return (PX_NO_ERROR); 1139 } 1140 1141 /* ARGSUSED */ 1142 PX_ERPT_SEND_DEC(do_not) 1143 { 1144 return (PX_NO_ERROR); 1145 } 1146 1147 /* 1148 * Search the px_cb_list_t embedded in the px_cb_t for the 1149 * px_t of the specified Leaf (leaf_id). Return its associated dip. 
 */
static dev_info_t *
px_err_search_cb(px_cb_t *px_cb_p, uint_t leaf_id)
{
	int i;
	px_cb_list_t *pxl_elemp;

	/* walk attachcnt entries of the shared leaf list */
	for (i = px_cb_p->attachcnt, pxl_elemp = px_cb_p->pxl; i > 0;
	    i--, pxl_elemp = pxl_elemp->next) {
		if ((((pxu_t *)pxl_elemp->pxp->px_plat_p)->portid &
		    OBERON_PORT_ID_LEAF_MASK) == leaf_id) {
			return (pxl_elemp->pxp->px_dip);
		}
	}
	/* no leaf with a matching port id */
	return (NULL);
}

/* UBC FATAL - see io erpt doc, section 1.1 */
/* ARGSUSED */
PX_ERPT_SEND_DEC(ubc_fatal)
{
	char buf[FM_MAX_CLASS];
	uint64_t memory_ue_log, marked;
	char unum[FM_MAX_CLASS];
	int unum_length;
	uint64_t device_id = 0;
	uint8_t cpu_version = 0;
	nvlist_t *resource = NULL;
	uint64_t ubc_intr_status;
	px_t *px_p;
	px_cb_t *px_cb_p;
	dev_info_t *actual_dip;

	unum[0] = '\0';
	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);

	memory_ue_log = CSR_XR(csr_base, UBC_MEMORY_UE_LOG);
	marked = (memory_ue_log >> UBC_MEMORY_UE_LOG_MARKED) &
	    UBC_MEMORY_UE_LOG_MARKED_MASK;

	/*
	 * For the memory-UE flavored classes, qualify the class name by
	 * the EID field of the UE log and build a mem- or cpu-scheme FMRI.
	 */
	if ((strstr(class_name, "ubc.piowtue") != NULL) ||
	    (strstr(class_name, "ubc.piowbeue") != NULL) ||
	    (strstr(class_name, "ubc.piorbeue") != NULL) ||
	    (strstr(class_name, "ubc.dmarduea") != NULL) ||
	    (strstr(class_name, "ubc.dmardueb") != NULL)) {
		int eid = (memory_ue_log >> UBC_MEMORY_UE_LOG_EID) &
		    UBC_MEMORY_UE_LOG_EID_MASK;
		(void) strncat(buf, ubc_class_eid_qualifier[eid],
		    FM_MAX_CLASS);

		if (eid == UBC_EID_MEM) {
			uint64_t phys_addr = memory_ue_log &
			    MMU_OBERON_PADDR_MASK;
			uint64_t offset = (uint64_t)-1;

			resource = fm_nvlist_create(NULL);
			/* platform may not provide plat_get_mem_unum */
			if (&plat_get_mem_unum) {
				if ((plat_get_mem_unum(0,
				    phys_addr, 0, B_TRUE, 0, unum,
				    FM_MAX_CLASS, &unum_length)) != 0)
					unum[0] = '\0';
			}
			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
			    NULL, unum, NULL, offset);

		} else if (eid == UBC_EID_CPU) {
			int cpuid = (marked & UBC_MARKED_MAX_CPUID_MASK);
			char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */

			resource = fm_nvlist_create(NULL);
			cpu_version = cpunodes[cpuid].version;
			device_id = cpunodes[cpuid].device_id;
			(void) snprintf(sbuf, sizeof (sbuf), "%lX",
			    device_id);
			(void) fm_fmri_cpu_set(resource,
			    FM_CPU_SCHEME_VERSION, NULL, cpuid,
			    &cpu_version, sbuf);
		}
	}

	/*
	 * For most of the errors represented in the UBC Interrupt Status
	 * register, one can compute the dip of the actual Leaf that was
	 * involved in the error. To do this, find the px_cb_t structure
	 * that is shared between a pair of Leaves (eg, LeafA and LeafB).
	 *
	 * If any of the error bits for LeafA are set in the hardware
	 * register, search the list of px_t's rooted in the px_cb_t for
	 * the one corresponding to LeafA. If error bits for LeafB are set,
	 * search the list for LeafB's px_t. The px_t references its
	 * associated dip.
	 */
	px_p = DIP_TO_STATE(rpdip);
	px_cb_p = ((pxu_t *)px_p->px_plat_p)->px_cb_p;

	/* read hardware register */
	ubc_intr_status = CSR_XR(csr_base, UBC_INTERRUPT_STATUS);

	if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFA) != 0) {
		/* then Leaf A is involved in the error */
		actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_A);
		ASSERT(actual_dip != NULL);
		rpdip = actual_dip;
	} else if ((ubc_intr_status & UBC_INTERRUPT_STATUS_LEAFB) != 0) {
		/* then Leaf B is involved in the error */
		actual_dip = px_err_search_cb(px_cb_p, OBERON_PORT_ID_LEAF_B);
		ASSERT(actual_dip != NULL);
		rpdip = actual_dip;
	} /* else error cannot be associated with a Leaf */

	if (resource) {
		/* post with the FMRI resource attached, then free it */
		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
		    OBERON_UBC_IE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
		    OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
		    OBERON_UBC_RESOURCE, DATA_TYPE_NVLIST, resource,
		    NULL);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	} else {
		/* same payload without the resource member */
		ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
		    OBERON_UBC_ELE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_LOG_ENABLE),
		    OBERON_UBC_IE, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_INTERRUPT_ENABLE),
		    OBERON_UBC_IS, DATA_TYPE_UINT64, ubc_intr_status,
		    OBERON_UBC_ESS, DATA_TYPE_UINT64,
		    CSR_XR(csr_base, UBC_ERROR_STATUS_SET),
		    OBERON_UBC_MUE, DATA_TYPE_UINT64, memory_ue_log,
		    OBERON_UBC_UNUM, DATA_TYPE_STRING, unum,
		    OBERON_UBC_DID, DATA_TYPE_UINT64, device_id,
		    OBERON_UBC_CPUV, DATA_TYPE_UINT32, cpu_version,
		    NULL);
	}

	return (PX_NO_PANIC);
}

/* JBC FATAL - post JBC log/enable/status regs plus both fatal error logs */
PX_ERPT_SEND_DEC(jbc_fatal)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_FEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, FATAL_ERROR_LOG_1),
	    FIRE_JBC_FEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, FATAL_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC MERGE - post JBC regs plus the merge transaction error log */
PX_ERPT_SEND_DEC(jbc_merge)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_MTEL, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Merge buffer retryable errors:
 * Merge buffer parity error (rd_buf): PIO or DMA
 * Merge buffer parity error (wr_buf): PIO or DMA
 */
/* ARGSUSED */
int
px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery. When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG);
	 * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information
	 * just panic as it is unknown which address has been affected.
	 *
	 * Remember the address is pretranslation and might be hard to look
	 * up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/* JBC Jbusint IN - post JBC regs plus both IN transaction error logs */
PX_ERPT_SEND_DEC(jbc_in)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_JITEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG),
	    FIRE_JBC_JITEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Jbusint IN retryable errors
 * Log Reg[42:0].
 * Write Data Parity Error: PIO Writes
 * Read Data Parity Error: DMA Reads
 */
int
px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery. When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG);
	 * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information
	 * just panic as it is unknown which address has been affected.
	 *
	 * Remember the address is pretranslation and might be hard to look
	 * up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}


/* JBC Jbusint Out - post JBC regs plus both OUT transaction error logs */
PX_ERPT_SEND_DEC(jbc_out)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_JOTEL1, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG),
	    FIRE_JBC_JOTEL2, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC Dmcint ODCD - post JBC regs plus the DMC ODCD error log */
PX_ERPT_SEND_DEC(jbc_odcd)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_DMC_ODCD, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * JBC Dmcint ODCD nonfatal error handling -
 * PIO data parity error: PIO
 */
/* ARGSUSED */
int
px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	/*
	 * Holder function to attempt error recovery. When the features
	 * are in place, look up the address of the transaction in:
	 *
	 * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG);
	 * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK;
	 *
	 * If the error is a secondary error, there is no log information
	 * just panic as it is unknown which address has been affected.
	 *
	 * Remember the address is pretranslation and might be hard to look
	 * up the appropriate driver based on the PA.
	 */
	return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr,
	    err_bit_descr));
}

/*
 * Does address in DMCINT error log register match address of pcitool access?
 */
static boolean_t
px_jbc_pcitool_addr_match(dev_info_t *rpdip, caddr_t csr_base)
{
	px_t *px_p = DIP_TO_STATE(rpdip);
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t pcitool_addr = pxu_p->pcitool_addr;
	caddr_t errlog_addr =
	    (caddr_t)CSR_FR(csr_base, DMCINT_ODCD_ERROR_LOG, ADDRESS);

	return (pcitool_addr == errlog_addr);
}

/*
 * JBC Dmcint ODCD error handling for errors which are forgivable during a safe
 * access. (This will be most likely be a PCItool access.) If not a safe
 * access context, treat like jbc_dmcint_odcd.
 * Unmapped PIO read error: pio:read:M:nonfatal
 * Unmapped PIO write error: pio:write:M:nonfatal
 * Invalid PIO write to PCIe cfg/io, csr, ebus or i2c bus: pio:write:nonfatal
 * Invalid PIO read to PCIe cfg/io, csr, ebus or i2c bus: pio:read:nonfatal
 */
/* ARGSUSED */
int
px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit);

	/* secondary errors carry no log info; escalate to panic */
	if (!pri)
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	/*
	 * Got an error which is forgivable during a PCItool access.
	 *
	 * Don't do handler check since the error may otherwise be unfairly
	 * attributed to a device. Just return.
	 *
	 * Note: There is a hole here in that a legitimate error can come in
	 * while a PCItool access is in play and be forgiven. This is possible
	 * though not likely.
	 */
	if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) &&
	    (px_jbc_pcitool_addr_match(rpdip, csr_base)))
		return (px_err_protected_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));

	return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr,
	    err_reg_descr, err_bit_descr));
}

/* JBC Dmcint IDC - post JBC regs plus the DMC IDC error log */
PX_ERPT_SEND_DEC(jbc_idc)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    FIRE_JBC_DMC_IDC, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* JBC CSR - post JBC regs plus the CSR error log */
PX_ERPT_SEND_DEC(jbc_csr)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_JBC_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_LOG_ENABLE),
	    FIRE_JBC_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_INTERRUPT_ENABLE),
	    FIRE_JBC_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_JBC_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, JBC_ERROR_STATUS_SET),
	    "jbc-error-reg", DATA_TYPE_UINT64,
	    CSR_XR(csr_base, CSR_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC IMU RDS - post IMU regs plus the RDS error log */
PX_ERPT_SEND_DEC(imu_rds)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    FIRE_IMU_RDS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_RDS_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * handle EQ overflow: panic only if the event queue check says PANIC
 * and we are not in a CPR (suspend/resume) window.
 */
/* ARGSUSED */
int
px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base,
    ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr,
    px_err_bit_desc_t *err_bit_descr)
{
	px_t *px_p = DIP_TO_STATE(rpdip);
	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
	int err = px_err_check_eq(rpdip);

	if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) {
		return (px_err_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	} else {
		return (px_err_no_panic_handle(rpdip, csr_base, derr,
		    err_reg_descr, err_bit_descr));
	}
}

/* DMC IMU SCS - post IMU regs plus the SCS error log */
PX_ERPT_SEND_DEC(imu_scs)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    FIRE_IMU_SCS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_SCS_ERROR_LOG),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC IMU - post the generic IMU register set */
PX_ERPT_SEND_DEC(imu)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_IMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE),
	    FIRE_IMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_INTERRUPT_ENABLE),
	    FIRE_IMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_IMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, IMU_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC MMU TFAR/TFSR - post MMU regs plus the translation fault addr/status */
PX_ERPT_SEND_DEC(mmu_tfar_tfsr)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);
	px_t *px_p = DIP_TO_STATE(rpdip);
	pcie_req_id_t fault_bdf = PCIE_INVALID_BDF;
	uint16_t s_status = 0;

	if (pri) {
		/*
		 * NOTE(review): the ID field is masked in place but not
		 * shifted down before the assignment to the 16-bit
		 * pcie_req_id_t; correct only if the ID field starts at
		 * bit 0 of the register — confirm against the Fire PRM.
		 */
		fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS)
		    & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK <<
		    MMU_TRANSLATION_FAULT_STATUS_ID);
		s_status = PCI_STAT_S_TARG_AB;

		/* Only PIO Fault Addresses are valid, this is DMA */
		(void) px_rp_en_q(px_p, fault_bdf, NULL, s_status);
	}

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);

	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
	    FIRE_MMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
	    FIRE_MMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
	    FIRE_MMU_TFAR, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS),
	    FIRE_MMU_TFSR, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS),
	    NULL);

	return (PX_NO_PANIC);
}

/* DMC MMU - post the generic MMU register set */
PX_ERPT_SEND_DEC(mmu)
{
	char buf[FM_MAX_CLASS];
	boolean_t pri = PX_ERR_IS_PRI(bit);

	(void) snprintf(buf, FM_MAX_CLASS, "%s", class_name);
	ddi_fm_ereport_post(rpdip, buf, derr->fme_ena,
	    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
	    FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri,
	    FIRE_MMU_ELE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_LOG_ENABLE),
	    FIRE_MMU_IE, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_INTERRUPT_ENABLE),
	    FIRE_MMU_IS, DATA_TYPE_UINT64,
	    ss_reg,
	    FIRE_MMU_ESS, DATA_TYPE_UINT64,
	    CSR_XR(csr_base, MMU_ERROR_STATUS_SET),
	    NULL);

	return (PX_NO_PANIC);
}

/*
 * IMU function to handle all Received but Not Enabled errors.
 *
 * These errors are due to transactions modes in which the PX driver was not
 * setup to be able to do. If possible, inform the driver that their DMA has
 * failed by marking their DMA handle as failed, but do not panic the system.
 * Most likely the address is not valid, as Fire wasn't setup to handle them in
 * the first place.
 *
 * These errors are not retryable, unless the PX mode has changed, otherwise the
 * same error will occur again.
1784 */ 1785 int 1786 px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, 1787 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1788 px_err_bit_desc_t *err_bit_descr) 1789 { 1790 pcie_req_id_t bdf; 1791 1792 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1793 goto done; 1794 1795 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1796 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, 1797 bdf); 1798 1799 done: 1800 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1801 err_bit_descr)); 1802 } 1803 1804 /* 1805 * IMU function to handle all invalid address errors. 1806 * 1807 * These errors are due to transactions in which the address is not recognized. 1808 * If possible, inform the driver that all DMAs have failed by marking their DMA 1809 * handles. Fire should not panic the system, it'll be up to the driver to 1810 * panic. The address logged is invalid. 1811 * 1812 * These errors are not retryable since retrying the same transaction with the 1813 * same invalid address will result in the same error. 1814 */ 1815 /* ARGSUSED */ 1816 int 1817 px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, 1818 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1819 px_err_bit_desc_t *err_bit_descr) 1820 { 1821 pcie_req_id_t bdf; 1822 1823 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1824 goto done; 1825 1826 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1827 (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, 1828 bdf); 1829 1830 done: 1831 return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, 1832 err_bit_descr)); 1833 } 1834 1835 /* 1836 * IMU function to handle normal transactions that encounter a parity error. 1837 * 1838 * These errors are due to transactions that enouter a parity error. If 1839 * possible, inform the driver that their DMA have failed and that they should 1840 * retry. If Fire is unable to contact the leaf driver, panic the system. 
1841 * Otherwise, it'll be up to the device to determine is this is a panicable 1842 * error. 1843 */ 1844 /* ARGSUSED */ 1845 int 1846 px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, 1847 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1848 px_err_bit_desc_t *err_bit_descr) 1849 { 1850 uint64_t mmu_tfa; 1851 pcie_req_id_t bdf; 1852 int status = PF_HDL_NOTFOUND; 1853 1854 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1855 goto done; 1856 1857 mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); 1858 bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); 1859 status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 1860 (uint32_t)mmu_tfa, bdf); 1861 1862 done: 1863 if (status == PF_HDL_NOTFOUND) 1864 return (px_err_panic_handle(rpdip, csr_base, derr, 1865 err_reg_descr, err_bit_descr)); 1866 else 1867 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1868 err_reg_descr, err_bit_descr)); 1869 } 1870 1871 /* 1872 * wuc/ruc event - Mark the handle of the failed PIO access. 
Return "no_panic" 1873 */ 1874 /* ARGSUSED */ 1875 int 1876 px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, 1877 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1878 px_err_bit_desc_t *err_bit_descr) 1879 { 1880 px_t *px_p = DIP_TO_STATE(rpdip); 1881 pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; 1882 uint64_t data; 1883 pf_pcie_adv_err_regs_t adv_reg; 1884 int sts; 1885 1886 if (!PX_ERR_IS_PRI(err_bit_descr->bit)) 1887 goto done; 1888 1889 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 1890 adv_reg.pcie_ue_hdr[0] = (uint32_t)(data >> 32); 1891 adv_reg.pcie_ue_hdr[1] = (uint32_t)(data & 0xFFFFFFFF); 1892 data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 1893 adv_reg.pcie_ue_hdr[2] = (uint32_t)(data >> 32); 1894 adv_reg.pcie_ue_hdr[3] = (uint32_t)(data & 0xFFFFFFFF); 1895 1896 (void) pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg); 1897 sts = pf_hdl_lookup(rpdip, derr->fme_ena, adv_reg.pcie_ue_tgt_trans, 1898 adv_reg.pcie_ue_tgt_addr, adv_reg.pcie_ue_tgt_bdf); 1899 done: 1900 if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR)) 1901 return (px_err_protected_handle(rpdip, csr_base, derr, 1902 err_reg_descr, err_bit_descr)); 1903 1904 return (px_err_no_panic_handle(rpdip, csr_base, derr, 1905 err_reg_descr, err_bit_descr)); 1906 } 1907 1908 /* 1909 * TLU LUP event - if caused by power management activity, then it is expected. 1910 * In all other cases, it is an error. 1911 */ 1912 /* ARGSUSED */ 1913 int 1914 px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, 1915 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1916 px_err_bit_desc_t *err_bit_descr) 1917 { 1918 px_t *px_p = DIP_TO_STATE(rpdip); 1919 1920 /* 1921 * power management code is currently the only segment that sets 1922 * px_lup_pending to indicate its expectation for a healthy LUP 1923 * event. For all other occasions, LUP event should be flaged as 1924 * error condition. 
1925 */ 1926 return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? 1927 PX_NO_PANIC : PX_EXPECTED); 1928 } 1929 1930 /* 1931 * TLU LDN event - if caused by power management activity, then it is expected. 1932 * In all other cases, it is an error. 1933 */ 1934 /* ARGSUSED */ 1935 int 1936 px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base, 1937 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1938 px_err_bit_desc_t *err_bit_descr) 1939 { 1940 px_t *px_p = DIP_TO_STATE(rpdip); 1941 return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED : 1942 PX_NO_PANIC); 1943 } 1944 1945 /* PEC ILU none - see io erpt doc, section 3.1 */ 1946 PX_ERPT_SEND_DEC(pec_ilu) 1947 { 1948 char buf[FM_MAX_CLASS]; 1949 boolean_t pri = PX_ERR_IS_PRI(bit); 1950 1951 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 1952 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 1953 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 1954 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 1955 FIRE_ILU_ELE, DATA_TYPE_UINT64, 1956 CSR_XR(csr_base, ILU_ERROR_LOG_ENABLE), 1957 FIRE_ILU_IE, DATA_TYPE_UINT64, 1958 CSR_XR(csr_base, ILU_INTERRUPT_ENABLE), 1959 FIRE_ILU_IS, DATA_TYPE_UINT64, 1960 ss_reg, 1961 FIRE_ILU_ESS, DATA_TYPE_UINT64, 1962 CSR_XR(csr_base, ILU_ERROR_STATUS_SET), 1963 NULL); 1964 1965 return (PX_NO_PANIC); 1966 } 1967 1968 /* PCIEX UE Errors */ 1969 /* ARGSUSED */ 1970 int 1971 px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base, 1972 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 1973 px_err_bit_desc_t *err_bit_descr) 1974 { 1975 px_err_pcie_t regs = {0}; 1976 uint32_t err_bit; 1977 int err; 1978 uint64_t log; 1979 1980 if (err_bit_descr->bit < 32) { 1981 err_bit = (uint32_t)BITMASK(err_bit_descr->bit); 1982 regs.ue_reg = err_bit; 1983 regs.primary_ue = err_bit; 1984 1985 /* 1986 * Log the Received Log for PTLP, UR and UC. 
1987 */ 1988 if ((PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR | PCIE_AER_UCE_UC) & 1989 err_bit) { 1990 log = CSR_XR(csr_base, 1991 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG); 1992 regs.rx_hdr1 = (uint32_t)(log >> 32); 1993 regs.rx_hdr2 = (uint32_t)(log & 0xFFFFFFFF); 1994 1995 log = CSR_XR(csr_base, 1996 TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG); 1997 regs.rx_hdr3 = (uint32_t)(log >> 32); 1998 regs.rx_hdr4 = (uint32_t)(log & 0xFFFFFFFF); 1999 } 2000 } else { 2001 regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2002 } 2003 2004 err = px_err_check_pcie(rpdip, derr, ®s, PF_INTR_TYPE_INTERNAL); 2005 2006 if (err & PX_PANIC) { 2007 return (px_err_panic_handle(rpdip, csr_base, derr, 2008 err_reg_descr, err_bit_descr)); 2009 } else { 2010 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2011 err_reg_descr, err_bit_descr)); 2012 } 2013 } 2014 2015 /* PCI-E Uncorrectable Errors */ 2016 PX_ERPT_SEND_DEC(pciex_rx_ue) 2017 { 2018 char buf[FM_MAX_CLASS]; 2019 boolean_t pri = PX_ERR_IS_PRI(bit); 2020 2021 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2022 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2023 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2024 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2025 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2026 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2027 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2028 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2029 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2030 ss_reg, 2031 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2032 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2033 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2034 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2035 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2036 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2037 NULL); 2038 2039 return (PX_NO_PANIC); 2040 } 2041 2042 /* PCI-E Uncorrectable Errors */ 2043 PX_ERPT_SEND_DEC(pciex_tx_ue) 2044 { 2045 char buf[FM_MAX_CLASS]; 2046 boolean_t pri = PX_ERR_IS_PRI(bit); 2047 
2048 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2049 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2050 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2051 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2052 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2053 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2054 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2055 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2056 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2057 ss_reg, 2058 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2059 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2060 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2061 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2062 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2063 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2064 NULL); 2065 2066 return (PX_NO_PANIC); 2067 } 2068 2069 /* PCI-E Uncorrectable Errors */ 2070 PX_ERPT_SEND_DEC(pciex_rx_tx_ue) 2071 { 2072 char buf[FM_MAX_CLASS]; 2073 boolean_t pri = PX_ERR_IS_PRI(bit); 2074 2075 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2076 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2077 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2078 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2079 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2080 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2081 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2082 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2083 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2084 ss_reg, 2085 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2086 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2087 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2088 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG), 2089 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2090 CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), 2091 FIRE_TLU_TUEH1L, DATA_TYPE_UINT64, 2092 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG), 2093 FIRE_TLU_TUEH2L, DATA_TYPE_UINT64, 2094 CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), 2095 NULL); 2096 2097 
return (PX_NO_PANIC); 2098 } 2099 2100 /* PCI-E Uncorrectable Errors */ 2101 PX_ERPT_SEND_DEC(pciex_ue) 2102 { 2103 char buf[FM_MAX_CLASS]; 2104 boolean_t pri = PX_ERR_IS_PRI(bit); 2105 2106 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2107 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2108 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2109 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2110 FIRE_TLU_UELE, DATA_TYPE_UINT64, 2111 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_LOG_ENABLE), 2112 FIRE_TLU_UIE, DATA_TYPE_UINT64, 2113 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_INTERRUPT_ENABLE), 2114 FIRE_TLU_UIS, DATA_TYPE_UINT64, 2115 ss_reg, 2116 FIRE_TLU_UESS, DATA_TYPE_UINT64, 2117 CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), 2118 NULL); 2119 2120 return (PX_NO_PANIC); 2121 } 2122 2123 /* PCIEX UE Errors */ 2124 /* ARGSUSED */ 2125 int 2126 px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base, 2127 ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, 2128 px_err_bit_desc_t *err_bit_descr) 2129 { 2130 px_err_pcie_t regs = {0}; 2131 int err; 2132 2133 if (err_bit_descr->bit < 32) 2134 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit); 2135 else 2136 regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); 2137 2138 err = px_err_check_pcie(rpdip, derr, ®s, PF_INTR_TYPE_INTERNAL); 2139 2140 if (err & PX_PANIC) { 2141 return (px_err_panic_handle(rpdip, csr_base, derr, 2142 err_reg_descr, err_bit_descr)); 2143 } else { 2144 return (px_err_no_panic_handle(rpdip, csr_base, derr, 2145 err_reg_descr, err_bit_descr)); 2146 } 2147 } 2148 2149 /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ 2150 PX_ERPT_SEND_DEC(pciex_ce) 2151 { 2152 char buf[FM_MAX_CLASS]; 2153 boolean_t pri = PX_ERR_IS_PRI(bit); 2154 2155 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2156 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2157 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2158 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2159 FIRE_TLU_CELE, DATA_TYPE_UINT64, 
2160 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_LOG_ENABLE), 2161 FIRE_TLU_CIE, DATA_TYPE_UINT64, 2162 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_INTERRUPT_ENABLE), 2163 FIRE_TLU_CIS, DATA_TYPE_UINT64, 2164 ss_reg, 2165 FIRE_TLU_CESS, DATA_TYPE_UINT64, 2166 CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), 2167 NULL); 2168 2169 return (PX_NO_PANIC); 2170 } 2171 2172 /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ 2173 PX_ERPT_SEND_DEC(pciex_rx_oe) 2174 { 2175 char buf[FM_MAX_CLASS]; 2176 boolean_t pri = PX_ERR_IS_PRI(bit); 2177 2178 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2179 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2180 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2181 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2182 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2183 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2184 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2185 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2186 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2187 ss_reg, 2188 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2189 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2190 FIRE_TLU_RUEH1L, DATA_TYPE_UINT64, 2191 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), 2192 FIRE_TLU_RUEH2L, DATA_TYPE_UINT64, 2193 CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), 2194 NULL); 2195 2196 return (PX_NO_PANIC); 2197 } 2198 2199 /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ 2200 PX_ERPT_SEND_DEC(pciex_rx_tx_oe) 2201 { 2202 char buf[FM_MAX_CLASS]; 2203 boolean_t pri = PX_ERR_IS_PRI(bit); 2204 px_t *px_p = DIP_TO_STATE(rpdip); 2205 uint64_t rx_h1, rx_h2, tx_h1, tx_h2; 2206 uint16_t s_status; 2207 int sts; 2208 pcie_cpl_t *cpl; 2209 pf_pcie_adv_err_regs_t adv_reg; 2210 2211 rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG); 2212 rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG); 2213 tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); 2214 tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); 2215 
2216 if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) || 2217 (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) { 2218 adv_reg.pcie_ue_hdr[0] = (uint32_t)(rx_h1 >> 32); 2219 adv_reg.pcie_ue_hdr[1] = (uint32_t)rx_h1; 2220 adv_reg.pcie_ue_hdr[2] = (uint32_t)(rx_h2 >> 32); 2221 adv_reg.pcie_ue_hdr[3] = (uint32_t)rx_h2; 2222 2223 /* get completer bdf (fault bdf) from rx logs */ 2224 cpl = (pcie_cpl_t *)&adv_reg.pcie_ue_hdr[1]; 2225 2226 /* Figure out if UR/CA from rx logs */ 2227 if (cpl->status == PCIE_CPL_STS_UR) 2228 s_status = PCI_STAT_R_MAST_AB; 2229 else if (cpl->status == PCIE_CPL_STS_CA) 2230 s_status = PCI_STAT_R_TARG_AB; 2231 2232 adv_reg.pcie_ue_hdr[0] = (uint32_t)(tx_h1 >> 32); 2233 adv_reg.pcie_ue_hdr[1] = (uint32_t)tx_h1; 2234 adv_reg.pcie_ue_hdr[2] = (uint32_t)(tx_h2 >> 32); 2235 adv_reg.pcie_ue_hdr[3] = (uint32_t)tx_h2; 2236 2237 /* get fault addr from tx logs */ 2238 sts = pf_tlp_decode(PCIE_DIP2BUS(rpdip), &adv_reg); 2239 2240 if (sts == DDI_SUCCESS) 2241 (void) px_rp_en_q(px_p, adv_reg.pcie_ue_tgt_bdf, 2242 adv_reg.pcie_ue_tgt_addr, s_status); 2243 } 2244 2245 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2246 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2247 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2248 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2249 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2250 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2251 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2252 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2253 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2254 ss_reg, 2255 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2256 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2257 FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1, 2258 FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2, 2259 FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1, 2260 FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2, 2261 NULL); 2262 2263 return (PX_NO_PANIC); 2264 } 2265 2266 /* TLU Other Event - see io erpt doc, section 3.9 */ 2267 PX_ERPT_SEND_DEC(pciex_oe) 2268 { 2269 char buf[FM_MAX_CLASS]; 2270 
boolean_t pri = PX_ERR_IS_PRI(bit); 2271 2272 (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); 2273 ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, 2274 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 2275 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, pri, 2276 FIRE_TLU_OEELE, DATA_TYPE_UINT64, 2277 CSR_XR(csr_base, TLU_OTHER_EVENT_LOG_ENABLE), 2278 FIRE_TLU_OEIE, DATA_TYPE_UINT64, 2279 CSR_XR(csr_base, TLU_OTHER_EVENT_INTERRUPT_ENABLE), 2280 FIRE_TLU_OEIS, DATA_TYPE_UINT64, 2281 ss_reg, 2282 FIRE_TLU_OEESS, DATA_TYPE_UINT64, 2283 CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), 2284 NULL); 2285 2286 return (PX_NO_PANIC); 2287 } 2288