/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4v Fire Error Handling
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/membar.h>
#include "px_obj.h"
#include "px_err.h"

static void px_err_fill_pfd(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt);
static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, int caller);

static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, char *msg);
static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
    boolean_t is_valid_epkt);
static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);

/* Include the generated sun4v epkt checking code */
#include "px_err_gen.c"

/*
 * This variable indicates if we have a hypervisor that could potentially send
 * incorrect epkts.  We always set this to TRUE for now until we find a way to
 * tell if this HV bug has been fixed.
 */
boolean_t px_legacy_epkt = B_TRUE;

/*
 * px_err_cb_intr:
 * Interrupt handler for the Host Bus Block.
 */
uint_t
px_err_cb_intr(caddr_t arg)
{
        px_fault_t      *fault_p = (px_fault_t *)arg;
        px_rc_err_t     *epkt = (px_rc_err_t *)fault_p->px_intr_payload;

        if (epkt != NULL) {
                return (px_err_intr(fault_p, epkt));
        }

        return (DDI_INTR_UNCLAIMED);
}

/*
 * px_err_dmc_pec_intr:
 * Interrupt handler for the DMC/PEC block.
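 * Like px_err_cb_intr() above, it simply hands the epkt found in the
 * mondo payload to the common px_err_intr() path.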
 */
uint_t
px_err_dmc_pec_intr(caddr_t arg)
{
        px_fault_t      *fault_p = (px_fault_t *)arg;
        px_rc_err_t     *epkt = (px_rc_err_t *)fault_p->px_intr_payload;

        if (epkt != NULL) {
                return (px_err_intr(fault_p, epkt));
        }

        return (DDI_INTR_UNCLAIMED);
}

/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * This function is more meaningful in the sun4u implementation; it is kept
 * here to mirror the sun4u call stack.
 * o check for safe access
 * o create and queue RC info for later use in fabric scan.
 *   o RUC/WUC, PTLP, MMU Errors (CA), UR
 *
 * @param px_p          leaf in which to check access
 * @param derr          fm err data structure to be updated
 * @param caller        PX_TRAP_CALL | PX_INTR_CALL
 * @param block         whether to handle hostbus registers (ignored)
 * @return err          PX_NO_PANIC | PX_PROTECTED |
 *                      PX_PANIC | PX_HW_RESET | PX_EXPECTED
 */
/* ARGSUSED */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
        px_err_safeacc_check(px_p, derr);
        return (DDI_FM_OK);
}

/*
 * Fills in the RC-specific fault data.
 */
static void
px_err_fill_pfd(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt)
{
        pf_pcie_adv_err_regs_t  adv_reg;
        int                     sts = DDI_SUCCESS;
        pcie_req_id_t           fault_bdf = PCIE_INVALID_BDF;
        uint64_t                fault_addr = 0;
        uint16_t                s_status = 0;

        /* Add a PCIe PF_DATA entry */
        if (epkt->rc_descr.block == BLOCK_MMU) {
                /* Only PIO fault addresses are valid, this is DMA */
                s_status = PCI_STAT_S_TARG_AB;
                fault_addr = 0;

                if (epkt->rc_descr.H)
                        fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16);
                else
                        sts = DDI_FAILURE;
        } else {
                px_pec_err_t    *pec_p = (px_pec_err_t *)epkt;
                uint32_t        dir = pec_p->pec_descr.dir;

                adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]);
                adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32);
                adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]);
                adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32);

                /* translate RC UR/CA into legacy secondary errors */
                if ((dir == DIR_READ || dir == DIR_WRITE) &&
                    pec_p->pec_descr.U) {
                        if (pec_p->ue_reg_status & PCIE_AER_UCE_UR)
                                s_status |= PCI_STAT_R_MAST_AB;
                        if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
                                s_status |= PCI_STAT_R_TARG_AB;
                }

                if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP)
                        s_status |= PCI_STAT_PERROR;

                if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
                        s_status |= PCI_STAT_S_TARG_AB;

                sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
                fault_bdf = adv_reg.pcie_ue_tgt_bdf;
                fault_addr = adv_reg.pcie_ue_tgt_addr;
        }

        if (sts == DDI_SUCCESS)
                px_rp_en_q(px_p, fault_bdf, fault_addr, s_status);
}

/*
 * px_err_intr:
 * Interrupt handler for the JBC/DMC/PEC block.
 * o lock
 * o create derr
 * o check safe access
 * o px_err_check_severity(epkt)
 * o pcie_scan_fabric
 * o Idle intr state
 * o unlock
 * o handle error: fatal?
 *   fm_panic() : return INTR_CLAIMED
 */
static uint_t
px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
{
        px_t            *px_p = DIP_TO_STATE(fault_p->px_fh_dip);
        dev_info_t      *rpdip = px_p->px_dip;
        int             rc_err, fab_err, msg;
        ddi_fm_error_t  derr;

        if (px_fm_enter(px_p) != DDI_SUCCESS)
                goto done;

        /* Create the derr */
        bzero(&derr, sizeof (ddi_fm_error_t));
        derr.fme_version = DDI_FME_VERSION;
        derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
        derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

        /* Basically check for safe access */
        (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL);

        /* Check the severity of this error */
        rc_err = px_err_epkt_severity(px_p, &derr, epkt, PX_INTR_CALL);

        /* Scan the fabric if the root port is not in drain state. */
        fab_err = px_scan_fabric(px_p, rpdip, &derr);

        /* Set the intr state to idle for the leaf that received the mondo */
        if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
            INTR_IDLE_STATE) != DDI_SUCCESS) {
                px_fm_exit(px_p);
                return (DDI_INTR_UNCLAIMED);
        }

        switch (epkt->rc_descr.block) {
        case BLOCK_MMU: /* FALLTHROUGH */
        case BLOCK_INTR:
                msg = PX_RC;
                break;
        case BLOCK_PCIE:
                msg = PX_RP;
                break;
        case BLOCK_HOSTBUS: /* FALLTHROUGH */
        default:
                msg = PX_HB;
                break;
        }

        px_err_panic(rc_err, msg, fab_err, B_TRUE);
        px_fm_exit(px_p);
        px_err_panic(rc_err, msg, fab_err, B_FALSE);

done:
        return (DDI_INTR_CLAIMED);
}

/*
 * px_err_epkt_severity:
 * Check the severity of the fire error based on the epkt received.
 *
 * @param px_p          leaf in which to take the snapshot.
 * @param derr          fm err on which the ereport is to be based
 * @param epkt          epkt received from the HV
 */
static int
px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
    int caller)
{
        px_pec_t        *pec_p = px_p->px_pec_p;
        dev_info_t      *dip = px_p->px_dip;
        boolean_t       is_safeacc = B_FALSE;
        boolean_t       is_block_pci = B_FALSE;
        boolean_t       is_valid_epkt = B_FALSE;
        int             err = 0;

        /* Cautious access error handling */
        switch (derr->fme_flag) {
        case DDI_FM_ERR_EXPECTED:
                if (caller == PX_TRAP_CALL) {
                        /*
                         * For ddi_caut_get treat all events as nonfatal.
                         * The trampoline will set err_ena = 0,
                         * err_status = NONFATAL.
                         */
                        derr->fme_status = DDI_FM_NONFATAL;
                        is_safeacc = B_TRUE;
                } else {
                        /*
                         * For ddi_caut_put treat all events as nonfatal. Here
                         * we have the handle and can call ndi_fm_acc_err_set().
                         */
                        derr->fme_status = DDI_FM_NONFATAL;
                        ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
                        is_safeacc = B_TRUE;
                }
                break;
        case DDI_FM_ERR_PEEK:
        case DDI_FM_ERR_POKE:
                /*
                 * For ddi_peek/poke treat all events as nonfatal.
                 */
                is_safeacc = B_TRUE;
                break;
        default:
                is_safeacc = B_FALSE;
        }

        /*
         * Older hypervisors in some cases send epkts with incorrect fields.
         * We have to handle these "special" epkts correctly.
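         * px_fix_legacy_epkt() below rewrites the known-bad field
         * combinations in place before the severity is evaluated.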
         */
        if (px_legacy_epkt)
                px_fix_legacy_epkt(dip, derr, epkt);

        switch (epkt->rc_descr.block) {
        case BLOCK_HOSTBUS:
                err = px_cb_epkt_severity(dip, derr, epkt);
                break;
        case BLOCK_MMU:
                err = px_mmu_epkt_severity(dip, derr, epkt);
                px_err_fill_pfd(dip, px_p, epkt);
                break;
        case BLOCK_INTR:
                err = px_intr_epkt_severity(dip, derr, epkt);
                break;
        case BLOCK_PORT:
                err = px_port_epkt_severity(dip, derr, epkt);
                break;
        case BLOCK_PCIE:
                is_block_pci = B_TRUE;
                err = px_pcie_epkt_severity(dip, derr, epkt);
                px_err_fill_pfd(dip, px_p, epkt);
                break;
        default:
                err = 0;
        }

        if ((err & PX_HW_RESET) || (err & PX_PANIC)) {
                if (px_log & PX_PANIC)
                        px_err_log_handle(dip, epkt, is_block_pci, "PANIC");
                is_valid_epkt = B_TRUE;
        } else if (err & PX_PROTECTED) {
                if (px_log & PX_PROTECTED)
                        px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED");
                is_valid_epkt = B_TRUE;
        } else if (err & PX_NO_PANIC) {
                if (px_log & PX_NO_PANIC)
                        px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC");
                is_valid_epkt = B_TRUE;
        } else if (err & PX_NO_ERROR) {
                if (px_log & PX_NO_ERROR)
                        px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR");
                is_valid_epkt = B_TRUE;
        } else if (err == 0) {
                px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED");
                is_valid_epkt = B_FALSE;

                /* Panic on an unrecognized epkt */
                err = PX_PANIC;
        }

        px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr,
            is_valid_epkt);

        /* Readjust the severity as a result of safe access */
        if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED))
                err = PX_NO_PANIC;

        return (err);
}

static void
px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
    boolean_t is_valid_epkt)
{
        char buf[FM_MAX_CLASS], descr_buf[1024];

        /* send ereport for debug purposes */
        (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG);

        if (is_block_pci) {
                px_pec_err_t *pec = (px_pec_err_t *)epkt;
                (void) snprintf(descr_buf, sizeof (descr_buf),
                    "%s Epkt contents:\n"
                    "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
                    "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
                    "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
                    "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
                    "HDR1: 0x%lx, HDR2: 0x%lx\n"
                    "Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
                    "Err Severity: 0x%x\n",
                    is_valid_epkt ? "Valid" : "Invalid",
                    pec->pec_descr.block, pec->pec_descr.dir,
                    pec->pec_descr.Z, pec->pec_descr.S,
                    pec->pec_descr.R, pec->pec_descr.I,
                    pec->pec_descr.H, pec->pec_descr.C,
                    pec->pec_descr.U, pec->pec_descr.E,
                    pec->pec_descr.P, pec->pci_err_status,
                    pec->pcie_err_status, pec->ce_reg_status,
                    pec->ue_reg_status, pec->hdr[0],
                    pec->hdr[1], pec->err_src_reg,
                    pec->root_err_status, err);

                ddi_fm_ereport_post(dip, buf, derr->fme_ena,
                    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
                    EPKT_SYSINO, DATA_TYPE_UINT64,
                    is_valid_epkt ? pec->sysino : 0,
                    EPKT_EHDL, DATA_TYPE_UINT64,
                    is_valid_epkt ? pec->ehdl : 0,
                    EPKT_STICK, DATA_TYPE_UINT64,
                    is_valid_epkt ? pec->stick : 0,
                    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3],
                    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4],
                    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5],
                    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6],
                    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7],
                    EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf);
        } else {
                (void) snprintf(descr_buf, sizeof (descr_buf),
                    "%s Epkt contents:\n"
                    "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
                    "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
                    "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
                    "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
                    "Err Severity: 0x%x\n",
                    is_valid_epkt ? "Valid" : "Invalid",
                    epkt->rc_descr.block, epkt->rc_descr.op,
                    epkt->rc_descr.phase, epkt->rc_descr.cond,
                    epkt->rc_descr.dir, epkt->rc_descr.STOP,
                    epkt->rc_descr.H, epkt->rc_descr.R,
                    epkt->rc_descr.D, epkt->rc_descr.M,
                    epkt->rc_descr.S, epkt->size, epkt->addr,
                    epkt->hdr[0], epkt->hdr[1], epkt->reserved,
                    err);

                ddi_fm_ereport_post(dip, buf, derr->fme_ena,
                    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
                    EPKT_SYSINO, DATA_TYPE_UINT64,
                    is_valid_epkt ? epkt->sysino : 0,
                    EPKT_EHDL, DATA_TYPE_UINT64,
                    is_valid_epkt ? epkt->ehdl : 0,
                    EPKT_STICK, DATA_TYPE_UINT64,
                    is_valid_epkt ? epkt->stick : 0,
                    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3],
                    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4],
                    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5],
                    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6],
                    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7],
                    EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf);
        }
}

static void
px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci,
    char *msg)
{
        if (is_block_pci) {
                px_pec_err_t *pec = (px_pec_err_t *)epkt;
                DBG(DBG_ERR_INTR, dip,
                    "A PCIe root port error has occurred with a severity"
                    " \"%s\"\n"
                    "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
                    "\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
                    "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
                    "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
                    "\terr_src: 0x%x, root_err: 0x%x\n",
                    msg, pec->pec_descr.block, pec->pec_descr.dir,
                    pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R,
                    pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C,
                    pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P,
                    pec->pci_err_status, pec->pcie_err_status,
                    pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0],
                    pec->hdr[1], pec->err_src_reg, pec->root_err_status);
        } else {
                DBG(DBG_ERR_INTR, dip,
                    "A PCIe root complex error has occurred with a severity"
                    " \"%s\"\n"
                    "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
                    "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
                    "\tS=%d, Size: 0x%x, Addr: 0x%p\n"
                    "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
                    msg, epkt->rc_descr.block, epkt->rc_descr.op,
                    epkt->rc_descr.phase, epkt->rc_descr.cond,
                    epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H,
                    epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M,
                    epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0],
                    epkt->hdr[1], epkt->reserved);
        }
}

/* ARGSUSED */
static void
px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
        /*
         * We don't have a default case for any of the switch statements
         * below, since we are OK with the code falling through.
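         * Each nested level narrows down one descriptor field
         * (block -> op -> phase -> cond -> dir); when a combination known to
         * be emitted incorrectly by older hypervisors is matched, the
         * offending fields are rewritten in place.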
         */
        switch (epkt->rc_descr.block) {
        case BLOCK_HOSTBUS:
                switch (epkt->rc_descr.op) {
                case OP_DMA:
                        switch (epkt->rc_descr.phase) {
                        case PH_UNKNOWN:
                                switch (epkt->rc_descr.cond) {
                                case CND_UNKNOWN:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_RESERVED:
                                                epkt->rc_descr.dir = DIR_READ;
                                                break;
                                        } /* DIR */
                                } /* CND */
                        } /* PH */
                } /* OP */
                break;
        case BLOCK_MMU:
                switch (epkt->rc_descr.op) {
                case OP_XLAT:
                        switch (epkt->rc_descr.phase) {
                        case PH_DATA:
                                switch (epkt->rc_descr.cond) {
                                case CND_PROT:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_UNKNOWN:
                                                epkt->rc_descr.dir = DIR_WRITE;
                                                break;
                                        } /* DIR */
                                } /* CND */
                                break;
                        case PH_IRR:
                                switch (epkt->rc_descr.cond) {
                                case CND_RESERVED:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_IRR:
                                                epkt->rc_descr.phase = PH_ADDR;
                                                epkt->rc_descr.cond = CND_IRR;
                                        } /* DIR */
                                } /* CND */
                        } /* PH */
                } /* OP */
                break;
        case BLOCK_INTR:
                switch (epkt->rc_descr.op) {
                case OP_MSIQ:
                        switch (epkt->rc_descr.phase) {
                        case PH_UNKNOWN:
                                switch (epkt->rc_descr.cond) {
                                case CND_ILL:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_RESERVED:
                                                epkt->rc_descr.dir = DIR_IRR;
                                                break;
                                        } /* DIR */
                                        break;
                                case CND_IRR:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_IRR:
                                                epkt->rc_descr.cond = CND_OV;
                                                break;
                                        } /* DIR */
                                } /* CND */
                        } /* PH */
                        break;
                case OP_RESERVED:
                        switch (epkt->rc_descr.phase) {
                        case PH_UNKNOWN:
                                switch (epkt->rc_descr.cond) {
                                case CND_ILL:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_IRR:
                                                epkt->rc_descr.op = OP_MSI32;
                                                epkt->rc_descr.phase = PH_DATA;
                                                break;
                                        } /* DIR */
                                } /* CND */
                                break;
                        case PH_DATA:
                                switch (epkt->rc_descr.cond) {
                                case CND_INT:
                                        switch (epkt->rc_descr.dir) {
                                        case DIR_UNKNOWN:
                                                epkt->rc_descr.op = OP_MSI32;
                                                break;
                                        } /* DIR */
                                } /* CND */
                        } /* PH */
                } /* OP */
        } /* BLOCK */
}

/* ARGSUSED */
static int
px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
        return (px_err_check_eq(dip));
}

/* ARGSUSED */
static int
px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
        pf_pcie_adv_err_regs_t  adv_reg;
        uint16_t                s_status = 0;
        int                     sts = PX_PANIC;

        /*
         * Check for failed non-posted writes, which are errors that are not
         * defined in the PCIe spec.  If this is not such an error, panic.
         */
        if (!((epkt->rc_descr.op == OP_PIO) &&
            (epkt->rc_descr.phase == PH_IRR))) {
                sts = (PX_PANIC);
                goto done;
        }

        /*
         * Gather the error logs.  If they do not exist, just return without
         * panicking and let the fabric message take care of the error.
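         * Here the H flag is taken as the indication that the logged TLP
         * header in hdr[0]/hdr[1] is valid.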
         */
        if (!epkt->rc_descr.H) {
                sts = (PX_NO_PANIC);
                goto done;
        }

        adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0]);
        adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0] >> 32);
        adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1]);
        adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1] >> 32);

        sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);

        if (epkt->rc_descr.M)
                adv_reg.pcie_ue_tgt_addr = epkt->addr;

        if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) {
                /* Let the fabric message take care of error */
                sts = PX_NO_PANIC;
                goto done;
        }

        /* See if the failed transaction belonged to a hardened driver */
        if (pf_hdl_lookup(dip, derr->fme_ena,
            adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr,
            adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
                sts = (PX_NO_PANIC);
        else
                sts = (PX_PANIC);

        /* Add pfd to cause a fabric scan */
        switch (epkt->rc_descr.cond) {
        case CND_RCA:
                s_status = PCI_STAT_R_TARG_AB;
                break;
        case CND_RUR:
                s_status = PCI_STAT_R_MAST_AB;
                break;
        }
        px_rp_en_q(DIP_TO_STATE(dip), adv_reg.pcie_ue_tgt_bdf,
            adv_reg.pcie_ue_tgt_addr, s_status);

done:
        return (sts);
}

/* ARGSUSED */
static int
px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
        px_pec_err_t    *pec_p = (px_pec_err_t *)epkt;
        px_err_pcie_t   *pcie = (px_err_pcie_t *)epkt;
        pf_pcie_adv_err_regs_t adv_reg;
        int             sts;
        uint32_t        temp;

        /*
         * Check for failed PIO Read/Writes, which are errors that are not
         * defined in the PCIe spec.
         */
        temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA;
        if (((pec_p->pec_descr.dir == DIR_READ) ||
            (pec_p->pec_descr.dir == DIR_WRITE)) &&
            pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) {
                adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]);
                adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32);
                adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]);
                adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32);

                sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);

                if (sts == DDI_SUCCESS &&
                    pf_hdl_lookup(dip, derr->fme_ena,
                    adv_reg.pcie_ue_tgt_trans,
                    adv_reg.pcie_ue_tgt_addr,
                    adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
                        return (PX_NO_PANIC);
                else
                        return (PX_PANIC);
        }

        if (!pec_p->pec_descr.C)
                pec_p->ce_reg_status = 0;
        if (!pec_p->pec_descr.U)
                pec_p->ue_reg_status = 0;
        if (!pec_p->pec_descr.H)
                pec_p->hdr[0] = 0;
        if (!pec_p->pec_descr.I)
                pec_p->hdr[1] = 0;

        /*
         * According to the PCIe spec, there is a first error pointer.  If
         * there are header logs recorded and there is more than one error,
         * the log will belong to the error that the first error pointer
         * points to.
         *
         * regs.primary_ue expects a bit number; go through the ue register
         * and find the first error that occurred.  Because the sun4v epkt
         * spec does not define this value, the algorithm below gives the
         * lower bit priority.
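         * For example (assuming the standard PCIe AER bit layout), if both
         * PCIE_AER_UCE_PTLP (bit 12) and PCIE_AER_UCE_TO (bit 14) are set,
         * primary_ue ends up as PCIE_AER_UCE_PTLP.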
         */
        temp = pcie->ue_reg;
        if (temp) {
                int x;
                for (x = 0; !(temp & 0x1); x++) {
                        temp = temp >> 1;
                }
                pcie->primary_ue = 1 << x;
        } else {
                pcie->primary_ue = 0;
        }

        /* Sun4v doesn't log the TX hdr except for CTOs */
        if (pcie->primary_ue == PCIE_AER_UCE_TO) {
                pcie->tx_hdr1 = pcie->rx_hdr1;
                pcie->tx_hdr2 = pcie->rx_hdr2;
                pcie->tx_hdr3 = pcie->rx_hdr3;
                pcie->tx_hdr4 = pcie->rx_hdr4;
                pcie->rx_hdr1 = 0;
                pcie->rx_hdr2 = 0;
                pcie->rx_hdr3 = 0;
                pcie->rx_hdr4 = 0;
        } else {
                pcie->tx_hdr1 = 0;
                pcie->tx_hdr2 = 0;
                pcie->tx_hdr3 = 0;
                pcie->tx_hdr4 = 0;
        }

        return (px_err_check_pcie(dip, derr, pcie));
}

static int
px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
        uint64_t addr = (uint64_t)epkt->addr;
        pcie_req_id_t bdf = PCIE_INVALID_BDF;

        if (epkt->rc_descr.H) {
                bdf = (uint32_t)((epkt->hdr[0] >> 16) & 0xFFFF);
        }

        return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr,
            bdf));
}