1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * sun4v Fire Error Handling 28 */ 29 30 #include <sys/types.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/sunndi.h> 34 #include <sys/fm/protocol.h> 35 #include <sys/fm/util.h> 36 #include <sys/membar.h> 37 #include "px_obj.h" 38 #include "px_err.h" 39 40 static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, 41 px_rc_err_t *epkt); 42 static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 43 static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, 44 px_rc_err_t *epkt, pf_data_t *pfd_p); 45 46 static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, 47 boolean_t is_block_pci, char *msg); 48 static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt, 49 boolean_t is_block_pci, int err, ddi_fm_error_t *derr, 50 boolean_t is_valid_epkt); 51 static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 52 px_rc_err_t *epkt); 53 static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 54 px_rc_err_t *epkt); 55 static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 56 px_rc_err_t *epkt); 57 static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 58 px_rc_err_t *epkt, pf_data_t *pfd_p); 59 static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 60 px_rc_err_t *epkt); 61 static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 62 px_rc_err_t *epkt); 63 static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 64 px_rc_err_t *epkt, pf_data_t *pfd_p); 65 static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, 66 px_rc_err_t *epkt); 67 static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, 68 px_rc_err_t *epkt); 69 70 /* Include the code generated sun4v epkt checking code */ 71 #include "px_err_gen.c" 72 73 /* 74 * This variable indicates if we have a hypervisor that could potentially send 75 * incorrect epkts. We always set this to TRUE for now until we find a way to 76 * tell if this HV bug has been fixed. 77 */ 78 boolean_t px_legacy_epkt = B_TRUE; 79 80 /* 81 * px_err_cb_intr: 82 * Interrupt handler for the Host Bus Block. 83 */ 84 uint_t 85 px_err_cb_intr(caddr_t arg) 86 { 87 px_fault_t *fault_p = (px_fault_t *)arg; 88 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 89 90 if (epkt != NULL) { 91 return (px_err_intr(fault_p, epkt)); 92 } 93 94 return (DDI_INTR_UNCLAIMED); 95 } 96 97 /* 98 * px_err_dmc_pec_intr: 99 * Interrupt handler for the DMC/PEC block. 100 */ 101 uint_t 102 px_err_dmc_pec_intr(caddr_t arg) 103 { 104 px_fault_t *fault_p = (px_fault_t *)arg; 105 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 106 107 if (epkt != NULL) { 108 return (px_err_intr(fault_p, epkt)); 109 } 110 111 return (DDI_INTR_UNCLAIMED); 112 } 113 114 /* 115 * px_err_cmn_intr: 116 * Common function called by trap, mondo and fabric intr. 117 * This function is more meaningful in sun4u implementation. Kept 118 * to mirror sun4u call stack. 119 * o check for safe access 120 * o create and queue RC info for later use in fabric scan. 121 * o RUC/WUC, PTLP, MMU Errors(CA), UR 122 * 123 * @param px_p leaf in which to check access 124 * @param derr fm err data structure to be updated 125 * @param caller PX_TRAP_CALL | PX_INTR_CALL 126 * @param chkjbc whether to handle hostbus registers (ignored) 127 * @return err PX_NO_PANIC | PX_PROTECTED | 128 * PX_PANIC | PX_HW_RESET | PX_EXPECTED 129 */ 130 /* ARGSUSED */ 131 int 132 px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) 133 { 134 px_err_safeacc_check(px_p, derr); 135 return (DDI_FM_OK); 136 } 137 138 /* 139 * fills RC specific fault data 140 */ 141 static void 142 px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) { 143 pf_pcie_adv_err_regs_t adv_reg; 144 int sts = DDI_SUCCESS; 145 pcie_req_id_t fault_bdf = PCIE_INVALID_BDF; 146 uint64_t fault_addr = 0; 147 uint16_t s_status = 0; 148 149 /* Add an PCIE PF_DATA Entry */ 150 if (epkt->rc_descr.block == BLOCK_MMU) { 151 /* Only PIO Fault Addresses are valid, this is DMA */ 152 s_status = PCI_STAT_S_TARG_AB; 153 fault_addr = NULL; 154 155 if (epkt->rc_descr.H) { 156 fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16); 157 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 158 PF_AFFECTED_BDF; 159 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = 160 fault_bdf; 161 } else 162 sts = DDI_FAILURE; 163 } else { 164 px_pec_err_t *pec_p = (px_pec_err_t *)epkt; 165 uint32_t dir = pec_p->pec_descr.dir; 166 167 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]); 168 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32); 169 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]); 170 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32); 171 172 /* translate RC UR/CA to legacy secondary errors */ 173 if ((dir == DIR_READ || dir == DIR_WRITE) && 174 pec_p->pec_descr.U) { 175 if (pec_p->ue_reg_status & PCIE_AER_UCE_UR) 176 s_status |= PCI_STAT_R_MAST_AB; 177 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) 178 s_status |= PCI_STAT_R_TARG_AB; 179 } 180 181 if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP) 182 s_status |= PCI_STAT_PERROR; 183 184 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) 185 s_status |= PCI_STAT_S_TARG_AB; 186 187 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 188 fault_bdf = adv_reg.pcie_ue_tgt_bdf; 189 fault_addr = adv_reg.pcie_ue_tgt_addr; 190 /* affected BDF is to be filled in by px_scan_fabric */ 191 } 192 193 if (sts == DDI_SUCCESS) { 194 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf; 195 PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)fault_addr; 196 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; 197 } 198 } 199 200 /* 201 * Convert error severity from PX internal values to PCIe Fabric values. Most 202 * are self explanitory, except PX_PROTECTED. PX_PROTECTED will never be 203 * returned as is if forgivable. 204 */ 205 static int px_err_to_fab_sev(int rc_err) { 206 int fab_err = 0; 207 208 if (rc_err & (PX_HW_RESET | PX_EXPECTED | PX_NO_PANIC)) 209 fab_err |= PF_ERR_NO_PANIC; 210 211 if (rc_err & (PX_PANIC | PX_PROTECTED)) 212 fab_err |= PF_ERR_PANIC; 213 214 if (rc_err & PX_NO_ERROR) 215 fab_err |= PF_ERR_NO_ERROR; 216 217 return (fab_err); 218 } 219 220 /* 221 * px_err_intr: 222 * Interrupt handler for the JBC/DMC/PEC block. 223 * o lock 224 * o create derr 225 * o check safe access 226 * o px_err_check_severity(epkt) 227 * o pcie_scan_fabric 228 * o Idle intr state 229 * o unlock 230 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 231 */ 232 static uint_t 233 px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt) 234 { 235 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); 236 dev_info_t *rpdip = px_p->px_dip; 237 int rc_err, fab_err, msg; 238 ddi_fm_error_t derr; 239 pf_data_t *pfd_p; 240 241 if (px_fm_enter(px_p) != DDI_SUCCESS) 242 goto done; 243 244 pfd_p = px_get_pfd(px_p); 245 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL; 246 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt; 247 248 /* Create the derr */ 249 bzero(&derr, sizeof (ddi_fm_error_t)); 250 derr.fme_version = DDI_FME_VERSION; 251 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1); 252 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 253 254 /* Basically check for safe access */ 255 (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL); 256 257 /* Check the severity of this error */ 258 rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p); 259 260 pfd_p->pe_severity_flags = px_err_to_fab_sev(rc_err); 261 /* 262 * px_err_epkt_severity needs to populate affected dev 263 * Only MMU errors and PCIe errors need this. 264 * For MMU we will call pf_handle_lookup, using fault bdf 265 * - need to call bdf look up.. 266 * For PCIe do not fill in affected.. 267 */ 268 269 /* Scan the fabric if the root port is not in drain state. */ 270 fab_err = px_scan_fabric(px_p, rpdip, &derr); 271 272 /* Set the intr state to idle for the leaf that received the mondo */ 273 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, 274 INTR_IDLE_STATE) != DDI_SUCCESS) { 275 px_fm_exit(px_p); 276 return (DDI_INTR_UNCLAIMED); 277 } 278 279 switch (epkt->rc_descr.block) { 280 case BLOCK_MMU: /* FALLTHROUGH */ 281 case BLOCK_INTR: 282 msg = PX_RC; 283 break; 284 case BLOCK_PCIE: 285 msg = PX_RP; 286 break; 287 case BLOCK_HOSTBUS: /* FALLTHROUGH */ 288 default: 289 msg = PX_HB; 290 break; 291 } 292 293 px_err_panic(rc_err, msg, fab_err, B_TRUE); 294 px_fm_exit(px_p); 295 px_err_panic(rc_err, msg, fab_err, B_FALSE); 296 297 done: 298 return (DDI_INTR_CLAIMED); 299 } 300 301 /* 302 * px_err_epkt_severity: 303 * Check the severity of the fire error based the epkt received 304 * 305 * @param px_p leaf in which to take the snap shot. 306 * @param derr fm err in which the ereport is to be based on 307 * @param epkt epkt recevied from HV 308 */ 309 static int 310 px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, 311 pf_data_t *pfd_p) 312 { 313 px_pec_t *pec_p = px_p->px_pec_p; 314 dev_info_t *dip = px_p->px_dip; 315 boolean_t is_safeacc = B_FALSE; 316 boolean_t is_block_pci = B_FALSE; 317 boolean_t is_valid_epkt = B_FALSE; 318 int err = 0; 319 320 /* Cautious access error handling */ 321 switch (derr->fme_flag) { 322 case DDI_FM_ERR_EXPECTED: 323 /* 324 * For ddi_caut_put treat all events as nonfatal. Here 325 * we have the handle and can call ndi_fm_acc_err_set(). 326 */ 327 derr->fme_status = DDI_FM_NONFATAL; 328 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 329 is_safeacc = B_TRUE; 330 break; 331 case DDI_FM_ERR_PEEK: 332 case DDI_FM_ERR_POKE: 333 /* 334 * For ddi_peek/poke treat all events as nonfatal. 335 */ 336 is_safeacc = B_TRUE; 337 break; 338 default: 339 is_safeacc = B_FALSE; 340 } 341 342 /* 343 * Older hypervisors in some cases send epkts with incorrect fields. 344 * We have to handle these "special" epkts correctly. 345 */ 346 if (px_legacy_epkt) 347 px_fix_legacy_epkt(dip, derr, epkt); 348 349 switch (epkt->rc_descr.block) { 350 case BLOCK_HOSTBUS: 351 err = px_cb_epkt_severity(dip, derr, epkt); 352 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 353 break; 354 case BLOCK_MMU: 355 err = px_mmu_epkt_severity(dip, derr, epkt); 356 px_err_fill_pfd(dip, pfd_p, epkt); 357 break; 358 case BLOCK_INTR: 359 err = px_intr_epkt_severity(dip, derr, epkt); 360 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 361 break; 362 case BLOCK_PORT: 363 err = px_port_epkt_severity(dip, derr, epkt, pfd_p); 364 break; 365 case BLOCK_PCIE: 366 is_block_pci = B_TRUE; 367 err = px_pcie_epkt_severity(dip, derr, epkt); 368 px_err_fill_pfd(dip, pfd_p, epkt); 369 break; 370 default: 371 err = 0; 372 } 373 374 if ((err & PX_HW_RESET) || (err & PX_PANIC)) { 375 if (px_log & PX_PANIC) 376 px_err_log_handle(dip, epkt, is_block_pci, "PANIC"); 377 is_valid_epkt = B_TRUE; 378 } else if (err & PX_PROTECTED) { 379 if (px_log & PX_PROTECTED) 380 px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED"); 381 is_valid_epkt = B_TRUE; 382 } else if (err & PX_NO_PANIC) { 383 if (px_log & PX_NO_PANIC) 384 px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC"); 385 is_valid_epkt = B_TRUE; 386 } else if (err & PX_NO_ERROR) { 387 if (px_log & PX_NO_ERROR) 388 px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR"); 389 is_valid_epkt = B_TRUE; 390 } else if (err == 0) { 391 px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED"); 392 is_valid_epkt = B_FALSE; 393 394 /* Panic on a unrecognized epkt */ 395 err = PX_PANIC; 396 } 397 398 px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr, 399 is_valid_epkt); 400 401 /* Readjust the severity as a result of safe access */ 402 if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED)) 403 err = PX_NO_PANIC; 404 405 return (err); 406 } 407 408 static void 409 px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt, 410 boolean_t is_block_pci, int err, ddi_fm_error_t *derr, 411 boolean_t is_valid_epkt) 412 { 413 char buf[FM_MAX_CLASS], descr_buf[1024]; 414 415 /* send ereport for debug purposes */ 416 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG); 417 418 if (is_block_pci) { 419 px_pec_err_t *pec = (px_pec_err_t *)epkt; 420 (void) snprintf(descr_buf, sizeof (descr_buf), 421 "%s Epkt contents:\n" 422 "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n" 423 "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n" 424 "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n" 425 "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n" 426 "HDR1: 0x%lx, HDR2: 0x%lx\n" 427 "Err Src Reg: 0x%x, Root Err Status: 0x%x\n" 428 "Err Severity: 0x%x\n", 429 is_valid_epkt ? "Valid" : "Invalid", 430 pec->pec_descr.block, pec->pec_descr.dir, 431 pec->pec_descr.Z, pec->pec_descr.S, 432 pec->pec_descr.R, pec->pec_descr.I, 433 pec->pec_descr.H, pec->pec_descr.C, 434 pec->pec_descr.U, pec->pec_descr.E, 435 pec->pec_descr.P, pec->pci_err_status, 436 pec->pcie_err_status, pec->ce_reg_status, 437 pec->ue_reg_status, pec->hdr[0], 438 pec->hdr[1], pec->err_src_reg, 439 pec->root_err_status, err); 440 441 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 442 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 443 EPKT_SYSINO, DATA_TYPE_UINT64, 444 is_valid_epkt ? pec->sysino : 0, 445 EPKT_EHDL, DATA_TYPE_UINT64, 446 is_valid_epkt ? pec->ehdl : 0, 447 EPKT_STICK, DATA_TYPE_UINT64, 448 is_valid_epkt ? pec->stick : 0, 449 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3], 450 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4], 451 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5], 452 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6], 453 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7], 454 EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf); 455 } else { 456 (void) snprintf(descr_buf, sizeof (descr_buf), 457 "%s Epkt contents:\n" 458 "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" 459 "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n" 460 "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n" 461 "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n" 462 "Err Severity: 0x%x\n", 463 is_valid_epkt ? "Valid" : "Invalid", 464 epkt->rc_descr.block, epkt->rc_descr.op, 465 epkt->rc_descr.phase, epkt->rc_descr.cond, 466 epkt->rc_descr.dir, epkt->rc_descr.STOP, 467 epkt->rc_descr.H, epkt->rc_descr.R, 468 epkt->rc_descr.D, epkt->rc_descr.M, 469 epkt->rc_descr.S, epkt->size, epkt->addr, 470 epkt->hdr[0], epkt->hdr[1], epkt->reserved, 471 err); 472 473 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 474 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 475 EPKT_SYSINO, DATA_TYPE_UINT64, 476 is_valid_epkt ? epkt->sysino : 0, 477 EPKT_EHDL, DATA_TYPE_UINT64, 478 is_valid_epkt ? epkt->ehdl : 0, 479 EPKT_STICK, DATA_TYPE_UINT64, 480 is_valid_epkt ? epkt->stick : 0, 481 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3], 482 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4], 483 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5], 484 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6], 485 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7], 486 EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf); 487 } 488 } 489 490 static void 491 px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci, 492 char *msg) 493 { 494 if (is_block_pci) { 495 px_pec_err_t *pec = (px_pec_err_t *)epkt; 496 DBG(DBG_ERR_INTR, dip, 497 "A PCIe root port error has occured with a severity" 498 " \"%s\"\n" 499 "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n" 500 "\tH=%d, C=%d, U=%d, E=%d, P=%d\n" 501 "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n" 502 "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n" 503 "\terr_src: 0x%x, root_err: 0x%x\n", 504 msg, pec->pec_descr.block, pec->pec_descr.dir, 505 pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R, 506 pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C, 507 pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P, 508 pec->pci_err_status, pec->pcie_err_status, 509 pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0], 510 pec->hdr[1], pec->err_src_reg, pec->root_err_status); 511 } else { 512 DBG(DBG_ERR_INTR, dip, 513 "A PCIe root complex error has occured with a severity" 514 " \"%s\"\n" 515 "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" 516 "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n" 517 "\tS=%d, Size: 0x%x, Addr: 0x%p\n" 518 "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n", 519 msg, epkt->rc_descr.block, epkt->rc_descr.op, 520 epkt->rc_descr.phase, epkt->rc_descr.cond, 521 epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H, 522 epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M, 523 epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0], 524 epkt->hdr[1], epkt->reserved); 525 } 526 } 527 528 /* ARGSUSED */ 529 static void 530 px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 531 { 532 /* 533 * We don't have a default case for any of the below switch statements 534 * since we are ok with the code falling through. 535 */ 536 switch (epkt->rc_descr.block) { 537 case BLOCK_HOSTBUS: 538 switch (epkt->rc_descr.op) { 539 case OP_DMA: 540 switch (epkt->rc_descr.phase) { 541 case PH_UNKNOWN: 542 switch (epkt->rc_descr.cond) { 543 case CND_UNKNOWN: 544 switch (epkt->rc_descr.dir) { 545 case DIR_RESERVED: 546 epkt->rc_descr.dir = DIR_READ; 547 break; 548 } /* DIR */ 549 } /* CND */ 550 } /* PH */ 551 } /* OP */ 552 break; 553 case BLOCK_MMU: 554 switch (epkt->rc_descr.op) { 555 case OP_XLAT: 556 switch (epkt->rc_descr.phase) { 557 case PH_DATA: 558 switch (epkt->rc_descr.cond) { 559 case CND_PROT: 560 switch (epkt->rc_descr.dir) { 561 case DIR_UNKNOWN: 562 epkt->rc_descr.dir = DIR_WRITE; 563 break; 564 } /* DIR */ 565 } /* CND */ 566 break; 567 case PH_IRR: 568 switch (epkt->rc_descr.cond) { 569 case CND_RESERVED: 570 switch (epkt->rc_descr.dir) { 571 case DIR_IRR: 572 epkt->rc_descr.phase = PH_ADDR; 573 epkt->rc_descr.cond = CND_IRR; 574 } /* DIR */ 575 } /* CND */ 576 } /* PH */ 577 } /* OP */ 578 break; 579 case BLOCK_INTR: 580 switch (epkt->rc_descr.op) { 581 case OP_MSIQ: 582 switch (epkt->rc_descr.phase) { 583 case PH_UNKNOWN: 584 switch (epkt->rc_descr.cond) { 585 case CND_ILL: 586 switch (epkt->rc_descr.dir) { 587 case DIR_RESERVED: 588 epkt->rc_descr.dir = DIR_IRR; 589 break; 590 } /* DIR */ 591 break; 592 case CND_IRR: 593 switch (epkt->rc_descr.dir) { 594 case DIR_IRR: 595 epkt->rc_descr.cond = CND_OV; 596 break; 597 } /* DIR */ 598 } /* CND */ 599 } /* PH */ 600 break; 601 case OP_RESERVED: 602 switch (epkt->rc_descr.phase) { 603 case PH_UNKNOWN: 604 switch (epkt->rc_descr.cond) { 605 case CND_ILL: 606 switch (epkt->rc_descr.dir) { 607 case DIR_IRR: 608 epkt->rc_descr.op = OP_MSI32; 609 epkt->rc_descr.phase = PH_DATA; 610 break; 611 } /* DIR */ 612 } /* CND */ 613 break; 614 case PH_DATA: 615 switch (epkt->rc_descr.cond) { 616 case CND_INT: 617 switch (epkt->rc_descr.dir) { 618 case DIR_UNKNOWN: 619 epkt->rc_descr.op = OP_MSI32; 620 break; 621 } /* DIR */ 622 } /* CND */ 623 } /* PH */ 624 } /* OP */ 625 } /* BLOCK */ 626 } 627 628 /* ARGSUSED */ 629 static int 630 px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 631 { 632 return (px_err_check_eq(dip)); 633 } 634 635 /* ARGSUSED */ 636 static int 637 px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt, 638 pf_data_t *pfd_p) 639 { 640 pf_pcie_adv_err_regs_t adv_reg; 641 uint16_t s_status; 642 int sts = PX_PANIC; 643 644 /* 645 * Check for failed non-posted writes, which are errors that are not 646 * defined in the PCIe spec. If not return panic. 647 */ 648 if (!((epkt->rc_descr.op == OP_PIO) && 649 (epkt->rc_descr.phase == PH_IRR))) { 650 sts = (PX_PANIC); 651 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 652 goto done; 653 } 654 655 /* 656 * Gather the error logs, if they do not exist just return with no panic 657 * and let the fabric message take care of the error. 658 */ 659 if (!epkt->rc_descr.H) { 660 sts = (PX_NO_PANIC); 661 goto done; 662 } 663 664 adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0]); 665 adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0] >> 32); 666 adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1]); 667 adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1] >> 32); 668 669 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 670 671 if (epkt->rc_descr.M) 672 adv_reg.pcie_ue_tgt_addr = epkt->addr; 673 674 if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) { 675 /* Let the fabric message take care of error */ 676 sts = PX_NO_PANIC; 677 goto done; 678 } 679 680 /* See if the failed transaction belonged to a hardened driver */ 681 if (pf_hdl_lookup(dip, derr->fme_ena, 682 adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr, 683 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 684 sts = (PX_NO_PANIC); 685 else 686 sts = (PX_PANIC); 687 688 /* Add pfd to cause a fabric scan */ 689 switch (epkt->rc_descr.cond) { 690 case CND_RCA: 691 s_status = PCI_STAT_R_TARG_AB; 692 break; 693 case CND_RUR: 694 s_status = PCI_STAT_R_MAST_AB; 695 break; 696 } 697 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf; 698 PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr; 699 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; 700 701 done: 702 return (sts); 703 } 704 705 /* ARGSUSED */ 706 static int 707 px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 708 { 709 px_pec_err_t *pec_p = (px_pec_err_t *)epkt; 710 px_err_pcie_t *pcie = (px_err_pcie_t *)epkt; 711 pf_pcie_adv_err_regs_t adv_reg; 712 int sts; 713 uint32_t temp; 714 715 /* 716 * Check for failed PIO Read/Writes, which are errors that are not 717 * defined in the PCIe spec. 718 */ 719 temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA; 720 if (((pec_p->pec_descr.dir == DIR_READ) || 721 (pec_p->pec_descr.dir == DIR_WRITE)) && 722 pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) { 723 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0]); 724 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0] >> 32); 725 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1]); 726 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1] >> 32); 727 728 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 729 730 if (sts == DDI_SUCCESS && 731 pf_hdl_lookup(dip, derr->fme_ena, 732 adv_reg.pcie_ue_tgt_trans, 733 adv_reg.pcie_ue_tgt_addr, 734 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 735 return (PX_NO_PANIC); 736 else 737 return (PX_PANIC); 738 } 739 740 if (!pec_p->pec_descr.C) 741 pec_p->ce_reg_status = 0; 742 if (!pec_p->pec_descr.U) 743 pec_p->ue_reg_status = 0; 744 if (!pec_p->pec_descr.H) 745 pec_p->hdr[0] = 0; 746 if (!pec_p->pec_descr.I) 747 pec_p->hdr[1] = 0; 748 749 /* 750 * According to the PCIe spec, there is a first error pointer. If there 751 * are header logs recorded and there are more than one error, the log 752 * will belong to the error that the first error pointer points to. 753 * 754 * The regs.primary_ue expects a bit number, go through the ue register 755 * and find the first error that occured. Because the sun4v epkt spec 756 * does not define this value, the algorithm below gives the lower bit 757 * priority. 758 */ 759 temp = pcie->ue_reg; 760 if (temp) { 761 int x; 762 for (x = 0; !(temp & 0x1); x++) { 763 temp = temp >> 1; 764 } 765 pcie->primary_ue = 1 << x; 766 } else { 767 pcie->primary_ue = 0; 768 } 769 770 /* Sun4v doesn't log the TX hdr except for CTOs */ 771 if (pcie->primary_ue == PCIE_AER_UCE_TO) { 772 pcie->tx_hdr1 = pcie->rx_hdr1; 773 pcie->tx_hdr2 = pcie->rx_hdr2; 774 pcie->tx_hdr3 = pcie->rx_hdr3; 775 pcie->tx_hdr4 = pcie->rx_hdr4; 776 pcie->rx_hdr1 = 0; 777 pcie->rx_hdr2 = 0; 778 pcie->rx_hdr3 = 0; 779 pcie->rx_hdr4 = 0; 780 } else { 781 pcie->tx_hdr1 = 0; 782 pcie->tx_hdr2 = 0; 783 pcie->tx_hdr3 = 0; 784 pcie->tx_hdr4 = 0; 785 } 786 787 return (px_err_check_pcie(dip, derr, pcie)); 788 } 789 790 static int 791 px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 792 { 793 uint64_t addr = (uint64_t)epkt->addr; 794 pcie_req_id_t bdf = PCIE_INVALID_BDF; 795 796 if (epkt->rc_descr.H) { 797 bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF); 798 } 799 800 return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr, 801 bdf)); 802 } 803