1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * sun4v Fire Error Handling 28 */ 29 30 #include <sys/types.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/sunndi.h> 34 #include <sys/fm/protocol.h> 35 #include <sys/fm/util.h> 36 #include <sys/membar.h> 37 #include "px_obj.h" 38 #include "px_err.h" 39 40 static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, 41 px_rc_err_t *epkt); 42 static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 43 static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, 44 px_rc_err_t *epkt, pf_data_t *pfd_p); 45 46 static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, 47 boolean_t is_block_pci, char *msg); 48 static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt, 49 boolean_t is_block_pci, int err, ddi_fm_error_t *derr, 50 boolean_t is_valid_epkt); 51 static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 52 px_rc_err_t *epkt); 53 static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 54 px_rc_err_t *epkt); 55 static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 56 px_rc_err_t *epkt); 57 static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 58 px_rc_err_t *epkt, pf_data_t *pfd_p); 59 static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 60 px_rc_err_t *epkt); 61 static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 62 px_rc_err_t *epkt); 63 static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 64 px_rc_err_t *epkt, pf_data_t *pfd_p); 65 static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, 66 px_rc_err_t *epkt); 67 static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, 68 px_rc_err_t *epkt); 69 70 /* Include the code generated sun4v epkt checking code */ 71 #include "px_err_gen.c" 72 73 /* 74 * This variable indicates if we have a hypervisor that could potentially send 75 * incorrect epkts. We always set this to TRUE for now until we find a way to 76 * tell if this HV bug has been fixed. 77 */ 78 boolean_t px_legacy_epkt = B_TRUE; 79 80 /* 81 * px_err_cb_intr: 82 * Interrupt handler for the Host Bus Block. 83 */ 84 uint_t 85 px_err_cb_intr(caddr_t arg) 86 { 87 px_fault_t *fault_p = (px_fault_t *)arg; 88 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 89 90 if (epkt != NULL) { 91 return (px_err_intr(fault_p, epkt)); 92 } 93 94 return (DDI_INTR_UNCLAIMED); 95 } 96 97 /* 98 * px_err_dmc_pec_intr: 99 * Interrupt handler for the DMC/PEC block. 100 */ 101 uint_t 102 px_err_dmc_pec_intr(caddr_t arg) 103 { 104 px_fault_t *fault_p = (px_fault_t *)arg; 105 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 106 107 if (epkt != NULL) { 108 return (px_err_intr(fault_p, epkt)); 109 } 110 111 return (DDI_INTR_UNCLAIMED); 112 } 113 114 /* 115 * px_err_cmn_intr: 116 * Common function called by trap, mondo and fabric intr. 117 * This function is more meaningful in sun4u implementation. Kept 118 * to mirror sun4u call stack. 119 * o check for safe access 120 * o create and queue RC info for later use in fabric scan. 121 * o RUC/WUC, PTLP, MMU Errors(CA), UR 122 * 123 * @param px_p leaf in which to check access 124 * @param derr fm err data structure to be updated 125 * @param caller PX_TRAP_CALL | PX_INTR_CALL 126 * @param chkjbc whether to handle hostbus registers (ignored) 127 * @return err PX_NO_PANIC | PX_PROTECTED | 128 * PX_PANIC | PX_HW_RESET | PX_EXPECTED 129 */ 130 /* ARGSUSED */ 131 int 132 px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) 133 { 134 px_err_safeacc_check(px_p, derr); 135 return (PX_NO_ERROR); 136 } 137 138 /* 139 * fills RC specific fault data 140 */ 141 static void 142 px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) { 143 pf_pcie_adv_err_regs_t adv_reg; 144 pcie_req_id_t fault_bdf = PCIE_INVALID_BDF; 145 uint64_t fault_addr = 0; 146 uint16_t s_status = 0; 147 148 /* Add an PCIE PF_DATA Entry */ 149 if (epkt->rc_descr.block == BLOCK_MMU) { 150 /* Only PIO Fault Addresses are valid, this is DMA */ 151 s_status = PCI_STAT_S_TARG_AB; 152 fault_addr = NULL; 153 154 if (epkt->rc_descr.H) { 155 fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16); 156 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 157 PF_AFFECTED_BDF; 158 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = 159 fault_bdf; 160 } 161 } else { 162 px_pec_err_t *pec_p = (px_pec_err_t *)epkt; 163 uint32_t dir = pec_p->pec_descr.dir; 164 165 /* translate RC UR/CA to legacy secondary errors */ 166 if ((dir == DIR_READ || dir == DIR_WRITE) && 167 pec_p->pec_descr.U) { 168 if (pec_p->ue_reg_status & PCIE_AER_UCE_UR) 169 s_status |= PCI_STAT_R_MAST_AB; 170 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) 171 s_status |= PCI_STAT_R_TARG_AB; 172 } 173 174 if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP) 175 s_status |= PCI_STAT_PERROR; 176 177 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) 178 s_status |= PCI_STAT_S_TARG_AB; 179 180 if (pec_p->pec_descr.H) { 181 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >>32); 182 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]); 183 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >>32); 184 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]); 185 186 if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) == 187 DDI_SUCCESS) { 188 fault_bdf = adv_reg.pcie_ue_tgt_bdf; 189 fault_addr = adv_reg.pcie_ue_tgt_addr; 190 /* 191 * affected BDF is to be filled in by 192 * px_scan_fabric 193 */ 194 } 195 } 196 } 197 198 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf; 199 PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)fault_addr; 200 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; 201 } 202 203 /* 204 * Convert error severity from PX internal values to PCIe Fabric values. Most 205 * are self explanitory, except PX_PROTECTED. PX_PROTECTED will never be 206 * returned as is if forgivable. 207 */ 208 static int px_err_to_fab_sev(int rc_err) { 209 int fab_err = 0; 210 211 if (rc_err & (PX_HW_RESET | PX_EXPECTED | PX_NO_PANIC)) 212 fab_err |= PF_ERR_NO_PANIC; 213 214 if (rc_err & (PX_PANIC | PX_PROTECTED)) 215 fab_err |= PF_ERR_PANIC; 216 217 if (rc_err & PX_NO_ERROR) 218 fab_err |= PF_ERR_NO_ERROR; 219 220 return (fab_err); 221 } 222 223 /* 224 * px_err_intr: 225 * Interrupt handler for the JBC/DMC/PEC block. 226 * o lock 227 * o create derr 228 * o check safe access 229 * o px_err_check_severity(epkt) 230 * o pcie_scan_fabric 231 * o Idle intr state 232 * o unlock 233 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 234 */ 235 static uint_t 236 px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt) 237 { 238 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); 239 dev_info_t *rpdip = px_p->px_dip; 240 int rc_err, fab_err, msg; 241 ddi_fm_error_t derr; 242 pf_data_t *pfd_p; 243 244 if (px_fm_enter(px_p) != DDI_SUCCESS) 245 goto done; 246 247 pfd_p = px_get_pfd(px_p); 248 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL; 249 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt; 250 251 /* Create the derr */ 252 bzero(&derr, sizeof (ddi_fm_error_t)); 253 derr.fme_version = DDI_FME_VERSION; 254 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1); 255 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 256 257 /* Basically check for safe access */ 258 (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL); 259 260 /* Check the severity of this error */ 261 rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p); 262 263 pfd_p->pe_severity_flags = px_err_to_fab_sev(rc_err); 264 /* 265 * px_err_epkt_severity needs to populate affected dev 266 * Only MMU errors and PCIe errors need this. 267 * For MMU we will call pf_handle_lookup, using fault bdf 268 * - need to call bdf look up.. 269 * For PCIe do not fill in affected.. 270 */ 271 272 /* Scan the fabric if the root port is not in drain state. */ 273 fab_err = px_scan_fabric(px_p, rpdip, &derr); 274 275 /* Set the intr state to idle for the leaf that received the mondo */ 276 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, 277 INTR_IDLE_STATE) != DDI_SUCCESS) { 278 px_fm_exit(px_p); 279 return (DDI_INTR_UNCLAIMED); 280 } 281 282 switch (epkt->rc_descr.block) { 283 case BLOCK_MMU: /* FALLTHROUGH */ 284 case BLOCK_INTR: 285 msg = PX_RC; 286 break; 287 case BLOCK_PCIE: 288 msg = PX_RP; 289 break; 290 case BLOCK_HOSTBUS: /* FALLTHROUGH */ 291 default: 292 msg = PX_HB; 293 break; 294 } 295 296 px_err_panic(rc_err, msg, fab_err, B_TRUE); 297 px_fm_exit(px_p); 298 px_err_panic(rc_err, msg, fab_err, B_FALSE); 299 300 done: 301 return (DDI_INTR_CLAIMED); 302 } 303 304 /* 305 * px_err_epkt_severity: 306 * Check the severity of the fire error based the epkt received 307 * 308 * @param px_p leaf in which to take the snap shot. 309 * @param derr fm err in which the ereport is to be based on 310 * @param epkt epkt recevied from HV 311 */ 312 static int 313 px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, 314 pf_data_t *pfd_p) 315 { 316 px_pec_t *pec_p = px_p->px_pec_p; 317 dev_info_t *dip = px_p->px_dip; 318 boolean_t is_safeacc = B_FALSE; 319 boolean_t is_block_pci = B_FALSE; 320 boolean_t is_valid_epkt = B_FALSE; 321 int err = 0; 322 323 /* Cautious access error handling */ 324 switch (derr->fme_flag) { 325 case DDI_FM_ERR_EXPECTED: 326 /* 327 * For ddi_caut_put treat all events as nonfatal. Here 328 * we have the handle and can call ndi_fm_acc_err_set(). 329 */ 330 derr->fme_status = DDI_FM_NONFATAL; 331 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 332 is_safeacc = B_TRUE; 333 break; 334 case DDI_FM_ERR_PEEK: 335 case DDI_FM_ERR_POKE: 336 /* 337 * For ddi_peek/poke treat all events as nonfatal. 338 */ 339 is_safeacc = B_TRUE; 340 break; 341 default: 342 is_safeacc = B_FALSE; 343 } 344 345 /* 346 * Older hypervisors in some cases send epkts with incorrect fields. 347 * We have to handle these "special" epkts correctly. 348 */ 349 if (px_legacy_epkt) 350 px_fix_legacy_epkt(dip, derr, epkt); 351 352 switch (epkt->rc_descr.block) { 353 case BLOCK_HOSTBUS: 354 err = px_cb_epkt_severity(dip, derr, epkt); 355 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 356 break; 357 case BLOCK_MMU: 358 err = px_mmu_epkt_severity(dip, derr, epkt); 359 px_err_fill_pfd(dip, pfd_p, epkt); 360 break; 361 case BLOCK_INTR: 362 err = px_intr_epkt_severity(dip, derr, epkt); 363 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 364 break; 365 case BLOCK_PORT: 366 err = px_port_epkt_severity(dip, derr, epkt, pfd_p); 367 break; 368 case BLOCK_PCIE: 369 is_block_pci = B_TRUE; 370 err = px_pcie_epkt_severity(dip, derr, epkt); 371 px_err_fill_pfd(dip, pfd_p, epkt); 372 break; 373 default: 374 err = 0; 375 } 376 377 if ((err & PX_HW_RESET) || (err & PX_PANIC)) { 378 if (px_log & PX_PANIC) 379 px_err_log_handle(dip, epkt, is_block_pci, "PANIC"); 380 is_valid_epkt = B_TRUE; 381 } else if (err & PX_PROTECTED) { 382 if (px_log & PX_PROTECTED) 383 px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED"); 384 is_valid_epkt = B_TRUE; 385 } else if (err & PX_NO_PANIC) { 386 if (px_log & PX_NO_PANIC) 387 px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC"); 388 is_valid_epkt = B_TRUE; 389 } else if (err & PX_NO_ERROR) { 390 if (px_log & PX_NO_ERROR) 391 px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR"); 392 is_valid_epkt = B_TRUE; 393 } else if (err == 0) { 394 px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED"); 395 is_valid_epkt = B_FALSE; 396 397 /* Panic on a unrecognized epkt */ 398 err = PX_PANIC; 399 } 400 401 px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr, 402 is_valid_epkt); 403 404 /* Readjust the severity as a result of safe access */ 405 if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED)) 406 err = PX_NO_PANIC; 407 408 return (err); 409 } 410 411 static void 412 px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt, 413 boolean_t is_block_pci, int err, ddi_fm_error_t *derr, 414 boolean_t is_valid_epkt) 415 { 416 char buf[FM_MAX_CLASS], descr_buf[1024]; 417 418 /* send ereport for debug purposes */ 419 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG); 420 421 if (is_block_pci) { 422 px_pec_err_t *pec = (px_pec_err_t *)epkt; 423 (void) snprintf(descr_buf, sizeof (descr_buf), 424 "%s Epkt contents:\n" 425 "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n" 426 "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n" 427 "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n" 428 "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n" 429 "HDR1: 0x%lx, HDR2: 0x%lx\n" 430 "Err Src Reg: 0x%x, Root Err Status: 0x%x\n" 431 "Err Severity: 0x%x\n", 432 is_valid_epkt ? "Valid" : "Invalid", 433 pec->pec_descr.block, pec->pec_descr.dir, 434 pec->pec_descr.Z, pec->pec_descr.S, 435 pec->pec_descr.R, pec->pec_descr.I, 436 pec->pec_descr.H, pec->pec_descr.C, 437 pec->pec_descr.U, pec->pec_descr.E, 438 pec->pec_descr.P, pec->pci_err_status, 439 pec->pcie_err_status, pec->ce_reg_status, 440 pec->ue_reg_status, pec->hdr[0], 441 pec->hdr[1], pec->err_src_reg, 442 pec->root_err_status, err); 443 444 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 445 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 446 EPKT_SYSINO, DATA_TYPE_UINT64, 447 is_valid_epkt ? pec->sysino : 0, 448 EPKT_EHDL, DATA_TYPE_UINT64, 449 is_valid_epkt ? pec->ehdl : 0, 450 EPKT_STICK, DATA_TYPE_UINT64, 451 is_valid_epkt ? pec->stick : 0, 452 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3], 453 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4], 454 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5], 455 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6], 456 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7], 457 EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf); 458 } else { 459 (void) snprintf(descr_buf, sizeof (descr_buf), 460 "%s Epkt contents:\n" 461 "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" 462 "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n" 463 "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n" 464 "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n" 465 "Err Severity: 0x%x\n", 466 is_valid_epkt ? "Valid" : "Invalid", 467 epkt->rc_descr.block, epkt->rc_descr.op, 468 epkt->rc_descr.phase, epkt->rc_descr.cond, 469 epkt->rc_descr.dir, epkt->rc_descr.STOP, 470 epkt->rc_descr.H, epkt->rc_descr.R, 471 epkt->rc_descr.D, epkt->rc_descr.M, 472 epkt->rc_descr.S, epkt->size, epkt->addr, 473 epkt->hdr[0], epkt->hdr[1], epkt->reserved, 474 err); 475 476 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 477 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 478 EPKT_SYSINO, DATA_TYPE_UINT64, 479 is_valid_epkt ? epkt->sysino : 0, 480 EPKT_EHDL, DATA_TYPE_UINT64, 481 is_valid_epkt ? epkt->ehdl : 0, 482 EPKT_STICK, DATA_TYPE_UINT64, 483 is_valid_epkt ? epkt->stick : 0, 484 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3], 485 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4], 486 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5], 487 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6], 488 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7], 489 EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf); 490 } 491 } 492 493 static void 494 px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci, 495 char *msg) 496 { 497 if (is_block_pci) { 498 px_pec_err_t *pec = (px_pec_err_t *)epkt; 499 DBG(DBG_ERR_INTR, dip, 500 "A PCIe root port error has occured with a severity" 501 " \"%s\"\n" 502 "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n" 503 "\tH=%d, C=%d, U=%d, E=%d, P=%d\n" 504 "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n" 505 "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n" 506 "\terr_src: 0x%x, root_err: 0x%x\n", 507 msg, pec->pec_descr.block, pec->pec_descr.dir, 508 pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R, 509 pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C, 510 pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P, 511 pec->pci_err_status, pec->pcie_err_status, 512 pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0], 513 pec->hdr[1], pec->err_src_reg, pec->root_err_status); 514 } else { 515 DBG(DBG_ERR_INTR, dip, 516 "A PCIe root complex error has occured with a severity" 517 " \"%s\"\n" 518 "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" 519 "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n" 520 "\tS=%d, Size: 0x%x, Addr: 0x%p\n" 521 "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n", 522 msg, epkt->rc_descr.block, epkt->rc_descr.op, 523 epkt->rc_descr.phase, epkt->rc_descr.cond, 524 epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H, 525 epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M, 526 epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0], 527 epkt->hdr[1], epkt->reserved); 528 } 529 } 530 531 /* ARGSUSED */ 532 static void 533 px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 534 { 535 /* 536 * We don't have a default case for any of the below switch statements 537 * since we are ok with the code falling through. 538 */ 539 switch (epkt->rc_descr.block) { 540 case BLOCK_HOSTBUS: 541 switch (epkt->rc_descr.op) { 542 case OP_DMA: 543 switch (epkt->rc_descr.phase) { 544 case PH_UNKNOWN: 545 switch (epkt->rc_descr.cond) { 546 case CND_UNKNOWN: 547 switch (epkt->rc_descr.dir) { 548 case DIR_RESERVED: 549 epkt->rc_descr.dir = DIR_READ; 550 break; 551 } /* DIR */ 552 } /* CND */ 553 } /* PH */ 554 } /* OP */ 555 break; 556 case BLOCK_MMU: 557 switch (epkt->rc_descr.op) { 558 case OP_XLAT: 559 switch (epkt->rc_descr.phase) { 560 case PH_DATA: 561 switch (epkt->rc_descr.cond) { 562 case CND_PROT: 563 switch (epkt->rc_descr.dir) { 564 case DIR_UNKNOWN: 565 epkt->rc_descr.dir = DIR_WRITE; 566 break; 567 } /* DIR */ 568 } /* CND */ 569 break; 570 case PH_IRR: 571 switch (epkt->rc_descr.cond) { 572 case CND_RESERVED: 573 switch (epkt->rc_descr.dir) { 574 case DIR_IRR: 575 epkt->rc_descr.phase = PH_ADDR; 576 epkt->rc_descr.cond = CND_IRR; 577 } /* DIR */ 578 } /* CND */ 579 } /* PH */ 580 } /* OP */ 581 break; 582 case BLOCK_INTR: 583 switch (epkt->rc_descr.op) { 584 case OP_MSIQ: 585 switch (epkt->rc_descr.phase) { 586 case PH_UNKNOWN: 587 switch (epkt->rc_descr.cond) { 588 case CND_ILL: 589 switch (epkt->rc_descr.dir) { 590 case DIR_RESERVED: 591 epkt->rc_descr.dir = DIR_IRR; 592 break; 593 } /* DIR */ 594 break; 595 case CND_IRR: 596 switch (epkt->rc_descr.dir) { 597 case DIR_IRR: 598 epkt->rc_descr.cond = CND_OV; 599 break; 600 } /* DIR */ 601 } /* CND */ 602 } /* PH */ 603 break; 604 case OP_RESERVED: 605 switch (epkt->rc_descr.phase) { 606 case PH_UNKNOWN: 607 switch (epkt->rc_descr.cond) { 608 case CND_ILL: 609 switch (epkt->rc_descr.dir) { 610 case DIR_IRR: 611 epkt->rc_descr.op = OP_MSI32; 612 epkt->rc_descr.phase = PH_DATA; 613 break; 614 } /* DIR */ 615 } /* CND */ 616 break; 617 case PH_DATA: 618 switch (epkt->rc_descr.cond) { 619 case CND_INT: 620 switch (epkt->rc_descr.dir) { 621 case DIR_UNKNOWN: 622 epkt->rc_descr.op = OP_MSI32; 623 break; 624 } /* DIR */ 625 } /* CND */ 626 } /* PH */ 627 } /* OP */ 628 } /* BLOCK */ 629 } 630 631 /* ARGSUSED */ 632 static int 633 px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 634 { 635 return (px_err_check_eq(dip)); 636 } 637 638 /* ARGSUSED */ 639 static int 640 px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt, 641 pf_data_t *pfd_p) 642 { 643 pf_pcie_adv_err_regs_t adv_reg; 644 uint16_t s_status; 645 int sts = PX_PANIC; 646 647 /* 648 * Check for failed non-posted writes, which are errors that are not 649 * defined in the PCIe spec. If not return panic. 650 */ 651 if (!((epkt->rc_descr.op == OP_PIO) && 652 (epkt->rc_descr.phase == PH_IRR))) { 653 sts = (PX_PANIC); 654 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; 655 goto done; 656 } 657 658 /* 659 * Gather the error logs, if they do not exist just return with no panic 660 * and let the fabric message take care of the error. 661 */ 662 if (!epkt->rc_descr.H) { 663 sts = (PX_NO_PANIC); 664 goto done; 665 } 666 667 adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0] >> 32); 668 adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0]); 669 adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1] >> 32); 670 adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1]); 671 672 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 673 674 if (epkt->rc_descr.M) 675 adv_reg.pcie_ue_tgt_addr = epkt->addr; 676 677 if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) { 678 /* Let the fabric message take care of error */ 679 sts = PX_NO_PANIC; 680 goto done; 681 } 682 683 /* See if the failed transaction belonged to a hardened driver */ 684 if (pf_hdl_lookup(dip, derr->fme_ena, 685 adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr, 686 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 687 sts = (PX_NO_PANIC); 688 else 689 sts = (PX_PANIC); 690 691 /* Add pfd to cause a fabric scan */ 692 switch (epkt->rc_descr.cond) { 693 case CND_RCA: 694 s_status = PCI_STAT_R_TARG_AB; 695 break; 696 case CND_RUR: 697 s_status = PCI_STAT_R_MAST_AB; 698 break; 699 } 700 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf; 701 PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr; 702 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; 703 704 done: 705 return (sts); 706 } 707 708 /* ARGSUSED */ 709 static int 710 px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 711 { 712 px_pec_err_t *pec_p = (px_pec_err_t *)epkt; 713 px_err_pcie_t *pcie = (px_err_pcie_t *)epkt; 714 pf_pcie_adv_err_regs_t adv_reg; 715 int sts; 716 uint32_t temp; 717 718 /* 719 * Check for failed PIO Read/Writes, which are errors that are not 720 * defined in the PCIe spec. 721 */ 722 723 temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA; 724 if (((pec_p->pec_descr.dir == DIR_READ) || 725 (pec_p->pec_descr.dir == DIR_WRITE)) && 726 pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) { 727 728 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >> 32); 729 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]); 730 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >> 32); 731 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]); 732 733 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 734 735 if (sts == DDI_SUCCESS && 736 pf_hdl_lookup(dip, derr->fme_ena, 737 adv_reg.pcie_ue_tgt_trans, 738 adv_reg.pcie_ue_tgt_addr, 739 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 740 return (PX_NO_PANIC); 741 else 742 return (PX_PANIC); 743 } 744 745 if (!pec_p->pec_descr.C) 746 pec_p->ce_reg_status = 0; 747 if (!pec_p->pec_descr.U) 748 pec_p->ue_reg_status = 0; 749 if (!pec_p->pec_descr.H) 750 pec_p->hdr[0] = 0; 751 if (!pec_p->pec_descr.I) 752 pec_p->hdr[1] = 0; 753 754 /* 755 * According to the PCIe spec, there is a first error pointer. If there 756 * are header logs recorded and there are more than one error, the log 757 * will belong to the error that the first error pointer points to. 758 * 759 * The regs.primary_ue expects a bit number, go through the ue register 760 * and find the first error that occured. Because the sun4v epkt spec 761 * does not define this value, the algorithm below gives the lower bit 762 * priority. 763 */ 764 temp = pcie->ue_reg; 765 if (temp) { 766 int x; 767 for (x = 0; !(temp & 0x1); x++) { 768 temp = temp >> 1; 769 } 770 pcie->primary_ue = 1 << x; 771 } else { 772 pcie->primary_ue = 0; 773 } 774 775 /* Sun4v doesn't log the TX hdr except for CTOs */ 776 if (pcie->primary_ue == PCIE_AER_UCE_TO) { 777 pcie->tx_hdr1 = pcie->rx_hdr1; 778 pcie->tx_hdr2 = pcie->rx_hdr2; 779 pcie->tx_hdr3 = pcie->rx_hdr3; 780 pcie->tx_hdr4 = pcie->rx_hdr4; 781 pcie->rx_hdr1 = 0; 782 pcie->rx_hdr2 = 0; 783 pcie->rx_hdr3 = 0; 784 pcie->rx_hdr4 = 0; 785 } else { 786 pcie->tx_hdr1 = 0; 787 pcie->tx_hdr2 = 0; 788 pcie->tx_hdr3 = 0; 789 pcie->tx_hdr4 = 0; 790 } 791 792 return (px_err_check_pcie(dip, derr, pcie, PF_INTR_TYPE_INTERNAL)); 793 } 794 795 static int 796 px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 797 { 798 uint64_t addr = (uint64_t)epkt->addr; 799 pcie_req_id_t bdf = PCIE_INVALID_BDF; 800 801 if (epkt->rc_descr.H) { 802 bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF); 803 } 804 805 return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr, 806 bdf)); 807 } 808