1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * sun4v Fire Error Handling 27 */ 28 29 #include <sys/types.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/sunndi.h> 33 #include <sys/fm/protocol.h> 34 #include <sys/fm/util.h> 35 #include <sys/membar.h> 36 #include "px_obj.h" 37 #include "px_err.h" 38 39 static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, 40 px_rc_err_t *epkt); 41 static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 42 static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, 43 px_rc_err_t *epkt, pf_data_t *pfd_p); 44 45 static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, 46 boolean_t is_block_pci, char *msg); 47 static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt, 48 boolean_t is_block_pci, int err, ddi_fm_error_t *derr, 49 boolean_t is_valid_epkt); 50 static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 51 px_rc_err_t *epkt, pf_data_t *pfd_p); 52 static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 53 px_rc_err_t *epkt, pf_data_t 
*pfd_p); 54 static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 55 px_rc_err_t *epkt, pf_data_t *pfd_p); 56 static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 57 px_rc_err_t *epkt, pf_data_t *pfd_p); 58 static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, 59 px_rc_err_t *epkt, pf_data_t *pfd_p); 60 static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 61 px_rc_err_t *epkt, pf_data_t *pfd_p); 62 static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, 63 px_rc_err_t *epkt, pf_data_t *pfd_p); 64 static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, 65 px_rc_err_t *epkt); 66 static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, 67 px_rc_err_t *epkt); 68 69 /* Include the code generated sun4v epkt checking code */ 70 #include "px_err_gen.c" 71 72 /* 73 * This variable indicates if we have a hypervisor that could potentially send 74 * incorrect epkts. We always set this to TRUE for now until we find a way to 75 * tell if this HV bug has been fixed. 76 */ 77 boolean_t px_legacy_epkt = B_TRUE; 78 79 /* 80 * px_err_cb_intr: 81 * Interrupt handler for the Host Bus Block. 82 */ 83 uint_t 84 px_err_cb_intr(caddr_t arg) 85 { 86 px_fault_t *fault_p = (px_fault_t *)arg; 87 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 88 89 if (epkt != NULL) { 90 return (px_err_intr(fault_p, epkt)); 91 } 92 93 return (DDI_INTR_UNCLAIMED); 94 } 95 96 /* 97 * px_err_dmc_pec_intr: 98 * Interrupt handler for the DMC/PEC block. 99 */ 100 uint_t 101 px_err_dmc_pec_intr(caddr_t arg) 102 { 103 px_fault_t *fault_p = (px_fault_t *)arg; 104 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 105 106 if (epkt != NULL) { 107 return (px_err_intr(fault_p, epkt)); 108 } 109 110 return (DDI_INTR_UNCLAIMED); 111 } 112 113 /* 114 * px_err_cmn_intr: 115 * Common function called by trap, mondo and fabric intr. 
 *	This function is more meaningful in sun4u implementation. Kept
 *	to mirror sun4u call stack.
 *	o check for safe access
 *	o create and queue RC info for later use in fabric scan.
 *	  o RUC/WUC, PTLP, MMU Errors(CA), UR
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL
 * @param chkjbc	whether to handle hostbus registers (ignored)
 * @return err		PX_NO_PANIC | PX_PROTECTED |
 *	PX_PANIC | PX_HW_RESET | PX_EXPECTED
 */
/* ARGSUSED */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	/* On sun4v only the safe-access bookkeeping is needed here. */
	px_err_safeacc_check(px_p, derr);

	return (PX_NO_ERROR);
}

/*
 * fills RC specific fault data
 *
 * Derives the scan BDF, scan address and legacy secondary status from the
 * epkt, according to which functional block reported the error, and stores
 * them in pfd_p for the subsequent fabric scan.
 */
static void
px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) {
	pf_pcie_adv_err_regs_t adv_reg;
	pcie_req_id_t	fault_bdf = PCIE_INVALID_BDF;
	uint64_t	fault_addr = 0;
	uint16_t	s_status = 0;
	px_pec_err_t	*pec_p;
	uint32_t	dir;

	/* Add an PCIE PF_DATA Entry */
	switch (epkt->rc_descr.block) {
	case BLOCK_MMU:
		/* Only PIO Fault Addresses are valid, this is DMA */
		s_status = PCI_STAT_S_TARG_AB;
		fault_addr = 0;

		if (epkt->rc_descr.H) {
			/*
			 * Header log is valid: the requester BDF sits in
			 * the upper half of hdr[0] (bits 16+).
			 */
			fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16);
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags =
			    PF_AFFECTED_BDF;
			PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
			    fault_bdf;
		}
		break;
	case BLOCK_PCIE:
		pec_p = (px_pec_err_t *)epkt;
		dir = pec_p->pec_descr.dir;

		/* translate RC UR/CA to legacy secondary errors */
		if ((dir == DIR_READ || dir == DIR_WRITE) &&
		    pec_p->pec_descr.U) {
			if (pec_p->ue_reg_status & PCIE_AER_UCE_UR)
				s_status |= PCI_STAT_R_MAST_AB;
			if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
				s_status |= PCI_STAT_R_TARG_AB;
		}

		if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP)
			s_status |= PCI_STAT_PERROR;

		if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
			s_status |= PCI_STAT_S_TARG_AB;

		if (pec_p->pec_descr.H) {
			/* Split the two 64-bit header logs into TLP dwords */
			adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >>32);
			adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]);
			adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >>32);
			adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]);

			if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) ==
			    DDI_SUCCESS) {
				fault_bdf = adv_reg.pcie_ue_tgt_bdf;
				fault_addr = adv_reg.pcie_ue_tgt_addr;
				/*
				 * affected BDF is to be filled in by
				 * px_scan_fabric
				 */
			}
		}
		break;
	case BLOCK_HOSTBUS:
	case BLOCK_INTR:
	case BLOCK_PORT:
		/*
		 * If the affected device information is available then we
		 * add the affected_bdf to the pfd, so the affected device
		 * will be scanned and added to the error q. This will then
		 * go through the pciev_eh code path and forgive the error
		 * as needed.
		 */
		if (PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags ==
		    PF_AFFECTED_BDF)
			fault_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;

		break;
	default:
		break;
	}

	/* Publish the derived values for the fabric scan code. */
	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf;
	PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)fault_addr;
	PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
}

/*
 * Convert error severity from PX internal values to PCIe Fabric values. Most
 * are self explanitory, except PX_PROTECTED. PX_PROTECTED will never be
 * returned as is if forgivable.
 */
static int
px_err_to_fab_sev(int *rc_err) {
	int fab_err = 0;

	if (*rc_err & px_die) {
		/*
		 * Let fabric scan decide the final severity of the error.
		 * This is needed incase IOV code needs to forgive the error.
		 */
		*rc_err = PX_FABRIC_SCAN;
		fab_err |= PF_ERR_PANIC;
	}

	if (*rc_err & (PX_EXPECTED | PX_NO_PANIC))
		fab_err |= PF_ERR_NO_PANIC;

	if (*rc_err & PX_NO_ERROR)
		fab_err |= PF_ERR_NO_ERROR;

	return (fab_err);
}

/*
 * px_err_intr:
 *	Interrupt handler for the JBC/DMC/PEC block.
 *	o lock
 *	o create derr
 *	o check safe access
 *	o px_err_check_severity(epkt)
 *	o pcie_scan_fabric
 *	o Idle intr state
 *	o unlock
 *	o handle error: fatal? fm_panic() : return INTR_CLAIMED)
 */
static uint_t
px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
{
	px_t	*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
	dev_info_t *rpdip = px_p->px_dip;
	int	rc_err, tmp_rc_err, fab_err, msg;
	ddi_fm_error_t derr;
	pf_data_t *pfd_p;

	/* If the FM lock cannot be taken, still claim the interrupt. */
	if (px_fm_enter(px_p) != DDI_SUCCESS)
		goto done;

	pfd_p = px_get_pfd(px_p);
	PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL;
	PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;

	/* Basically check for safe access */
	(void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL);

	/* Check the severity of this error */
	rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p);

	/* Pass the 'rc_err' severity to the fabric scan code. */
	tmp_rc_err = rc_err;
	pfd_p->pe_severity_flags = px_err_to_fab_sev(&rc_err);

	/* Scan the fabric */
	if (!(fab_err = px_scan_fabric(px_p, rpdip, &derr))) {
		/*
		 * Fabric scan didn't occur because of some error condition
		 * such as Root Port being in drain state, so reset rc_err.
		 */
		rc_err = tmp_rc_err;
	}

	/* Set the intr state to idle for the leaf that received the mondo */
	if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
	    INTR_IDLE_STATE) != DDI_SUCCESS) {
		px_fm_exit(px_p);
		return (DDI_INTR_UNCLAIMED);
	}

	/* Select the ereport/panic message class from the reporting block. */
	switch (epkt->rc_descr.block) {
	case BLOCK_MMU: /* FALLTHROUGH */
	case BLOCK_INTR:
		msg = PX_RC;
		break;
	case BLOCK_PCIE:
		msg = PX_RP;
		break;
	case BLOCK_HOSTBUS: /* FALLTHROUGH */
	default:
		msg = PX_HB;
		break;
	}

	/*
	 * NOTE(review): px_err_panic() is invoked once before and once after
	 * px_fm_exit() with only the last flag differing — presumably the
	 * first call only stages/logs while the lock is held and the second
	 * performs the actual panic; confirm against px_err_panic().
	 */
	px_err_panic(rc_err, msg, fab_err, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(rc_err, msg, fab_err, B_FALSE);

done:
	return (DDI_INTR_CLAIMED);
}

/*
 * px_err_epkt_severity:
 *	Check the severity of the fire error based the epkt received
 *
 * @param px_p		leaf in which to take the snap shot.
 * @param derr		fm err in which the ereport is to be based on
 * @param epkt		epkt recevied from HV
 */
static int
px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
    pf_data_t *pfd_p)
{
	px_pec_t	*pec_p = px_p->px_pec_p;
	dev_info_t	*dip = px_p->px_dip;
	boolean_t	is_safeacc = B_FALSE;
	boolean_t	is_block_pci = B_FALSE;
	boolean_t	is_valid_epkt = B_FALSE;
	int		err = 0;

	/* Cautious access error handling */
	switch (derr->fme_flag) {
	case DDI_FM_ERR_EXPECTED:
		/*
		 * For ddi_caut_put treat all events as nonfatal. Here
		 * we have the handle and can call ndi_fm_acc_err_set().
		 */
		derr->fme_status = DDI_FM_NONFATAL;
		ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
		is_safeacc = B_TRUE;
		break;
	case DDI_FM_ERR_PEEK:
	case DDI_FM_ERR_POKE:
		/*
		 * For ddi_peek/poke treat all events as nonfatal.
		 */
		is_safeacc = B_TRUE;
		break;
	default:
		is_safeacc = B_FALSE;
	}

	/*
	 * Older hypervisors in some cases send epkts with incorrect fields.
	 * We have to handle these "special" epkts correctly.
	 */
	if (px_legacy_epkt)
		px_fix_legacy_epkt(dip, derr, epkt);

	/*
	 * The affected device by default is set to 'SELF'. The 'block'
	 * specific error handling below will update this as needed.
	 */
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

	/* Dispatch to the per-block severity routine. */
	switch (epkt->rc_descr.block) {
	case BLOCK_HOSTBUS:
		err = px_cb_epkt_severity(dip, derr, epkt, pfd_p);
		break;
	case BLOCK_MMU:
		err = px_mmu_epkt_severity(dip, derr, epkt, pfd_p);
		break;
	case BLOCK_INTR:
		err = px_intr_epkt_severity(dip, derr, epkt, pfd_p);
		break;
	case BLOCK_PORT:
		err = px_port_epkt_severity(dip, derr, epkt, pfd_p);
		break;
	case BLOCK_PCIE:
		is_block_pci = B_TRUE;
		err = px_pcie_epkt_severity(dip, derr, epkt, pfd_p);
		break;
	default:
		err = 0;
	}

	px_err_fill_pfd(dip, pfd_p, epkt);

	/* Log the epkt (gated by px_log) and note whether it was recognized. */
	if ((err & PX_HW_RESET) || (err & PX_PANIC)) {
		if (px_log & PX_PANIC)
			px_err_log_handle(dip, epkt, is_block_pci, "PANIC");
		is_valid_epkt = B_TRUE;
	} else if (err & PX_PROTECTED) {
		if (px_log & PX_PROTECTED)
			px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED");
		is_valid_epkt = B_TRUE;
	} else if (err & PX_NO_PANIC) {
		if (px_log & PX_NO_PANIC)
			px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC");
		is_valid_epkt = B_TRUE;
	} else if (err & PX_NO_ERROR) {
		if (px_log & PX_NO_ERROR)
			px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR");
		is_valid_epkt = B_TRUE;
	} else if (err == 0) {
		px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED");
		is_valid_epkt = B_FALSE;

		/* Panic on a unrecognized epkt */
		err = PX_PANIC;
	}

	px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr,
	    is_valid_epkt);

	/* Readjust the severity as a result of safe access */
	if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED))
		err = PX_NO_PANIC;

	return (err);
}

/*
 * px_err_send_epkt_erpt:
 *	Post a debug ereport capturing the raw epkt contents, both as a
 *	human-readable description string and as the raw 64-bit words
 *	(EPKT_DW0..DW4).  The epkt is formatted per the PEC layout when
 *	is_block_pci is set, otherwise per the generic RC layout.
 */
static void
px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
    boolean_t is_valid_epkt)
{
	char buf[FM_MAX_CLASS], descr_buf[1024];

	/* send ereport for debug purposes */
	(void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG);

	if (is_block_pci) {
		/* PEC-block epkt: reinterpret and describe the PEC fields. */
		px_pec_err_t *pec = (px_pec_err_t *)epkt;
		(void) snprintf(descr_buf, sizeof (descr_buf),
		    "%s Epkt contents:\n"
		    "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
		    "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
		    "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
		    "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
		    "HDR1: 0x%lx, HDR2: 0x%lx\n"
		    "Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
		    "Err Severity: 0x%x\n",
		    is_valid_epkt ? "Valid" : "Invalid",
		    pec->pec_descr.block, pec->pec_descr.dir,
		    pec->pec_descr.Z, pec->pec_descr.S,
		    pec->pec_descr.R, pec->pec_descr.I,
		    pec->pec_descr.H, pec->pec_descr.C,
		    pec->pec_descr.U, pec->pec_descr.E,
		    pec->pec_descr.P, pec->pci_err_status,
		    pec->pcie_err_status, pec->ce_reg_status,
		    pec->ue_reg_status, pec->hdr[0],
		    pec->hdr[1], pec->err_src_reg,
		    pec->root_err_status, err);

		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    EPKT_SYSINO, DATA_TYPE_UINT64,
		    is_valid_epkt ? pec->sysino : 0,
		    EPKT_EHDL, DATA_TYPE_UINT64,
		    is_valid_epkt ? pec->ehdl : 0,
		    EPKT_STICK, DATA_TYPE_UINT64,
		    is_valid_epkt ?
		    pec->stick : 0,
		    /* Raw epkt words 3..7 for offline decoding */
		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3],
		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4],
		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5],
		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6],
		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7],
		    EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf);
	} else {
		/* Generic RC epkt: describe the rc_descr fields. */
		(void) snprintf(descr_buf, sizeof (descr_buf),
		    "%s Epkt contents:\n"
		    "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
		    "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
		    "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
		    "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
		    "Err Severity: 0x%x\n",
		    is_valid_epkt ? "Valid" : "Invalid",
		    epkt->rc_descr.block, epkt->rc_descr.op,
		    epkt->rc_descr.phase, epkt->rc_descr.cond,
		    epkt->rc_descr.dir, epkt->rc_descr.STOP,
		    epkt->rc_descr.H, epkt->rc_descr.R,
		    epkt->rc_descr.D, epkt->rc_descr.M,
		    epkt->rc_descr.S, epkt->size, epkt->addr,
		    epkt->hdr[0], epkt->hdr[1], epkt->reserved,
		    err);

		ddi_fm_ereport_post(dip, buf, derr->fme_ena,
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
		    EPKT_SYSINO, DATA_TYPE_UINT64,
		    is_valid_epkt ? epkt->sysino : 0,
		    EPKT_EHDL, DATA_TYPE_UINT64,
		    is_valid_epkt ? epkt->ehdl : 0,
		    EPKT_STICK, DATA_TYPE_UINT64,
		    is_valid_epkt ?
		    epkt->stick : 0,
		    /* Raw epkt words 3..7 for offline decoding */
		    EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3],
		    EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4],
		    EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5],
		    EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6],
		    EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7],
		    EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf);
	}
}

/*
 * px_err_log_handle:
 *	Emit a DBG trace of the epkt contents with the severity string 'msg'
 *	("PANIC", "PROTECTED", ...), formatted per the PEC layout when
 *	is_block_pci is set, otherwise per the generic RC layout.
 */
static void
px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci,
    char *msg)
{
	if (is_block_pci) {
		px_pec_err_t *pec = (px_pec_err_t *)epkt;
		DBG(DBG_ERR_INTR, dip,
		    "A PCIe root port error has occured with a severity"
		    " \"%s\"\n"
		    "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
		    "\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
		    "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
		    "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
		    "\terr_src: 0x%x, root_err: 0x%x\n",
		    msg, pec->pec_descr.block, pec->pec_descr.dir,
		    pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R,
		    pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C,
		    pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P,
		    pec->pci_err_status, pec->pcie_err_status,
		    pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0],
		    pec->hdr[1], pec->err_src_reg, pec->root_err_status);
	} else {
		DBG(DBG_ERR_INTR, dip,
		    "A PCIe root complex error has occured with a severity"
		    " \"%s\"\n"
		    "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
		    "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
		    "\tS=%d, Size: 0x%x, Addr: 0x%p\n"
		    "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
		    msg, epkt->rc_descr.block, epkt->rc_descr.op,
		    epkt->rc_descr.phase, epkt->rc_descr.cond,
		    epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H,
		    epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M,
		    epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0],
		    epkt->hdr[1], epkt->reserved);
	}
}

/*
 * px_fix_legacy_epkt:
 *	Rewrite specific malformed field combinations produced by older
 *	(buggy) hypervisors into the values the generated checking code
 *	expects.  Each nested switch matches one exact block/op/phase/cond/dir
 *	pattern; anything else falls through untouched.
 */
/* ARGSUSED */
static void
px_fix_legacy_epkt(dev_info_t *dip,
    ddi_fm_error_t *derr, px_rc_err_t *epkt)
{
	/*
	 * We don't have a default case for any of the below switch statements
	 * since we are ok with the code falling through.
	 */
	switch (epkt->rc_descr.block) {
	case BLOCK_HOSTBUS:
		switch (epkt->rc_descr.op) {
		case OP_DMA:
			switch (epkt->rc_descr.phase) {
			case PH_UNKNOWN:
				switch (epkt->rc_descr.cond) {
				case CND_UNKNOWN:
					switch (epkt->rc_descr.dir) {
					case DIR_RESERVED:
						epkt->rc_descr.dir = DIR_READ;
						break;
					} /* DIR */
				} /* CND */
			} /* PH */
		} /* OP */
		break;
	case BLOCK_MMU:
		switch (epkt->rc_descr.op) {
		case OP_XLAT:
			switch (epkt->rc_descr.phase) {
			case PH_DATA:
				switch (epkt->rc_descr.cond) {
				case CND_PROT:
					switch (epkt->rc_descr.dir) {
					case DIR_UNKNOWN:
						epkt->rc_descr.dir = DIR_WRITE;
						break;
					} /* DIR */
				} /* CND */
				break;
			case PH_IRR:
				switch (epkt->rc_descr.cond) {
				case CND_RESERVED:
					switch (epkt->rc_descr.dir) {
					case DIR_IRR:
						epkt->rc_descr.phase = PH_ADDR;
						epkt->rc_descr.cond = CND_IRR;
					} /* DIR */
				} /* CND */
			} /* PH */
		} /* OP */
		break;
	case BLOCK_INTR:
		switch (epkt->rc_descr.op) {
		case OP_MSIQ:
			switch (epkt->rc_descr.phase) {
			case PH_UNKNOWN:
				switch (epkt->rc_descr.cond) {
				case CND_ILL:
					switch (epkt->rc_descr.dir) {
					case DIR_RESERVED:
						epkt->rc_descr.dir = DIR_IRR;
						break;
					} /* DIR */
					break;
				case CND_IRR:
					switch (epkt->rc_descr.dir) {
					case DIR_IRR:
						epkt->rc_descr.cond = CND_OV;
						break;
					} /* DIR */
				} /* CND */
			} /* PH */
			break;
		case OP_RESERVED:
			switch (epkt->rc_descr.phase) {
			case PH_UNKNOWN:
				switch (epkt->rc_descr.cond) {
				case CND_ILL:
					switch (epkt->rc_descr.dir) {
					case DIR_IRR:
						epkt->rc_descr.op = OP_MSI32;
						epkt->rc_descr.phase = PH_DATA;
						break;
					} /* DIR */
				} /* CND */
				break;
			case PH_DATA:
switch (epkt->rc_descr.cond) { 652 case CND_INT: 653 switch (epkt->rc_descr.dir) { 654 case DIR_UNKNOWN: 655 epkt->rc_descr.op = OP_MSI32; 656 break; 657 } /* DIR */ 658 } /* CND */ 659 } /* PH */ 660 } /* OP */ 661 } /* BLOCK */ 662 } 663 664 /* ARGSUSED */ 665 static int 666 px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt, 667 pf_data_t *pfd_p) 668 { 669 return (px_err_check_eq(dip)); 670 } 671 672 /* ARGSUSED */ 673 static int 674 px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt, 675 pf_data_t *pfd_p) 676 { 677 pf_pcie_adv_err_regs_t adv_reg; 678 uint16_t s_status; 679 int sts = PX_PANIC; 680 681 /* 682 * Check for failed non-posted writes, which are errors that are not 683 * defined in the PCIe spec. If not return panic. 684 */ 685 if (!((epkt->rc_descr.op == OP_PIO) && 686 (epkt->rc_descr.phase == PH_IRR))) { 687 sts = (PX_PANIC); 688 goto done; 689 } 690 691 /* 692 * Gather the error logs, if they do not exist just return with no panic 693 * and let the fabric message take care of the error. 
694 */ 695 if (!epkt->rc_descr.H) { 696 sts = (PX_NO_PANIC); 697 goto done; 698 } 699 700 adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0] >> 32); 701 adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0]); 702 adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1] >> 32); 703 adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1]); 704 705 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 706 707 if (epkt->rc_descr.M) 708 adv_reg.pcie_ue_tgt_addr = epkt->addr; 709 710 if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) { 711 /* Let the fabric message take care of error */ 712 sts = PX_NO_PANIC; 713 goto done; 714 } 715 716 /* See if the failed transaction belonged to a hardened driver */ 717 if (pf_hdl_lookup(dip, derr->fme_ena, 718 adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr, 719 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 720 sts = (PX_NO_PANIC); 721 else 722 sts = (PX_PANIC); 723 724 /* Add pfd to cause a fabric scan */ 725 switch (epkt->rc_descr.cond) { 726 case CND_RCA: 727 s_status = PCI_STAT_R_TARG_AB; 728 break; 729 case CND_RUR: 730 s_status = PCI_STAT_R_MAST_AB; 731 break; 732 } 733 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf; 734 PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr; 735 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; 736 737 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF; 738 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = adv_reg.pcie_ue_tgt_bdf; 739 740 done: 741 return (sts); 742 } 743 744 /* ARGSUSED */ 745 static int 746 px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt, 747 pf_data_t *pfd_p) 748 { 749 px_pec_err_t *pec_p = (px_pec_err_t *)epkt; 750 px_err_pcie_t *pcie = (px_err_pcie_t *)epkt; 751 pf_pcie_adv_err_regs_t adv_reg; 752 int sts; 753 uint32_t temp; 754 755 /* 756 * Check for failed PIO Read/Writes, which are errors that are not 757 * defined in the PCIe spec. 
758 */ 759 760 temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA; 761 if (((pec_p->pec_descr.dir == DIR_READ) || 762 (pec_p->pec_descr.dir == DIR_WRITE)) && 763 pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) { 764 765 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >> 32); 766 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]); 767 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >> 32); 768 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]); 769 770 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg); 771 772 if (sts == DDI_SUCCESS && 773 pf_hdl_lookup(dip, derr->fme_ena, 774 adv_reg.pcie_ue_tgt_trans, 775 adv_reg.pcie_ue_tgt_addr, 776 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND) 777 return (PX_NO_PANIC); 778 else 779 return (PX_PANIC); 780 } 781 782 if (!pec_p->pec_descr.C) 783 pec_p->ce_reg_status = 0; 784 if (!pec_p->pec_descr.U) 785 pec_p->ue_reg_status = 0; 786 if (!pec_p->pec_descr.H) 787 pec_p->hdr[0] = 0; 788 if (!pec_p->pec_descr.I) 789 pec_p->hdr[1] = 0; 790 791 /* 792 * According to the PCIe spec, there is a first error pointer. If there 793 * are header logs recorded and there are more than one error, the log 794 * will belong to the error that the first error pointer points to. 795 * 796 * The regs.primary_ue expects a bit number, go through the ue register 797 * and find the first error that occured. Because the sun4v epkt spec 798 * does not define this value, the algorithm below gives the lower bit 799 * priority. 
800 */ 801 temp = pcie->ue_reg; 802 if (temp) { 803 int x; 804 for (x = 0; !(temp & 0x1); x++) { 805 temp = temp >> 1; 806 } 807 pcie->primary_ue = 1 << x; 808 } else { 809 pcie->primary_ue = 0; 810 } 811 812 /* Sun4v doesn't log the TX hdr except for CTOs */ 813 if (pcie->primary_ue == PCIE_AER_UCE_TO) { 814 pcie->tx_hdr1 = pcie->rx_hdr1; 815 pcie->tx_hdr2 = pcie->rx_hdr2; 816 pcie->tx_hdr3 = pcie->rx_hdr3; 817 pcie->tx_hdr4 = pcie->rx_hdr4; 818 pcie->rx_hdr1 = 0; 819 pcie->rx_hdr2 = 0; 820 pcie->rx_hdr3 = 0; 821 pcie->rx_hdr4 = 0; 822 } else { 823 pcie->tx_hdr1 = 0; 824 pcie->tx_hdr2 = 0; 825 pcie->tx_hdr3 = 0; 826 pcie->tx_hdr4 = 0; 827 } 828 829 return (px_err_check_pcie(dip, derr, pcie, PF_INTR_TYPE_INTERNAL)); 830 } 831 832 static int 833 px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) 834 { 835 uint64_t addr = (uint64_t)epkt->addr; 836 pcie_req_id_t bdf = PCIE_INVALID_BDF; 837 838 if (epkt->rc_descr.H) { 839 bdf = (uint32_t)((epkt->hdr[0] >> 16) & 0xFFFF); 840 } 841 842 return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr, 843 bdf)); 844 } 845