1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * PX Fault Management Architecture 30 */ 31 #include <sys/types.h> 32 #include <sys/sunndi.h> 33 #include <sys/sunddi.h> 34 #include <sys/fm/protocol.h> 35 #include <sys/fm/util.h> 36 #include <sys/membar.h> 37 #include "px_obj.h" 38 39 typedef struct px_fabric_cfgspace { 40 /* Error information */ 41 msgcode_t msg_code; 42 pcie_req_id_t rid; 43 44 /* Config space header and device type */ 45 uint8_t hdr_type; 46 uint16_t dev_type; 47 48 /* Register pointers */ 49 uint16_t cap_off; 50 uint16_t aer_off; 51 52 /* PCI register values */ 53 uint32_t sts_reg; 54 uint32_t sts_sreg; 55 56 /* PCIE register values */ 57 uint32_t dev_sts_reg; 58 uint32_t aer_ce_reg; 59 uint32_t aer_ue_reg; 60 uint32_t aer_sev_reg; 61 uint32_t aer_ue_sreg; 62 uint32_t aer_sev_sreg; 63 64 /* PCIE Header Log Registers */ 65 uint32_t aer_h1; 66 uint32_t aer_h2; 67 uint32_t aer_h3; 68 uint32_t aer_h4; 69 uint32_t aer_sh1; 70 uint32_t aer_sh2; 71 uint32_t aer_sh3; 72 uint32_t aer_sh4; 73 } px_fabric_cfgspace_t; 74 75 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); 76 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); 77 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); 78 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); 79 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); 80 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); 81 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); 82 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); 83 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, 84 pcie_req_id_t rid, ddi_fm_error_t *derr); 85 86 /* 87 * Initialize px FMA support 88 */ 89 int 90 px_fm_attach(px_t *px_p) 91 { 92 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 93 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 94 95 /* 96 * check parents' capability 97 */ 98 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 99 100 /* 101 * parents need to be ereport and error handling capable 102 */ 103 ASSERT(px_p->px_fm_cap && 104 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 105 106 /* 107 * register error callback in parent 108 */ 109 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 110 111 return (DDI_SUCCESS); 112 } 113 114 /* 115 * Deregister FMA 116 */ 117 void 118 px_fm_detach(px_t *px_p) 119 { 120 ddi_fm_handler_unregister(px_p->px_dip); 121 ddi_fm_fini(px_p->px_dip); 122 } 123 124 /* 125 * Function used to setup access functions depending on level of desired 126 * protection. 127 */ 128 void 129 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 130 { 131 uchar_t fflag; 132 ddi_acc_hdl_t *hp; 133 ddi_acc_impl_t *ap; 134 135 hp = mp->map_handlep; 136 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 137 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 138 139 if (mp->map_op == DDI_MO_MAP_LOCKED) { 140 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 141 switch (fflag) { 142 case DDI_FLAGERR_ACC: 143 ap->ahi_get8 = i_ddi_prot_get8; 144 ap->ahi_get16 = i_ddi_prot_get16; 145 ap->ahi_get32 = i_ddi_prot_get32; 146 ap->ahi_get64 = i_ddi_prot_get64; 147 ap->ahi_put8 = i_ddi_prot_put8; 148 ap->ahi_put16 = i_ddi_prot_put16; 149 ap->ahi_put32 = i_ddi_prot_put32; 150 ap->ahi_put64 = i_ddi_prot_put64; 151 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 152 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 153 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 154 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 155 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 156 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 157 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 158 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 159 break; 160 case DDI_CAUTIOUS_ACC : 161 ap->ahi_get8 = i_ddi_caut_get8; 162 ap->ahi_get16 = i_ddi_caut_get16; 163 ap->ahi_get32 = i_ddi_caut_get32; 164 ap->ahi_get64 = i_ddi_caut_get64; 165 ap->ahi_put8 = i_ddi_caut_put8; 166 ap->ahi_put16 = i_ddi_caut_put16; 167 ap->ahi_put32 = i_ddi_caut_put32; 168 ap->ahi_put64 = i_ddi_caut_put64; 169 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 170 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 171 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 172 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 173 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 174 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 175 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 176 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 177 break; 178 default: 179 break; 180 } 181 } else if (mp->map_op == DDI_MO_UNMAP) { 182 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 183 } 184 } 185 186 /* 187 * Function called after a dma fault occurred to find out whether the 188 * fault address is associated with a driver that is able to handle faults 189 * and recover from faults. The driver has to set DDI_DMA_FLAGERR and 190 * cache dma handles in order to make this checking effective to help 191 * recovery from dma faults. 192 */ 193 /* ARGSUSED */ 194 static int 195 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr, 196 const void *not_used) 197 { 198 ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle; 199 pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 200 pfn_t comp_pfn; 201 int page; 202 203 /* 204 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not 205 * been effectively initialized during attach. 206 */ 207 ASSERT(mp); 208 209 for (page = 0; page < mp->dmai_ndvmapages; page++) { 210 comp_pfn = PX_GET_MP_PFN(mp, page); 211 if (fault_pfn == comp_pfn) 212 return (DDI_FM_NONFATAL); 213 } 214 215 return (DDI_FM_UNKNOWN); 216 } 217 218 /* 219 * Function used to check if a given access handle owns the failing address. 220 * Called by ndi_fmc_error, when we detect a PIO error. 221 */ 222 /* ARGSUSED */ 223 static int 224 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr, 225 const void *not_used) 226 { 227 pfn_t pfn, fault_pfn; 228 ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); 229 230 /* 231 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not 232 * been effectively initialized during attach. 233 */ 234 ASSERT(hp); 235 236 pfn = hp->ah_pfn; 237 fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 238 if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) 239 return (DDI_FM_NONFATAL); 240 241 return (DDI_FM_UNKNOWN); 242 } 243 244 /* 245 * Function used by PCI error handlers to check if captured address is stored 246 * in the DMA or ACC handle caches. 247 */ 248 int 249 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 250 { 251 uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap; 252 int ret = DDI_FM_FATAL; 253 254 int (*f)() = type == DMA_HANDLE ? 255 (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) : 256 (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL); 257 258 if (f) 259 ret = ndi_fmc_error(dip, NULL, type, f, fme_ena, afar); 260 261 return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); 262 } 263 264 /* 265 * Function used to initialize FMA for our children nodes. Called 266 * through pci busops when child node calls ddi_fm_init. 267 */ 268 /*ARGSUSED*/ 269 int 270 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 271 ddi_iblock_cookie_t *ibc_p) 272 { 273 px_t *px_p = DIP_TO_STATE(dip); 274 275 ASSERT(ibc_p != NULL); 276 *ibc_p = px_p->px_fm_ibc; 277 278 return (px_p->px_fm_cap); 279 } 280 281 /* 282 * lock access for exclusive PCIe access 283 */ 284 void 285 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 286 { 287 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 288 289 /* 290 * Exclusive access has been used for cautious put/get, 291 * Both utilize i_ddi_ontrap which, on sparcv9, implements 292 * similar protection as what on_trap() does, and which calls 293 * membar #Sync to flush out all cpu deferred errors 294 * prior to get/put operation, so here we're not calling 295 * membar #Sync - a difference from what's in pci_bus_enter(). 296 */ 297 mutex_enter(&pec_p->pec_pokefault_mutex); 298 pec_p->pec_acc_hdl = handle; 299 } 300 301 /* 302 * unlock access for exclusive PCIe access 303 */ 304 /* ARGSUSED */ 305 void 306 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 307 { 308 px_t *px_p = DIP_TO_STATE(dip); 309 px_pec_t *pec_p = px_p->px_pec_p; 310 311 pec_p->pec_acc_hdl = NULL; 312 mutex_exit(&pec_p->pec_pokefault_mutex); 313 } 314 315 316 /* 317 * PCI error callback which is registered with our parent to call 318 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 319 * and PCI BERR/TO/UE 320 * 321 * Dispatch on all known leaves of this fire device because we cannot tell 322 * which side the error came from. 323 */ 324 /*ARGSUSED*/ 325 int 326 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 327 { 328 px_t *px_p = (px_t *)impl_data; 329 px_cb_t *cb_p = px_p->px_cb_p; 330 int err = PX_OK; 331 int fatal = 0; 332 int nonfatal = 0; 333 int unknown = 0; 334 int ret = DDI_FM_OK; 335 int i; 336 337 mutex_enter(&cb_p->xbc_fm_mutex); 338 339 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 340 px_p = cb_p->xbc_px_list[i]; 341 if (px_p != NULL) 342 err |= px_err_handle(px_p, derr, PX_TRAP_CALL, 343 (i == 0)); 344 } 345 346 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 347 px_p = cb_p->xbc_px_list[i]; 348 if (px_p != NULL) { 349 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 350 switch (ret) { 351 case DDI_FM_FATAL: 352 fatal++; 353 break; 354 case DDI_FM_NONFATAL: 355 nonfatal++; 356 break; 357 case DDI_FM_UNKNOWN: 358 unknown++; 359 break; 360 default: 361 break; 362 } 363 } 364 } 365 mutex_exit(&cb_p->xbc_fm_mutex); 366 367 ret = (fatal != 0) ? DDI_FM_FATAL : 368 ((nonfatal != 0) ? DDI_FM_NONFATAL : 369 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 370 371 /* fire fatal error overrides device error */ 372 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 373 ret = DDI_FM_FATAL; 374 /* if fire encounts no error, then take whatever device error */ 375 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 376 ret = DDI_FM_NONFATAL; 377 378 return (ret); 379 } 380 381 static uint16_t 382 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) 383 { 384 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 385 uint16_t offset = PCIE_EXT_CAP; 386 int deadcount = 0; 387 388 /* Find the Advanced Error Register */ 389 hdr = px_fab_get(px_p, rid, offset); 390 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 391 PCIE_EXT_CAP_NEXT_PTR_MASK; 392 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 393 PCIE_EXT_CAP_ID_MASK; 394 395 while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && 396 (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { 397 offset = hdr_next_ptr; 398 hdr = px_fab_get(px_p, rid, offset); 399 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 400 PCIE_EXT_CAP_NEXT_PTR_MASK; 401 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 402 PCIE_EXT_CAP_ID_MASK; 403 404 if (deadcount++ > 100) 405 break; 406 } 407 408 if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) 409 return (offset); 410 411 return (0); 412 } 413 414 static uint16_t 415 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) 416 { 417 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 418 uint16_t offset = PCI_CONF_STAT; 419 int deadcount = 0; 420 421 hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; 422 if (!(hdr & PCI_STAT_CAP)) { 423 /* This is not a PCIE device */ 424 return (0); 425 } 426 427 hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); 428 hdr_next_ptr = hdr & 0xFF; 429 hdr_cap_id = 0; 430 431 while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && 432 (hdr_cap_id != PCI_CAP_ID_PCI_E)) { 433 offset = hdr_next_ptr; 434 435 if (hdr_next_ptr < 0x40) { 436 break; 437 } 438 439 hdr = px_fab_get(px_p, rid, hdr_next_ptr); 440 hdr_next_ptr = (hdr >> 8) & 0xFF; 441 hdr_cap_id = hdr & 0xFF; 442 443 if (deadcount++ > 100) 444 break; 445 } 446 447 if (hdr_cap_id == PCI_CAP_ID_PCI_E) 448 return (offset); 449 450 return (0); 451 } 452 453 /* 454 * This function checks the primary status registers. 455 * Take the PCI status register and translate it to PCIe equivalent. 456 */ 457 static int 458 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { 459 uint16_t sts_reg = cs->sts_reg >> 16; 460 uint16_t pci_status; 461 uint32_t pcie_status; 462 int ret = PX_NONFATAL; 463 464 /* Parity Err == Send/Recv Poisoned TLP */ 465 pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; 466 pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; 467 if (sts_reg & pci_status) 468 ret |= PX_FABRIC_ERR_SEV(pcie_status, 469 px_fabric_die_ue, px_fabric_die_ue_gos); 470 471 /* Target Abort == Completer Abort */ 472 pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; 473 pcie_status = PCIE_AER_UCE_CA; 474 if (sts_reg & pci_status) 475 ret |= PX_FABRIC_ERR_SEV(pcie_status, 476 px_fabric_die_ue, px_fabric_die_ue_gos); 477 478 /* Master Abort == Unsupport Request */ 479 pci_status = PCI_STAT_R_MAST_AB; 480 pcie_status = PCIE_AER_UCE_UR; 481 if (sts_reg & pci_status) 482 ret |= PX_FABRIC_ERR_SEV(pcie_status, 483 px_fabric_die_ue, px_fabric_die_ue_gos); 484 485 /* System Error == Uncorrectable Error */ 486 pci_status = PCI_STAT_S_SYSERR; 487 pcie_status = -1; 488 if (sts_reg & pci_status) 489 ret |= PX_FABRIC_ERR_SEV(pcie_status, 490 px_fabric_die_ue, px_fabric_die_ue_gos); 491 492 return (ret); 493 } 494 495 /* 496 * This function checks the secondary status registers. 497 * Switches and Bridges have a different behavior. 498 */ 499 static int 500 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { 501 uint16_t sts_reg = cs->sts_sreg >> 16; 502 int ret = PX_NONFATAL; 503 504 if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { 505 /* 506 * This is a PCIE-PCI bridge, but only check the severity 507 * if this device doesn't support AERs. 508 */ 509 if (!cs->aer_off) 510 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, 511 px_fabric_die_bdg_sts_gos); 512 } else { 513 /* This is most likely a PCIE switch */ 514 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, 515 px_fabric_die_sw_sts_gos); 516 } 517 518 return (ret); 519 } 520 521 /* 522 * This function checks and clears the primary AER. 523 */ 524 static int 525 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { 526 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 527 int ret = PX_NONFATAL; 528 529 /* Determine severity and clear the AER */ 530 switch (cs->msg_code) { 531 case PCIE_MSG_CODE_ERR_COR: 532 off_reg = PCIE_AER_CE_STS; 533 chk_reg = px_fabric_die_ce; 534 chk_reg_gos = px_fabric_die_ce_gos; 535 reg = cs->aer_ce_reg; 536 break; 537 case PCIE_MSG_CODE_ERR_NONFATAL: 538 off_reg = PCIE_AER_UCE_STS; 539 chk_reg = px_fabric_die_ue; 540 chk_reg_gos = px_fabric_die_ue_gos; 541 reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); 542 break; 543 case PCIE_MSG_CODE_ERR_FATAL: 544 off_reg = PCIE_AER_UCE_STS; 545 chk_reg = px_fabric_die_ue; 546 chk_reg_gos = px_fabric_die_ue_gos; 547 reg = cs->aer_ue_reg & cs->aer_sev_reg; 548 break; 549 default: 550 /* Major error force a panic */ 551 return (PX_FATAL_GOS); 552 } 553 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 554 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 555 556 return (ret); 557 } 558 559 /* 560 * This function checks and clears the secondary AER. 561 */ 562 static int 563 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { 564 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 565 uint32_t sev; 566 int ret = PX_NONFATAL; 567 568 /* Determine severity and clear the AER */ 569 switch (cs->msg_code) { 570 case PCIE_MSG_CODE_ERR_COR: 571 /* Ignore Correctable Errors */ 572 sev = 0; 573 break; 574 case PCIE_MSG_CODE_ERR_NONFATAL: 575 sev = ~(cs->aer_sev_sreg); 576 break; 577 case PCIE_MSG_CODE_ERR_FATAL: 578 sev = cs->aer_sev_sreg; 579 break; 580 default: 581 /* Major error force a panic */ 582 return (DDI_FM_FATAL); 583 } 584 off_reg = PCIE_AER_SUCE_STS; 585 chk_reg = px_fabric_die_sue; 586 chk_reg_gos = px_fabric_die_sue_gos; 587 reg = cs->aer_ue_sreg & sev; 588 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 589 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 590 591 return (ret); 592 } 593 594 static int 595 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) 596 { 597 pcie_req_id_t rid = cs->rid; 598 uint16_t cap_off = cs->cap_off; 599 uint16_t aer_off = cs->aer_off; 600 uint8_t hdr_type = cs->hdr_type; 601 uint16_t dev_type = cs->dev_type; 602 int ret = PX_NONFATAL; 603 604 if (hdr_type == PCI_HEADER_PPB) { 605 ret |= px_fabric_handle_ssts(cs); 606 } 607 608 if (!aer_off) { 609 ret |= px_fabric_handle_psts(cs); 610 } 611 612 if (aer_off) { 613 ret |= px_fabric_handle_paer(px_p, cs); 614 } 615 616 if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { 617 ret |= px_fabric_handle_saer(px_p, cs); 618 } 619 620 /* Clear the standard PCIe error registers */ 621 px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); 622 623 /* Clear the legacy error registers */ 624 px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); 625 626 /* Clear the legacy secondary error registers */ 627 if (hdr_type == PCI_HEADER_PPB) { 628 px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, 629 cs->sts_sreg); 630 } 631 632 return (ret); 633 } 634 635 static void 636 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) 637 { 638 uint16_t cap_off, aer_off; 639 pcie_req_id_t rid = cs->rid; 640 641 /* Gather Basic Device Information */ 642 cs->hdr_type = (px_fab_get(px_p, rid, 643 PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF; 644 645 cs->cap_off = px_fabric_get_pciecap(px_p, rid); 646 cap_off = cs->cap_off; 647 if (!cap_off) 648 return; 649 650 cs->aer_off = px_fabric_get_aer(px_p, rid); 651 aer_off = cs->aer_off; 652 653 cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; 654 cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; 655 656 /* Get the Primary Sts Reg */ 657 cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); 658 659 /* If it is a bridge/switch get the Secondary Sts Reg */ 660 if (cs->hdr_type == PCI_HEADER_PPB) 661 cs->sts_sreg = px_fab_get(px_p, rid, 662 PCI_BCNF_IO_BASE_LOW); 663 664 /* Get the PCIe Dev Sts Reg */ 665 cs->dev_sts_reg = px_fab_get(px_p, rid, 666 cap_off + PCIE_DEVCTL); 667 668 if (!aer_off) 669 return; 670 671 /* Get the AER register information */ 672 cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); 673 cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); 674 cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); 675 cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); 676 cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); 677 cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); 678 cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); 679 680 if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) 681 return; 682 683 /* If this is a bridge check secondary aer */ 684 cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); 685 cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); 686 cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); 687 cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); 688 cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); 689 cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); 690 } 691 692 /* 693 * If a fabric intr occurs, query and clear the error registers on that device. 694 * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. 695 */ 696 static uint_t 697 px_fabric_check(px_t *px_p, msgcode_t msg_code, 698 pcie_req_id_t rid, ddi_fm_error_t *derr) 699 { 700 dev_info_t *dip = px_p->px_dip; 701 char buf[FM_MAX_CLASS]; 702 px_fabric_cfgspace_t cs; 703 int ret; 704 705 /* clear cs */ 706 bzero(&cs, sizeof (px_fabric_cfgspace_t)); 707 708 cs.msg_code = msg_code; 709 cs.rid = rid; 710 711 px_fabric_fill_cs(px_p, &cs); 712 if (cs.cap_off) 713 ret = px_fabric_handle(px_p, &cs); 714 else 715 ret = PX_FATAL_GOS; 716 717 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); 718 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 719 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 720 PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, 721 PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, 722 "cap_off", DATA_TYPE_UINT16, cs.cap_off, 723 "aer_off", DATA_TYPE_UINT16, cs.aer_off, 724 "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, 725 "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, 726 "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, 727 "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, 728 "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, 729 "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, 730 "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, 731 "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, 732 "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, 733 "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, 734 "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, 735 "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, 736 "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, 737 "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, 738 "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, 739 "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, 740 "severity", DATA_TYPE_UINT32, ret, 741 NULL); 742 743 /* Check for protected access */ 744 switch (derr->fme_flag) { 745 case DDI_FM_ERR_EXPECTED: 746 case DDI_FM_ERR_PEEK: 747 case DDI_FM_ERR_POKE: 748 ret &= PX_FATAL_GOS; 749 break; 750 } 751 752 753 if (px_fabric_die && 754 (ret & (PX_FATAL_GOS | PX_FATAL_SW))) 755 ret = DDI_FM_FATAL; 756 757 return (ret); 758 } 759 760 /* 761 * px_err_fabric_intr: 762 * Interrupt handler for PCIE fabric block. 763 * o lock 764 * o create derr 765 * o px_err_handle(leaf, with jbc) 766 * o send ereport(fire fmri, derr, payload = BDF) 767 * o dispatch (leaf) 768 * o unlock 769 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 770 */ 771 /* ARGSUSED */ 772 uint_t 773 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 774 pcie_req_id_t rid) 775 { 776 dev_info_t *rpdip = px_p->px_dip; 777 px_cb_t *cb_p = px_p->px_cb_p; 778 int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; 779 ddi_fm_error_t derr; 780 781 mutex_enter(&cb_p->xbc_fm_mutex); 782 783 /* Create the derr */ 784 bzero(&derr, sizeof (ddi_fm_error_t)); 785 derr.fme_version = DDI_FME_VERSION; 786 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 787 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 788 789 /* send ereport/handle/clear fire registers */ 790 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 791 792 /* Check and clear the fabric error */ 793 fab_err = px_fabric_check(px_p, msg_code, rid, &derr); 794 795 /* Check all child devices for errors */ 796 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 797 798 mutex_exit(&cb_p->xbc_fm_mutex); 799 800 /* 801 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 802 * therefore it does not cause panic. 803 */ 804 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || 805 (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) 806 PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" 807 "(%x,%x,%x)\n", ddi_driver_name(rpdip), 808 ddi_get_instance(rpdip), err, fab_err, ret); 809 810 return (DDI_INTR_CLAIMED); 811 } 812 813 /* 814 * px_err_safeacc_check: 815 * Check to see if a peek/poke and cautious access is currently being 816 * done on a particular leaf. 817 * 818 * Safe access reads induced fire errors will be handled by cpu trap handler 819 * which will call px_fm_callback() which calls this function. In that 820 * case, the derr fields will be set by trap handler with the correct values. 821 * 822 * Safe access writes induced errors will be handled by px interrupt 823 * handlers, this function will fill in the derr fields. 824 * 825 * If a cpu trap does occur, it will quiesce all other interrupts allowing 826 * the cpu trap error handling to finish before Fire receives an interrupt. 827 * 828 * If fire does indeed have an error when a cpu trap occurs as a result of 829 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 830 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 831 * handler and this function will need to find if this error occured in the 832 * middle of a safe access operation. 833 * 834 * @param px_p leaf in which to check access 835 * @param derr fm err data structure to be updated 836 */ 837 void 838 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 839 { 840 px_pec_t *pec_p = px_p->px_pec_p; 841 px_cb_t *cb_p = px_p->px_cb_p; 842 int acctype = pec_p->pec_safeacc_type; 843 844 ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex)); 845 846 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 847 return; 848 } 849 850 /* safe access checking */ 851 switch (acctype) { 852 case DDI_FM_ERR_EXPECTED: 853 /* 854 * cautious access protection, protected from all err. 855 */ 856 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 857 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 858 DDI_FME_VERSION); 859 derr->fme_flag = acctype; 860 derr->fme_acc_handle = pec_p->pec_acc_hdl; 861 break; 862 case DDI_FM_ERR_POKE: 863 /* 864 * ddi_poke protection, check nexus and children for 865 * expected errors. 866 */ 867 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 868 membar_sync(); 869 derr->fme_flag = acctype; 870 break; 871 case DDI_FM_ERR_PEEK: 872 derr->fme_flag = acctype; 873 break; 874 } 875 } 876