1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * PX Fault Management Architecture 30 */ 31 #include <sys/types.h> 32 #include <sys/sunndi.h> 33 #include <sys/sunddi.h> 34 #include <sys/fm/protocol.h> 35 #include <sys/fm/util.h> 36 #include <sys/membar.h> 37 #include "px_obj.h" 38 39 typedef struct px_fabric_cfgspace { 40 /* Error information */ 41 msgcode_t msg_code; 42 pcie_req_id_t rid; 43 44 /* Config space header and device type */ 45 uint8_t hdr_type; 46 uint16_t dev_type; 47 48 /* Register pointers */ 49 uint16_t cap_off; 50 uint16_t aer_off; 51 52 /* PCI register values */ 53 uint32_t sts_reg; 54 uint32_t sts_sreg; 55 56 /* PCIE register values */ 57 uint32_t dev_sts_reg; 58 uint32_t aer_ce_reg; 59 uint32_t aer_ue_reg; 60 uint32_t aer_sev_reg; 61 uint32_t aer_ue_sreg; 62 uint32_t aer_sev_sreg; 63 64 /* PCIE Header Log Registers */ 65 uint32_t aer_h1; 66 uint32_t aer_h2; 67 uint32_t aer_h3; 68 uint32_t aer_h4; 69 uint32_t aer_sh1; 70 uint32_t aer_sh2; 71 uint32_t aer_sh3; 72 uint32_t aer_sh4; 73 } px_fabric_cfgspace_t; 74 75 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); 76 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); 77 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); 78 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); 79 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); 80 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); 81 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); 82 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); 83 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, 84 pcie_req_id_t rid, ddi_fm_error_t *derr); 85 86 /* 87 * Initialize px FMA support 88 */ 89 int 90 px_fm_attach(px_t *px_p) 91 { 92 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 93 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 94 95 /* 96 * check parents' capability 97 */ 98 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 99 100 /* 101 * parents need to be ereport and error handling capable 102 */ 103 ASSERT(px_p->px_fm_cap && 104 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 105 106 /* 107 * Initialize lock to synchronize fabric error handling 108 */ 109 mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER, 110 (void *)px_p->px_fm_ibc); 111 112 /* 113 * register error callback in parent 114 */ 115 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 116 117 return (DDI_SUCCESS); 118 } 119 120 /* 121 * Deregister FMA 122 */ 123 void 124 px_fm_detach(px_t *px_p) 125 { 126 ddi_fm_handler_unregister(px_p->px_dip); 127 mutex_destroy(&px_p->px_fm_mutex); 128 ddi_fm_fini(px_p->px_dip); 129 } 130 131 /* 132 * Function used to setup access functions depending on level of desired 133 * protection. 134 */ 135 void 136 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 137 { 138 uchar_t fflag; 139 ddi_acc_hdl_t *hp; 140 ddi_acc_impl_t *ap; 141 142 hp = mp->map_handlep; 143 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 144 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 145 146 if (mp->map_op == DDI_MO_MAP_LOCKED) { 147 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 148 switch (fflag) { 149 case DDI_FLAGERR_ACC: 150 ap->ahi_get8 = i_ddi_prot_get8; 151 ap->ahi_get16 = i_ddi_prot_get16; 152 ap->ahi_get32 = i_ddi_prot_get32; 153 ap->ahi_get64 = i_ddi_prot_get64; 154 ap->ahi_put8 = i_ddi_prot_put8; 155 ap->ahi_put16 = i_ddi_prot_put16; 156 ap->ahi_put32 = i_ddi_prot_put32; 157 ap->ahi_put64 = i_ddi_prot_put64; 158 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 159 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 160 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 161 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 162 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 163 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 164 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 165 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 166 break; 167 case DDI_CAUTIOUS_ACC : 168 ap->ahi_get8 = i_ddi_caut_get8; 169 ap->ahi_get16 = i_ddi_caut_get16; 170 ap->ahi_get32 = i_ddi_caut_get32; 171 ap->ahi_get64 = i_ddi_caut_get64; 172 ap->ahi_put8 = i_ddi_caut_put8; 173 ap->ahi_put16 = i_ddi_caut_put16; 174 ap->ahi_put32 = i_ddi_caut_put32; 175 ap->ahi_put64 = i_ddi_caut_put64; 176 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 177 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 178 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 179 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 180 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 181 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 182 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 183 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 184 break; 185 default: 186 break; 187 } 188 } else if (mp->map_op == DDI_MO_UNMAP) { 189 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 190 } 191 } 192 193 /* 194 * Function called after a dma fault occurred to find out whether the 195 * fault address is associated with a driver that is able to handle faults 196 * and recover from faults. The driver has to set DDI_DMA_FLAGERR and 197 * cache dma handles in order to make this checking effective to help 198 * recovery from dma faults. 199 */ 200 /* ARGSUSED */ 201 static int 202 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr, 203 const void *not_used) 204 { 205 ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle; 206 pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 207 pfn_t comp_pfn; 208 int page; 209 210 /* 211 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not 212 * been effectively initialized during attach. 213 */ 214 ASSERT(mp); 215 216 for (page = 0; page < mp->dmai_ndvmapages; page++) { 217 comp_pfn = PX_GET_MP_PFN(mp, page); 218 if (fault_pfn == comp_pfn) 219 return (DDI_FM_NONFATAL); 220 } 221 222 return (DDI_FM_UNKNOWN); 223 } 224 225 /* 226 * Function used to check if a given access handle owns the failing address. 227 * Called by ndi_fmc_error, when we detect a PIO error. 228 */ 229 /* ARGSUSED */ 230 static int 231 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr, 232 const void *not_used) 233 { 234 pfn_t pfn, fault_pfn; 235 ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); 236 237 /* 238 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not 239 * been effectively initialized during attach. 240 */ 241 ASSERT(hp); 242 243 pfn = hp->ah_pfn; 244 fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 245 if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) 246 return (DDI_FM_NONFATAL); 247 248 return (DDI_FM_UNKNOWN); 249 } 250 251 /* 252 * Function used by PCI error handlers to check if captured address is stored 253 * in the DMA or ACC handle caches. 254 */ 255 int 256 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 257 { 258 uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap; 259 int ret = DDI_FM_FATAL; 260 261 int (*f)() = type == DMA_HANDLE ? 262 (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) : 263 (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL); 264 265 if (f) 266 ret = ndi_fmc_error(dip, NULL, type, f, fme_ena, afar); 267 268 return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); 269 } 270 271 /* 272 * Function used to initialize FMA for our children nodes. Called 273 * through pci busops when child node calls ddi_fm_init. 274 */ 275 /*ARGSUSED*/ 276 int 277 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 278 ddi_iblock_cookie_t *ibc_p) 279 { 280 px_t *px_p = DIP_TO_STATE(dip); 281 282 ASSERT(ibc_p != NULL); 283 *ibc_p = px_p->px_fm_ibc; 284 285 return (px_p->px_fm_cap); 286 } 287 288 /* 289 * lock access for exclusive PCIe access 290 */ 291 void 292 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 293 { 294 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 295 296 /* 297 * Exclusive access has been used for cautious put/get, 298 * Both utilize i_ddi_ontrap which, on sparcv9, implements 299 * similar protection as what on_trap() does, and which calls 300 * membar #Sync to flush out all cpu deferred errors 301 * prior to get/put operation, so here we're not calling 302 * membar #Sync - a difference from what's in pci_bus_enter(). 303 */ 304 mutex_enter(&pec_p->pec_pokefault_mutex); 305 pec_p->pec_acc_hdl = handle; 306 } 307 308 /* 309 * unlock access for exclusive PCIe access 310 */ 311 /* ARGSUSED */ 312 void 313 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 314 { 315 px_t *px_p = DIP_TO_STATE(dip); 316 px_pec_t *pec_p = px_p->px_pec_p; 317 318 pec_p->pec_acc_hdl = NULL; 319 mutex_exit(&pec_p->pec_pokefault_mutex); 320 } 321 322 323 /* 324 * PCI error callback which is registered with our parent to call 325 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 326 * and PCI BERR/TO/UE 327 * 328 * Dispatch on all known leaves of this fire device because we cannot tell 329 * which side the error came from. 330 */ 331 /*ARGSUSED*/ 332 int 333 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 334 { 335 px_t *px_p = (px_t *)impl_data; 336 int err = PX_OK; 337 int fatal = 0; 338 int nonfatal = 0; 339 int unknown = 0; 340 int ret = DDI_FM_OK; 341 342 mutex_enter(&px_p->px_fm_mutex); 343 344 err = px_err_handle(px_p, derr, PX_TRAP_CALL, B_TRUE); 345 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 346 347 mutex_exit(&px_p->px_fm_mutex); 348 349 switch (ret) { 350 case DDI_FM_FATAL: 351 fatal++; 352 break; 353 case DDI_FM_NONFATAL: 354 nonfatal++; 355 break; 356 case DDI_FM_UNKNOWN: 357 unknown++; 358 break; 359 default: 360 break; 361 } 362 363 ret = (fatal != 0) ? DDI_FM_FATAL : 364 ((nonfatal != 0) ? DDI_FM_NONFATAL : 365 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 366 367 /* fire fatal error overrides device error */ 368 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 369 ret = DDI_FM_FATAL; 370 /* if fire encounts no error, then take whatever device error */ 371 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 372 ret = DDI_FM_NONFATAL; 373 374 return (ret); 375 } 376 377 static uint16_t 378 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) 379 { 380 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 381 uint16_t offset = PCIE_EXT_CAP; 382 int deadcount = 0; 383 384 /* Find the Advanced Error Register */ 385 hdr = px_fab_get(px_p, rid, offset); 386 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 387 PCIE_EXT_CAP_NEXT_PTR_MASK; 388 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 389 PCIE_EXT_CAP_ID_MASK; 390 391 while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && 392 (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { 393 offset = hdr_next_ptr; 394 hdr = px_fab_get(px_p, rid, offset); 395 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 396 PCIE_EXT_CAP_NEXT_PTR_MASK; 397 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 398 PCIE_EXT_CAP_ID_MASK; 399 400 if (deadcount++ > 100) 401 break; 402 } 403 404 if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) 405 return (offset); 406 407 return (0); 408 } 409 410 static uint16_t 411 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) 412 { 413 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 414 uint16_t offset = PCI_CONF_STAT; 415 int deadcount = 0; 416 417 hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; 418 if (!(hdr & PCI_STAT_CAP)) { 419 /* This is not a PCIE device */ 420 return (0); 421 } 422 423 hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); 424 hdr_next_ptr = hdr & 0xFF; 425 hdr_cap_id = 0; 426 427 while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && 428 (hdr_cap_id != PCI_CAP_ID_PCI_E)) { 429 offset = hdr_next_ptr; 430 431 if (hdr_next_ptr < 0x40) { 432 break; 433 } 434 435 hdr = px_fab_get(px_p, rid, hdr_next_ptr); 436 hdr_next_ptr = (hdr >> 8) & 0xFF; 437 hdr_cap_id = hdr & 0xFF; 438 439 if (deadcount++ > 100) 440 break; 441 } 442 443 if (hdr_cap_id == PCI_CAP_ID_PCI_E) 444 return (offset); 445 446 return (0); 447 } 448 449 /* 450 * This function checks the primary status registers. 451 * Take the PCI status register and translate it to PCIe equivalent. 452 */ 453 static int 454 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { 455 uint16_t sts_reg = cs->sts_reg >> 16; 456 uint16_t pci_status; 457 uint32_t pcie_status; 458 int ret = PX_NONFATAL; 459 460 /* Parity Err == Send/Recv Poisoned TLP */ 461 pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; 462 pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; 463 if (sts_reg & pci_status) 464 ret |= PX_FABRIC_ERR_SEV(pcie_status, 465 px_fabric_die_ue, px_fabric_die_ue_gos); 466 467 /* Target Abort == Completer Abort */ 468 pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; 469 pcie_status = PCIE_AER_UCE_CA; 470 if (sts_reg & pci_status) 471 ret |= PX_FABRIC_ERR_SEV(pcie_status, 472 px_fabric_die_ue, px_fabric_die_ue_gos); 473 474 /* Master Abort == Unsupport Request */ 475 pci_status = PCI_STAT_R_MAST_AB; 476 pcie_status = PCIE_AER_UCE_UR; 477 if (sts_reg & pci_status) 478 ret |= PX_FABRIC_ERR_SEV(pcie_status, 479 px_fabric_die_ue, px_fabric_die_ue_gos); 480 481 /* System Error == Uncorrectable Error */ 482 pci_status = PCI_STAT_S_SYSERR; 483 pcie_status = -1; 484 if (sts_reg & pci_status) 485 ret |= PX_FABRIC_ERR_SEV(pcie_status, 486 px_fabric_die_ue, px_fabric_die_ue_gos); 487 488 return (ret); 489 } 490 491 /* 492 * This function checks the secondary status registers. 493 * Switches and Bridges have a different behavior. 494 */ 495 static int 496 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { 497 uint16_t sts_reg = cs->sts_sreg >> 16; 498 int ret = PX_NONFATAL; 499 500 if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { 501 /* 502 * This is a PCIE-PCI bridge, but only check the severity 503 * if this device doesn't support AERs. 504 */ 505 if (!cs->aer_off) 506 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, 507 px_fabric_die_bdg_sts_gos); 508 } else { 509 /* This is most likely a PCIE switch */ 510 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, 511 px_fabric_die_sw_sts_gos); 512 } 513 514 return (ret); 515 } 516 517 /* 518 * This function checks and clears the primary AER. 519 */ 520 static int 521 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { 522 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 523 int ret = PX_NONFATAL; 524 525 /* Determine severity and clear the AER */ 526 switch (cs->msg_code) { 527 case PCIE_MSG_CODE_ERR_COR: 528 off_reg = PCIE_AER_CE_STS; 529 chk_reg = px_fabric_die_ce; 530 chk_reg_gos = px_fabric_die_ce_gos; 531 reg = cs->aer_ce_reg; 532 break; 533 case PCIE_MSG_CODE_ERR_NONFATAL: 534 off_reg = PCIE_AER_UCE_STS; 535 chk_reg = px_fabric_die_ue; 536 chk_reg_gos = px_fabric_die_ue_gos; 537 reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); 538 break; 539 case PCIE_MSG_CODE_ERR_FATAL: 540 off_reg = PCIE_AER_UCE_STS; 541 chk_reg = px_fabric_die_ue; 542 chk_reg_gos = px_fabric_die_ue_gos; 543 reg = cs->aer_ue_reg & cs->aer_sev_reg; 544 break; 545 default: 546 /* Major error force a panic */ 547 return (PX_FATAL_GOS); 548 } 549 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 550 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 551 552 return (ret); 553 } 554 555 /* 556 * This function checks and clears the secondary AER. 557 */ 558 static int 559 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { 560 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 561 uint32_t sev; 562 int ret = PX_NONFATAL; 563 564 /* Determine severity and clear the AER */ 565 switch (cs->msg_code) { 566 case PCIE_MSG_CODE_ERR_COR: 567 /* Ignore Correctable Errors */ 568 sev = 0; 569 break; 570 case PCIE_MSG_CODE_ERR_NONFATAL: 571 sev = ~(cs->aer_sev_sreg); 572 break; 573 case PCIE_MSG_CODE_ERR_FATAL: 574 sev = cs->aer_sev_sreg; 575 break; 576 default: 577 /* Major error force a panic */ 578 return (DDI_FM_FATAL); 579 } 580 off_reg = PCIE_AER_SUCE_STS; 581 chk_reg = px_fabric_die_sue; 582 chk_reg_gos = px_fabric_die_sue_gos; 583 reg = cs->aer_ue_sreg & sev; 584 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 585 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 586 587 return (ret); 588 } 589 590 static int 591 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) 592 { 593 pcie_req_id_t rid = cs->rid; 594 uint16_t cap_off = cs->cap_off; 595 uint16_t aer_off = cs->aer_off; 596 uint8_t hdr_type = cs->hdr_type; 597 uint16_t dev_type = cs->dev_type; 598 int ret = PX_NONFATAL; 599 600 if (hdr_type == PCI_HEADER_PPB) { 601 ret |= px_fabric_handle_ssts(cs); 602 } 603 604 if (!aer_off) { 605 ret |= px_fabric_handle_psts(cs); 606 } 607 608 if (aer_off) { 609 ret |= px_fabric_handle_paer(px_p, cs); 610 } 611 612 if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { 613 ret |= px_fabric_handle_saer(px_p, cs); 614 } 615 616 /* Clear the standard PCIe error registers */ 617 px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); 618 619 /* Clear the legacy error registers */ 620 px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); 621 622 /* Clear the legacy secondary error registers */ 623 if (hdr_type == PCI_HEADER_PPB) { 624 px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, 625 cs->sts_sreg); 626 } 627 628 return (ret); 629 } 630 631 static void 632 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) 633 { 634 uint16_t cap_off, aer_off; 635 pcie_req_id_t rid = cs->rid; 636 637 /* Gather Basic Device Information */ 638 cs->hdr_type = (px_fab_get(px_p, rid, 639 PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF; 640 641 cs->cap_off = px_fabric_get_pciecap(px_p, rid); 642 cap_off = cs->cap_off; 643 if (!cap_off) 644 return; 645 646 cs->aer_off = px_fabric_get_aer(px_p, rid); 647 aer_off = cs->aer_off; 648 649 cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; 650 cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; 651 652 /* Get the Primary Sts Reg */ 653 cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); 654 655 /* If it is a bridge/switch get the Secondary Sts Reg */ 656 if (cs->hdr_type == PCI_HEADER_PPB) 657 cs->sts_sreg = px_fab_get(px_p, rid, 658 PCI_BCNF_IO_BASE_LOW); 659 660 /* Get the PCIe Dev Sts Reg */ 661 cs->dev_sts_reg = px_fab_get(px_p, rid, 662 cap_off + PCIE_DEVCTL); 663 664 if (!aer_off) 665 return; 666 667 /* Get the AER register information */ 668 cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); 669 cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); 670 cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); 671 cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); 672 cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); 673 cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); 674 cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); 675 676 if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) 677 return; 678 679 /* If this is a bridge check secondary aer */ 680 cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); 681 cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); 682 cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); 683 cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); 684 cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); 685 cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); 686 } 687 688 /* 689 * If a fabric intr occurs, query and clear the error registers on that device. 690 * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. 691 */ 692 static uint_t 693 px_fabric_check(px_t *px_p, msgcode_t msg_code, 694 pcie_req_id_t rid, ddi_fm_error_t *derr) 695 { 696 dev_info_t *dip = px_p->px_dip; 697 char buf[FM_MAX_CLASS]; 698 px_fabric_cfgspace_t cs; 699 int ret; 700 701 /* clear cs */ 702 bzero(&cs, sizeof (px_fabric_cfgspace_t)); 703 704 cs.msg_code = msg_code; 705 cs.rid = rid; 706 707 px_fabric_fill_cs(px_p, &cs); 708 if (cs.cap_off) 709 ret = px_fabric_handle(px_p, &cs); 710 else 711 ret = PX_FATAL_GOS; 712 713 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); 714 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 715 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 716 PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, 717 PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, 718 "cap_off", DATA_TYPE_UINT16, cs.cap_off, 719 "aer_off", DATA_TYPE_UINT16, cs.aer_off, 720 "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, 721 "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, 722 "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, 723 "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, 724 "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, 725 "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, 726 "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, 727 "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, 728 "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, 729 "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, 730 "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, 731 "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, 732 "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, 733 "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, 734 "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, 735 "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, 736 "severity", DATA_TYPE_UINT32, ret, 737 NULL); 738 739 /* Check for protected access */ 740 switch (derr->fme_flag) { 741 case DDI_FM_ERR_EXPECTED: 742 case DDI_FM_ERR_PEEK: 743 case DDI_FM_ERR_POKE: 744 ret &= PX_FATAL_GOS; 745 break; 746 } 747 748 749 if (px_fabric_die && 750 (ret & (PX_FATAL_GOS | PX_FATAL_SW))) 751 ret = DDI_FM_FATAL; 752 753 return (ret); 754 } 755 756 /* 757 * px_err_fabric_intr: 758 * Interrupt handler for PCIE fabric block. 759 * o lock 760 * o create derr 761 * o px_err_handle(leaf, with jbc) 762 * o send ereport(fire fmri, derr, payload = BDF) 763 * o dispatch (leaf) 764 * o unlock 765 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 766 */ 767 /* ARGSUSED */ 768 uint_t 769 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 770 pcie_req_id_t rid) 771 { 772 dev_info_t *rpdip = px_p->px_dip; 773 int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; 774 ddi_fm_error_t derr; 775 776 mutex_enter(&px_p->px_fm_mutex); 777 778 /* Create the derr */ 779 bzero(&derr, sizeof (ddi_fm_error_t)); 780 derr.fme_version = DDI_FME_VERSION; 781 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 782 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 783 784 /* send ereport/handle/clear fire registers */ 785 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 786 787 /* Check and clear the fabric error */ 788 fab_err = px_fabric_check(px_p, msg_code, rid, &derr); 789 790 /* Check all child devices for errors */ 791 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 792 793 mutex_exit(&px_p->px_fm_mutex); 794 795 /* 796 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 797 * therefore it does not cause panic. 798 */ 799 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || 800 (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) 801 PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" 802 "(%x,%x,%x)\n", ddi_driver_name(rpdip), 803 ddi_get_instance(rpdip), err, fab_err, ret); 804 805 return (DDI_INTR_CLAIMED); 806 } 807 808 /* 809 * px_err_safeacc_check: 810 * Check to see if a peek/poke and cautious access is currently being 811 * done on a particular leaf. 812 * 813 * Safe access reads induced fire errors will be handled by cpu trap handler 814 * which will call px_fm_callback() which calls this function. In that 815 * case, the derr fields will be set by trap handler with the correct values. 816 * 817 * Safe access writes induced errors will be handled by px interrupt 818 * handlers, this function will fill in the derr fields. 819 * 820 * If a cpu trap does occur, it will quiesce all other interrupts allowing 821 * the cpu trap error handling to finish before Fire receives an interrupt. 822 * 823 * If fire does indeed have an error when a cpu trap occurs as a result of 824 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 825 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 826 * handler and this function will need to find if this error occured in the 827 * middle of a safe access operation. 828 * 829 * @param px_p leaf in which to check access 830 * @param derr fm err data structure to be updated 831 */ 832 void 833 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 834 { 835 px_pec_t *pec_p = px_p->px_pec_p; 836 int acctype = pec_p->pec_safeacc_type; 837 838 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 839 840 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 841 return; 842 } 843 844 /* safe access checking */ 845 switch (acctype) { 846 case DDI_FM_ERR_EXPECTED: 847 /* 848 * cautious access protection, protected from all err. 849 */ 850 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 851 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 852 DDI_FME_VERSION); 853 derr->fme_flag = acctype; 854 derr->fme_acc_handle = pec_p->pec_acc_hdl; 855 break; 856 case DDI_FM_ERR_POKE: 857 /* 858 * ddi_poke protection, check nexus and children for 859 * expected errors. 860 */ 861 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 862 membar_sync(); 863 derr->fme_flag = acctype; 864 break; 865 case DDI_FM_ERR_PEEK: 866 derr->fme_flag = acctype; 867 break; 868 } 869 } 870