1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * PX Fault Management Architecture 31 */ 32 #include <sys/types.h> 33 #include <sys/sunndi.h> 34 #include <sys/sunddi.h> 35 #include <sys/fm/protocol.h> 36 #include <sys/fm/util.h> 37 #include <sys/membar.h> 38 #include "px_obj.h" 39 40 typedef struct px_fabric_cfgspace { 41 /* Error information */ 42 msgcode_t msg_code; 43 pcie_req_id_t rid; 44 45 /* Config space header and device type */ 46 uint8_t hdr_type; 47 uint16_t dev_type; 48 49 /* Register pointers */ 50 uint16_t cap_off; 51 uint16_t aer_off; 52 53 /* PCI register values */ 54 uint32_t sts_reg; 55 uint32_t sts_sreg; 56 57 /* PCIE register values */ 58 uint32_t dev_sts_reg; 59 uint32_t aer_ce_reg; 60 uint32_t aer_ue_reg; 61 uint32_t aer_sev_reg; 62 uint32_t aer_ue_sreg; 63 uint32_t aer_sev_sreg; 64 65 /* PCIE Header Log Registers */ 66 uint32_t aer_h1; 67 uint32_t aer_h2; 68 uint32_t aer_h3; 69 uint32_t aer_h4; 70 uint32_t aer_sh1; 71 uint32_t aer_sh2; 72 uint32_t aer_sh3; 73 uint32_t aer_sh4; 74 } px_fabric_cfgspace_t; 75 76 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); 77 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); 78 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); 79 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); 80 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); 81 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); 82 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); 83 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); 84 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, 85 pcie_req_id_t rid, ddi_fm_error_t *derr); 86 87 /* 88 * Initialize px FMA support 89 */ 90 int 91 px_fm_attach(px_t *px_p) 92 { 93 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 94 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 95 96 /* 97 * check parents' capability 98 */ 99 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 100 101 /* 102 * parents need to be ereport and error handling capable 103 */ 104 ASSERT(px_p->px_fm_cap && 105 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 106 107 /* 108 * register error callback in parent 109 */ 110 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 111 112 return (DDI_SUCCESS); 113 } 114 115 /* 116 * Deregister FMA 117 */ 118 void 119 px_fm_detach(px_t *px_p) 120 { 121 ddi_fm_handler_unregister(px_p->px_dip); 122 ddi_fm_fini(px_p->px_dip); 123 } 124 125 /* 126 * Function used to setup access functions depending on level of desired 127 * protection. 128 */ 129 void 130 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 131 { 132 uchar_t fflag; 133 ddi_acc_hdl_t *hp; 134 ddi_acc_impl_t *ap; 135 136 hp = mp->map_handlep; 137 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 138 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 139 140 if (mp->map_op == DDI_MO_MAP_LOCKED) { 141 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 142 switch (fflag) { 143 case DDI_FLAGERR_ACC: 144 ap->ahi_get8 = i_ddi_prot_get8; 145 ap->ahi_get16 = i_ddi_prot_get16; 146 ap->ahi_get32 = i_ddi_prot_get32; 147 ap->ahi_get64 = i_ddi_prot_get64; 148 ap->ahi_put8 = i_ddi_prot_put8; 149 ap->ahi_put16 = i_ddi_prot_put16; 150 ap->ahi_put32 = i_ddi_prot_put32; 151 ap->ahi_put64 = i_ddi_prot_put64; 152 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 153 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 154 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 155 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 156 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 157 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 158 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 159 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 160 break; 161 case DDI_CAUTIOUS_ACC : 162 ap->ahi_get8 = i_ddi_caut_get8; 163 ap->ahi_get16 = i_ddi_caut_get16; 164 ap->ahi_get32 = i_ddi_caut_get32; 165 ap->ahi_get64 = i_ddi_caut_get64; 166 ap->ahi_put8 = i_ddi_caut_put8; 167 ap->ahi_put16 = i_ddi_caut_put16; 168 ap->ahi_put32 = i_ddi_caut_put32; 169 ap->ahi_put64 = i_ddi_caut_put64; 170 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 171 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 172 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 173 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 174 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 175 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 176 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 177 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 178 break; 179 default: 180 break; 181 } 182 } else if (mp->map_op == DDI_MO_UNMAP) { 183 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 184 } 185 } 186 187 /* 188 * Function called after a dma fault occurred to find out whether the 189 * fault address is associated with a driver that is able to handle faults 190 * and recover from faults. The driver has to set DDI_DMA_FLAGERR and 191 * cache dma handles in order to make this checking effective to help 192 * recovery from dma faults. 193 */ 194 /* ARGSUSED */ 195 static int 196 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr, 197 const void *not_used) 198 { 199 ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle; 200 pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 201 pfn_t comp_pfn; 202 int page; 203 204 /* 205 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not 206 * been effectively initialized during attach. 207 */ 208 ASSERT(mp); 209 210 for (page = 0; page < mp->dmai_ndvmapages; page++) { 211 comp_pfn = PX_GET_MP_PFN(mp, page); 212 if (fault_pfn == comp_pfn) 213 return (DDI_FM_NONFATAL); 214 } 215 216 return (DDI_FM_UNKNOWN); 217 } 218 219 /* 220 * Function used to check if a given access handle owns the failing address. 221 * Called by ndi_fmc_error, when we detect a PIO error. 222 */ 223 /* ARGSUSED */ 224 static int 225 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr, 226 const void *not_used) 227 { 228 pfn_t pfn, fault_pfn; 229 ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); 230 231 /* 232 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not 233 * been effectively initialized during attach. 234 */ 235 ASSERT(hp); 236 237 pfn = hp->ah_pfn; 238 fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 239 if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) 240 return (DDI_FM_NONFATAL); 241 242 return (DDI_FM_UNKNOWN); 243 } 244 245 /* 246 * Function used by PCI error handlers to check if captured address is stored 247 * in the DMA or ACC handle caches. 248 */ 249 int 250 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 251 { 252 uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap; 253 int ret = DDI_FM_FATAL; 254 255 int (*f)() = type == DMA_HANDLE ? 256 (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) : 257 (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL); 258 259 if (f) 260 ret = ndi_fmc_error(dip, NULL, type, f, fme_ena, afar); 261 262 return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); 263 } 264 265 /* 266 * Function used to initialize FMA for our children nodes. Called 267 * through pci busops when child node calls ddi_fm_init. 268 */ 269 /*ARGSUSED*/ 270 int 271 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 272 ddi_iblock_cookie_t *ibc_p) 273 { 274 px_t *px_p = DIP_TO_STATE(dip); 275 276 ASSERT(ibc_p != NULL); 277 *ibc_p = px_p->px_fm_ibc; 278 279 return (px_p->px_fm_cap); 280 } 281 282 /* 283 * lock access for exclusive PCIe access 284 */ 285 void 286 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 287 { 288 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 289 290 /* 291 * Exclusive access has been used for cautious put/get, 292 * Both utilize i_ddi_ontrap which, on sparcv9, implements 293 * similar protection as what on_trap() does, and which calls 294 * membar #Sync to flush out all cpu deferred errors 295 * prior to get/put operation, so here we're not calling 296 * membar #Sync - a difference from what's in pci_bus_enter(). 297 */ 298 mutex_enter(&pec_p->pec_pokefault_mutex); 299 pec_p->pec_acc_hdl = handle; 300 } 301 302 /* 303 * unlock access for exclusive PCIe access 304 */ 305 /* ARGSUSED */ 306 void 307 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 308 { 309 px_t *px_p = DIP_TO_STATE(dip); 310 px_pec_t *pec_p = px_p->px_pec_p; 311 312 pec_p->pec_acc_hdl = NULL; 313 mutex_exit(&pec_p->pec_pokefault_mutex); 314 } 315 316 317 /* 318 * PCI error callback which is registered with our parent to call 319 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 320 * and PCI BERR/TO/UE 321 * 322 * Dispatch on all known leaves of this fire device because we cannot tell 323 * which side the error came from. 324 */ 325 /*ARGSUSED*/ 326 int 327 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 328 { 329 px_t *px_p = (px_t *)impl_data; 330 px_cb_t *cb_p = px_p->px_cb_p; 331 int err = PX_OK; 332 int fatal = 0; 333 int nonfatal = 0; 334 int unknown = 0; 335 int ret = DDI_FM_OK; 336 int i; 337 338 mutex_enter(&cb_p->xbc_fm_mutex); 339 340 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 341 px_p = cb_p->xbc_px_list[i]; 342 if (px_p != NULL) 343 err |= px_err_handle(px_p, derr, PX_TRAP_CALL, 344 (i == 0)); 345 } 346 347 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 348 px_p = cb_p->xbc_px_list[i]; 349 if (px_p != NULL) { 350 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 351 switch (ret) { 352 case DDI_FM_FATAL: 353 fatal++; 354 break; 355 case DDI_FM_NONFATAL: 356 nonfatal++; 357 break; 358 case DDI_FM_UNKNOWN: 359 unknown++; 360 break; 361 default: 362 break; 363 } 364 } 365 } 366 mutex_exit(&cb_p->xbc_fm_mutex); 367 368 ret = (fatal != 0) ? DDI_FM_FATAL : 369 ((nonfatal != 0) ? DDI_FM_NONFATAL : 370 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 371 372 /* fire fatal error overrides device error */ 373 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 374 ret = DDI_FM_FATAL; 375 /* if fire encounts no error, then take whatever device error */ 376 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 377 ret = DDI_FM_NONFATAL; 378 379 return (ret); 380 } 381 382 static uint16_t 383 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) 384 { 385 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 386 uint16_t offset = PCIE_EXT_CAP; 387 int deadcount = 0; 388 389 /* Find the Advanced Error Register */ 390 hdr = px_fab_get(px_p, rid, offset); 391 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 392 PCIE_EXT_CAP_NEXT_PTR_MASK; 393 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 394 PCIE_EXT_CAP_ID_MASK; 395 396 while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && 397 (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { 398 offset = hdr_next_ptr; 399 hdr = px_fab_get(px_p, rid, offset); 400 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 401 PCIE_EXT_CAP_NEXT_PTR_MASK; 402 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 403 PCIE_EXT_CAP_ID_MASK; 404 405 if (deadcount++ > 100) 406 break; 407 } 408 409 if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) 410 return (offset); 411 412 return (0); 413 } 414 415 static uint16_t 416 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) 417 { 418 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 419 uint16_t offset = PCI_CONF_STAT; 420 int deadcount = 0; 421 422 hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; 423 if (!(hdr & PCI_STAT_CAP)) { 424 /* This is not a PCIE device */ 425 return (0); 426 } 427 428 hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); 429 hdr_next_ptr = hdr & 0xFF; 430 hdr_cap_id = 0; 431 432 while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && 433 (hdr_cap_id != PCI_CAP_ID_PCI_E)) { 434 offset = hdr_next_ptr; 435 436 if (hdr_next_ptr < 0x40) { 437 break; 438 } 439 440 hdr = px_fab_get(px_p, rid, hdr_next_ptr); 441 hdr_next_ptr = (hdr >> 8) & 0xFF; 442 hdr_cap_id = hdr & 0xFF; 443 444 if (deadcount++ > 100) 445 break; 446 } 447 448 if (hdr_cap_id == PCI_CAP_ID_PCI_E) 449 return (offset); 450 451 return (0); 452 } 453 454 /* 455 * This function checks the primary status registers. 456 * Take the PCI status register and translate it to PCIe equivalent. 457 */ 458 static int 459 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { 460 uint16_t sts_reg = cs->sts_reg >> 16; 461 uint16_t pci_status; 462 uint32_t pcie_status; 463 int ret = PX_NONFATAL; 464 465 /* Parity Err == Send/Recv Poisoned TLP */ 466 pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; 467 pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; 468 if (sts_reg & pci_status) 469 ret |= PX_FABRIC_ERR_SEV(pcie_status, 470 px_fabric_die_ue, px_fabric_die_ue_gos); 471 472 /* Target Abort == Completer Abort */ 473 pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; 474 pcie_status = PCIE_AER_UCE_CA; 475 if (sts_reg & pci_status) 476 ret |= PX_FABRIC_ERR_SEV(pcie_status, 477 px_fabric_die_ue, px_fabric_die_ue_gos); 478 479 /* Master Abort == Unsupport Request */ 480 pci_status = PCI_STAT_R_MAST_AB; 481 pcie_status = PCIE_AER_UCE_UR; 482 if (sts_reg & pci_status) 483 ret |= PX_FABRIC_ERR_SEV(pcie_status, 484 px_fabric_die_ue, px_fabric_die_ue_gos); 485 486 /* System Error == Uncorrectable Error */ 487 pci_status = PCI_STAT_S_SYSERR; 488 pcie_status = -1; 489 if (sts_reg & pci_status) 490 ret |= PX_FABRIC_ERR_SEV(pcie_status, 491 px_fabric_die_ue, px_fabric_die_ue_gos); 492 493 return (ret); 494 } 495 496 /* 497 * This function checks the secondary status registers. 498 * Switches and Bridges have a different behavior. 499 */ 500 static int 501 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { 502 uint16_t sts_reg = cs->sts_sreg >> 16; 503 int ret = PX_NONFATAL; 504 505 if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { 506 /* 507 * This is a PCIE-PCI bridge, but only check the severity 508 * if this device doesn't support AERs. 509 */ 510 if (!cs->aer_off) 511 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, 512 px_fabric_die_bdg_sts_gos); 513 } else { 514 /* This is most likely a PCIE switch */ 515 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, 516 px_fabric_die_sw_sts_gos); 517 } 518 519 return (ret); 520 } 521 522 /* 523 * This function checks and clears the primary AER. 524 */ 525 static int 526 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { 527 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 528 int ret = PX_NONFATAL; 529 530 /* Determine severity and clear the AER */ 531 switch (cs->msg_code) { 532 case PCIE_MSG_CODE_ERR_COR: 533 off_reg = PCIE_AER_CE_STS; 534 chk_reg = px_fabric_die_ce; 535 chk_reg_gos = px_fabric_die_ce_gos; 536 reg = cs->aer_ce_reg; 537 break; 538 case PCIE_MSG_CODE_ERR_NONFATAL: 539 off_reg = PCIE_AER_UCE_STS; 540 chk_reg = px_fabric_die_ue; 541 chk_reg_gos = px_fabric_die_ue_gos; 542 reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); 543 break; 544 case PCIE_MSG_CODE_ERR_FATAL: 545 off_reg = PCIE_AER_UCE_STS; 546 chk_reg = px_fabric_die_ue; 547 chk_reg_gos = px_fabric_die_ue_gos; 548 reg = cs->aer_ue_reg & cs->aer_sev_reg; 549 break; 550 default: 551 /* Major error force a panic */ 552 return (PX_FATAL_GOS); 553 } 554 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 555 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 556 557 return (ret); 558 } 559 560 /* 561 * This function checks and clears the secondary AER. 562 */ 563 static int 564 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { 565 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 566 uint32_t sev; 567 int ret = PX_NONFATAL; 568 569 /* Determine severity and clear the AER */ 570 switch (cs->msg_code) { 571 case PCIE_MSG_CODE_ERR_COR: 572 /* Ignore Correctable Errors */ 573 sev = 0; 574 break; 575 case PCIE_MSG_CODE_ERR_NONFATAL: 576 sev = ~(cs->aer_sev_sreg); 577 break; 578 case PCIE_MSG_CODE_ERR_FATAL: 579 sev = cs->aer_sev_sreg; 580 break; 581 default: 582 /* Major error force a panic */ 583 return (DDI_FM_FATAL); 584 } 585 off_reg = PCIE_AER_SUCE_STS; 586 chk_reg = px_fabric_die_sue; 587 chk_reg_gos = px_fabric_die_sue_gos; 588 reg = cs->aer_ue_sreg & sev; 589 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 590 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 591 592 return (ret); 593 } 594 595 static int 596 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) 597 { 598 pcie_req_id_t rid = cs->rid; 599 uint16_t cap_off = cs->cap_off; 600 uint16_t aer_off = cs->aer_off; 601 uint8_t hdr_type = cs->hdr_type; 602 uint16_t dev_type = cs->dev_type; 603 int ret = PX_NONFATAL; 604 605 if (hdr_type == PCI_HEADER_PPB) { 606 ret |= px_fabric_handle_ssts(cs); 607 } 608 609 if (!aer_off) { 610 ret |= px_fabric_handle_psts(cs); 611 } 612 613 if (aer_off) { 614 ret |= px_fabric_handle_paer(px_p, cs); 615 } 616 617 if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { 618 ret |= px_fabric_handle_saer(px_p, cs); 619 } 620 621 /* Clear the standard PCIe error registers */ 622 px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); 623 624 /* Clear the legacy error registers */ 625 px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); 626 627 /* Clear the legacy secondary error registers */ 628 if (hdr_type == PCI_HEADER_PPB) { 629 px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, 630 cs->sts_sreg); 631 } 632 633 return (ret); 634 } 635 636 static void 637 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) 638 { 639 uint16_t cap_off, aer_off; 640 pcie_req_id_t rid = cs->rid; 641 642 /* Gather Basic Device Information */ 643 cs->hdr_type = (px_fab_get(px_p, rid, 644 PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF; 645 646 cs->cap_off = px_fabric_get_pciecap(px_p, rid); 647 cap_off = cs->cap_off; 648 if (!cap_off) 649 return; 650 651 cs->aer_off = px_fabric_get_aer(px_p, rid); 652 aer_off = cs->aer_off; 653 654 cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; 655 cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; 656 657 /* Get the Primary Sts Reg */ 658 cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); 659 660 /* If it is a bridge/switch get the Secondary Sts Reg */ 661 if (cs->hdr_type == PCI_HEADER_PPB) 662 cs->sts_sreg = px_fab_get(px_p, rid, 663 PCI_BCNF_IO_BASE_LOW); 664 665 /* Get the PCIe Dev Sts Reg */ 666 cs->dev_sts_reg = px_fab_get(px_p, rid, 667 cap_off + PCIE_DEVCTL); 668 669 if (!aer_off) 670 return; 671 672 /* Get the AER register information */ 673 cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); 674 cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); 675 cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); 676 cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); 677 cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); 678 cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); 679 cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); 680 681 if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) 682 return; 683 684 /* If this is a bridge check secondary aer */ 685 cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); 686 cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); 687 cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); 688 cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); 689 cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); 690 cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); 691 } 692 693 /* 694 * If a fabric intr occurs, query and clear the error registers on that device. 695 * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. 696 */ 697 static uint_t 698 px_fabric_check(px_t *px_p, msgcode_t msg_code, 699 pcie_req_id_t rid, ddi_fm_error_t *derr) 700 { 701 dev_info_t *dip = px_p->px_dip; 702 char buf[FM_MAX_CLASS]; 703 px_fabric_cfgspace_t cs; 704 int ret; 705 706 /* clear cs */ 707 bzero(&cs, sizeof (px_fabric_cfgspace_t)); 708 709 cs.msg_code = msg_code; 710 cs.rid = rid; 711 712 px_fabric_fill_cs(px_p, &cs); 713 if (cs.cap_off) 714 ret = px_fabric_handle(px_p, &cs); 715 else 716 ret = PX_FATAL_GOS; 717 718 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); 719 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 720 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 721 PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, 722 PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, 723 "cap_off", DATA_TYPE_UINT16, cs.cap_off, 724 "aer_off", DATA_TYPE_UINT16, cs.aer_off, 725 "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, 726 "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, 727 "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, 728 "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, 729 "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, 730 "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, 731 "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, 732 "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, 733 "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, 734 "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, 735 "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, 736 "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, 737 "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, 738 "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, 739 "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, 740 "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, 741 "severity", DATA_TYPE_UINT32, ret, 742 NULL); 743 744 /* Check for protected access */ 745 switch (derr->fme_flag) { 746 case DDI_FM_ERR_EXPECTED: 747 case DDI_FM_ERR_PEEK: 748 case DDI_FM_ERR_POKE: 749 ret &= PX_FATAL_GOS; 750 break; 751 } 752 753 754 if (px_fabric_die && 755 (ret & (PX_FATAL_GOS | PX_FATAL_SW))) 756 ret = DDI_FM_FATAL; 757 758 return (ret); 759 } 760 761 /* 762 * px_err_fabric_intr: 763 * Interrupt handler for PCIE fabric block. 764 * o lock 765 * o create derr 766 * o px_err_handle(leaf, with jbc) 767 * o send ereport(fire fmri, derr, payload = BDF) 768 * o dispatch (leaf) 769 * o unlock 770 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 771 */ 772 /* ARGSUSED */ 773 uint_t 774 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 775 pcie_req_id_t rid) 776 { 777 dev_info_t *rpdip = px_p->px_dip; 778 px_cb_t *cb_p = px_p->px_cb_p; 779 int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; 780 ddi_fm_error_t derr; 781 782 mutex_enter(&cb_p->xbc_fm_mutex); 783 784 /* Create the derr */ 785 bzero(&derr, sizeof (ddi_fm_error_t)); 786 derr.fme_version = DDI_FME_VERSION; 787 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 788 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 789 790 /* send ereport/handle/clear fire registers */ 791 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 792 793 /* Check and clear the fabric error */ 794 fab_err = px_fabric_check(px_p, msg_code, rid, &derr); 795 796 /* Check all child devices for errors */ 797 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 798 799 mutex_exit(&cb_p->xbc_fm_mutex); 800 801 /* 802 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 803 * therefore it does not cause panic. 804 */ 805 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || 806 (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) 807 PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" 808 "(%x,%x,%x)\n", ddi_driver_name(rpdip), 809 ddi_get_instance(rpdip), err, fab_err, ret); 810 811 return (DDI_INTR_CLAIMED); 812 } 813 814 /* 815 * px_err_safeacc_check: 816 * Check to see if a peek/poke and cautious access is currently being 817 * done on a particular leaf. 818 * 819 * Safe access reads induced fire errors will be handled by cpu trap handler 820 * which will call px_fm_callback() which calls this function. In that 821 * case, the derr fields will be set by trap handler with the correct values. 822 * 823 * Safe access writes induced errors will be handled by px interrupt 824 * handlers, this function will fill in the derr fields. 825 * 826 * If a cpu trap does occur, it will quiesce all other interrupts allowing 827 * the cpu trap error handling to finish before Fire receives an interrupt. 828 * 829 * If fire does indeed have an error when a cpu trap occurs as a result of 830 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 831 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 832 * handler and this function will need to find if this error occured in the 833 * middle of a safe access operation. 834 * 835 * @param px_p leaf in which to check access 836 * @param derr fm err data structure to be updated 837 */ 838 void 839 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 840 { 841 px_pec_t *pec_p = px_p->px_pec_p; 842 px_cb_t *cb_p = px_p->px_cb_p; 843 int acctype = pec_p->pec_safeacc_type; 844 845 ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex)); 846 847 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 848 return; 849 } 850 851 /* safe access checking */ 852 switch (acctype) { 853 case DDI_FM_ERR_EXPECTED: 854 /* 855 * cautious access protection, protected from all err. 856 */ 857 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 858 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 859 DDI_FME_VERSION); 860 derr->fme_flag = acctype; 861 derr->fme_acc_handle = pec_p->pec_acc_hdl; 862 break; 863 case DDI_FM_ERR_POKE: 864 /* 865 * ddi_poke protection, check nexus and children for 866 * expected errors. 867 */ 868 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 869 membar_sync(); 870 derr->fme_flag = acctype; 871 break; 872 case DDI_FM_ERR_PEEK: 873 derr->fme_flag = acctype; 874 break; 875 } 876 } 877