1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * PX Fault Management Architecture 31 */ 32 #include <sys/types.h> 33 #include <sys/sunndi.h> 34 #include <sys/sunddi.h> 35 #include <sys/fm/protocol.h> 36 #include <sys/fm/util.h> 37 #include <sys/membar.h> 38 #include "px_obj.h" 39 40 typedef struct px_fabric_cfgspace { 41 /* Error information */ 42 msgcode_t msg_code; 43 pcie_req_id_t rid; 44 45 /* Config space header and device type */ 46 uint8_t hdr_type; 47 uint16_t dev_type; 48 49 /* Register pointers */ 50 uint16_t cap_off; 51 uint16_t aer_off; 52 53 /* PCI register values */ 54 uint32_t sts_reg; 55 uint32_t sts_sreg; 56 57 /* PCIE register values */ 58 uint32_t dev_sts_reg; 59 uint32_t aer_ce_reg; 60 uint32_t aer_ue_reg; 61 uint32_t aer_sev_reg; 62 uint32_t aer_ue_sreg; 63 uint32_t aer_sev_sreg; 64 65 /* PCIE Header Log Registers */ 66 uint32_t aer_h1; 67 uint32_t aer_h2; 68 uint32_t aer_h3; 69 uint32_t aer_h4; 70 uint32_t aer_sh1; 71 uint32_t aer_sh2; 72 uint32_t aer_sh3; 73 uint32_t aer_sh4; 74 } px_fabric_cfgspace_t; 75 76 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); 77 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); 78 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); 79 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); 80 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); 81 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); 82 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); 83 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); 84 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, 85 pcie_req_id_t rid, ddi_fm_error_t *derr); 86 87 /* 88 * Initialize px FMA support 89 */ 90 int 91 px_fm_attach(px_t *px_p) 92 { 93 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 94 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 95 96 /* 97 * check parents' capability 98 */ 99 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 100 101 /* 102 * parents need to be ereport and error handling capable 103 */ 104 ASSERT(px_p->px_fm_cap && 105 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 106 107 /* 108 * register error callback in parent 109 */ 110 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 111 112 return (DDI_SUCCESS); 113 } 114 115 /* 116 * Deregister FMA 117 */ 118 void 119 px_fm_detach(px_t *px_p) 120 { 121 ddi_fm_handler_unregister(px_p->px_dip); 122 ddi_fm_fini(px_p->px_dip); 123 } 124 125 /* 126 * Function used to setup access functions depending on level of desired 127 * protection. 128 */ 129 void 130 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 131 { 132 uchar_t fflag; 133 ddi_acc_hdl_t *hp; 134 ddi_acc_impl_t *ap; 135 136 hp = mp->map_handlep; 137 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 138 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 139 140 if (mp->map_op == DDI_MO_MAP_LOCKED) { 141 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 142 switch (fflag) { 143 case DDI_FLAGERR_ACC: 144 ap->ahi_get8 = i_ddi_prot_get8; 145 ap->ahi_get16 = i_ddi_prot_get16; 146 ap->ahi_get32 = i_ddi_prot_get32; 147 ap->ahi_get64 = i_ddi_prot_get64; 148 ap->ahi_put8 = i_ddi_prot_put8; 149 ap->ahi_put16 = i_ddi_prot_put16; 150 ap->ahi_put32 = i_ddi_prot_put32; 151 ap->ahi_put64 = i_ddi_prot_put64; 152 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 153 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 154 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 155 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 156 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 157 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 158 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 159 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 160 break; 161 case DDI_CAUTIOUS_ACC : 162 ap->ahi_get8 = i_ddi_caut_get8; 163 ap->ahi_get16 = i_ddi_caut_get16; 164 ap->ahi_get32 = i_ddi_caut_get32; 165 ap->ahi_get64 = i_ddi_caut_get64; 166 ap->ahi_put8 = i_ddi_caut_put8; 167 ap->ahi_put16 = i_ddi_caut_put16; 168 ap->ahi_put32 = i_ddi_caut_put32; 169 ap->ahi_put64 = i_ddi_caut_put64; 170 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 171 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 172 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 173 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 174 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 175 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 176 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 177 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 178 break; 179 default: 180 break; 181 } 182 } else if (mp->map_op == DDI_MO_UNMAP) { 183 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 184 } 185 } 186 187 /* 188 * Function called after a dma fault occurred to find out whether the 189 * fault address is associated with a driver that is able to handle faults 190 * and recover from faults. The driver has to set DDI_DMA_FLAGERR and 191 * cache dma handles in order to make this checking effective to help 192 * recovery from dma faults. 193 */ 194 /* ARGSUSED */ 195 static int 196 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr, 197 const void *not_used) 198 { 199 ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle; 200 pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 201 pfn_t comp_pfn; 202 int page; 203 204 /* 205 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not 206 * been effectively initialized during attach. 207 */ 208 ASSERT(mp); 209 210 for (page = 0; page < mp->dmai_ndvmapages; page++) { 211 comp_pfn = PX_GET_MP_PFN(mp, page); 212 if (fault_pfn == comp_pfn) 213 return (DDI_FM_NONFATAL); 214 } 215 216 return (DDI_FM_UNKNOWN); 217 } 218 219 /* 220 * Function used to check if a given access handle owns the failing address. 221 * Called by ndi_fmc_error, when we detect a PIO error. 222 */ 223 /* ARGSUSED */ 224 static int 225 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr, 226 const void *not_used) 227 { 228 pfn_t pfn, fault_pfn; 229 ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); 230 231 /* 232 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not 233 * been effectively initialized during attach. 234 */ 235 ASSERT(hp); 236 237 pfn = hp->ah_pfn; 238 fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 239 if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) 240 return (DDI_FM_NONFATAL); 241 242 return (DDI_FM_UNKNOWN); 243 } 244 245 /* 246 * Function used by PCI error handlers to check if captured address is stored 247 * in the DMA or ACC handle caches. 248 */ 249 int 250 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 251 { 252 uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap; 253 int (*f)() = type == DMA_HANDLE ? 254 (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) : 255 (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL); 256 257 return (f ? ndi_fmc_error(dip, NULL, type, f, fme_ena, afar) : 258 DDI_FM_UNKNOWN); 259 } 260 261 /* 262 * Function used to initialize FMA for our children nodes. Called 263 * through pci busops when child node calls ddi_fm_init. 264 */ 265 /*ARGSUSED*/ 266 int 267 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 268 ddi_iblock_cookie_t *ibc_p) 269 { 270 px_t *px_p = DIP_TO_STATE(dip); 271 272 ASSERT(ibc_p != NULL); 273 *ibc_p = px_p->px_fm_ibc; 274 275 return (px_p->px_fm_cap); 276 } 277 278 /* 279 * lock access for exclusive PCIe access 280 */ 281 void 282 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 283 { 284 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 285 286 /* 287 * Exclusive access has been used for cautious put/get, 288 * Both utilize i_ddi_ontrap which, on sparcv9, implements 289 * similar protection as what on_trap() does, and which calls 290 * membar #Sync to flush out all cpu deferred errors 291 * prior to get/put operation, so here we're not calling 292 * membar #Sync - a difference from what's in pci_bus_enter(). 293 */ 294 mutex_enter(&pec_p->pec_pokefault_mutex); 295 pec_p->pec_acc_hdl = handle; 296 } 297 298 /* 299 * unlock access for exclusive PCIe access 300 */ 301 /* ARGSUSED */ 302 void 303 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 304 { 305 px_t *px_p = DIP_TO_STATE(dip); 306 px_pec_t *pec_p = px_p->px_pec_p; 307 308 pec_p->pec_acc_hdl = NULL; 309 mutex_exit(&pec_p->pec_pokefault_mutex); 310 } 311 312 313 /* 314 * PCI error callback which is registered with our parent to call 315 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 316 * and PCI BERR/TO/UE 317 * 318 * Dispatch on all known leaves of this fire device because we cannot tell 319 * which side the error came from. 320 */ 321 /*ARGSUSED*/ 322 int 323 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 324 { 325 px_t *px_p = (px_t *)impl_data; 326 px_cb_t *cb_p = px_p->px_cb_p; 327 int err = PX_OK; 328 int fatal = 0; 329 int nonfatal = 0; 330 int unknown = 0; 331 int ret = DDI_FM_OK; 332 int i; 333 334 mutex_enter(&cb_p->xbc_fm_mutex); 335 336 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 337 px_p = cb_p->xbc_px_list[i]; 338 if (px_p != NULL) 339 err |= px_err_handle(px_p, derr, PX_TRAP_CALL, 340 (i == 0)); 341 } 342 343 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 344 px_p = cb_p->xbc_px_list[i]; 345 if (px_p != NULL) { 346 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 347 switch (ret) { 348 case DDI_FM_FATAL: 349 fatal++; 350 break; 351 case DDI_FM_NONFATAL: 352 nonfatal++; 353 break; 354 case DDI_FM_UNKNOWN: 355 unknown++; 356 break; 357 default: 358 break; 359 } 360 } 361 } 362 mutex_exit(&cb_p->xbc_fm_mutex); 363 364 ret = (fatal != 0) ? DDI_FM_FATAL : 365 ((nonfatal != 0) ? DDI_FM_NONFATAL : 366 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 367 368 /* fire fatal error overrides device error */ 369 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 370 ret = DDI_FM_FATAL; 371 /* if fire encounts no error, then take whatever device error */ 372 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 373 ret = DDI_FM_NONFATAL; 374 375 return (ret); 376 } 377 378 static uint16_t 379 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) 380 { 381 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 382 uint16_t offset = PCIE_EXT_CAP; 383 int deadcount = 0; 384 385 /* Find the Advanced Error Register */ 386 hdr = px_fab_get(px_p, rid, offset); 387 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 388 PCIE_EXT_CAP_NEXT_PTR_MASK; 389 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 390 PCIE_EXT_CAP_ID_MASK; 391 392 while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && 393 (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { 394 offset = hdr_next_ptr; 395 hdr = px_fab_get(px_p, rid, offset); 396 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 397 PCIE_EXT_CAP_NEXT_PTR_MASK; 398 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 399 PCIE_EXT_CAP_ID_MASK; 400 401 if (deadcount++ > 100) 402 break; 403 } 404 405 if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) 406 return (offset); 407 408 return (0); 409 } 410 411 static uint16_t 412 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) 413 { 414 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 415 uint16_t offset = PCI_CONF_STAT; 416 int deadcount = 0; 417 418 hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; 419 if (!(hdr & PCI_STAT_CAP)) { 420 /* This is not a PCIE device */ 421 return (0); 422 } 423 424 hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); 425 hdr_next_ptr = hdr & 0xFF; 426 hdr_cap_id = 0; 427 428 while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && 429 (hdr_cap_id != PCI_CAP_ID_PCI_E)) { 430 offset = hdr_next_ptr; 431 432 if (hdr_next_ptr < 0x40) { 433 break; 434 } 435 436 hdr = px_fab_get(px_p, rid, hdr_next_ptr); 437 hdr_next_ptr = (hdr >> 8) & 0xFF; 438 hdr_cap_id = hdr & 0xFF; 439 440 if (deadcount++ > 100) 441 break; 442 } 443 444 if (hdr_cap_id == PCI_CAP_ID_PCI_E) 445 return (offset); 446 447 return (0); 448 } 449 450 /* 451 * This function checks the primary status registers. 452 * Take the PCI status register and translate it to PCIe equivalent. 453 */ 454 static int 455 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { 456 uint16_t sts_reg = cs->sts_reg >> 16; 457 uint16_t pci_status; 458 uint32_t pcie_status; 459 int ret = PX_NONFATAL; 460 461 /* Parity Err == Send/Recv Poisoned TLP */ 462 pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; 463 pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; 464 if (sts_reg & pci_status) 465 ret |= PX_FABRIC_ERR_SEV(pcie_status, 466 px_fabric_die_ue, px_fabric_die_ue_gos); 467 468 /* Target Abort == Completer Abort */ 469 pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; 470 pcie_status = PCIE_AER_UCE_CA; 471 if (sts_reg & pci_status) 472 ret |= PX_FABRIC_ERR_SEV(pcie_status, 473 px_fabric_die_ue, px_fabric_die_ue_gos); 474 475 /* Master Abort == Unsupport Request */ 476 pci_status = PCI_STAT_R_MAST_AB; 477 pcie_status = PCIE_AER_UCE_UR; 478 if (sts_reg & pci_status) 479 ret |= PX_FABRIC_ERR_SEV(pcie_status, 480 px_fabric_die_ue, px_fabric_die_ue_gos); 481 482 /* System Error == Uncorrectable Error */ 483 pci_status = PCI_STAT_S_SYSERR; 484 pcie_status = -1; 485 if (sts_reg & pci_status) 486 ret |= PX_FABRIC_ERR_SEV(pcie_status, 487 px_fabric_die_ue, px_fabric_die_ue_gos); 488 489 return (ret); 490 } 491 492 /* 493 * This function checks the secondary status registers. 494 * Switches and Bridges have a different behavior. 495 */ 496 static int 497 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { 498 uint16_t sts_reg = cs->sts_sreg >> 16; 499 int ret = PX_NONFATAL; 500 501 if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { 502 /* 503 * This is a PCIE-PCI bridge, but only check the severity 504 * if this device doesn't support AERs. 505 */ 506 if (!cs->aer_off) 507 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, 508 px_fabric_die_bdg_sts_gos); 509 } else { 510 /* This is most likely a PCIE switch */ 511 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, 512 px_fabric_die_sw_sts_gos); 513 } 514 515 return (ret); 516 } 517 518 /* 519 * This function checks and clears the primary AER. 520 */ 521 static int 522 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { 523 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 524 int ret = PX_NONFATAL; 525 526 /* Determine severity and clear the AER */ 527 switch (cs->msg_code) { 528 case PCIE_MSG_CODE_ERR_COR: 529 off_reg = PCIE_AER_CE_STS; 530 chk_reg = px_fabric_die_ce; 531 chk_reg_gos = px_fabric_die_ce_gos; 532 reg = cs->aer_ce_reg; 533 break; 534 case PCIE_MSG_CODE_ERR_NONFATAL: 535 off_reg = PCIE_AER_UCE_STS; 536 chk_reg = px_fabric_die_ue; 537 chk_reg_gos = px_fabric_die_ue_gos; 538 reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); 539 break; 540 case PCIE_MSG_CODE_ERR_FATAL: 541 off_reg = PCIE_AER_UCE_STS; 542 chk_reg = px_fabric_die_ue; 543 chk_reg_gos = px_fabric_die_ue_gos; 544 reg = cs->aer_ue_reg & cs->aer_sev_reg; 545 break; 546 default: 547 /* Major error force a panic */ 548 return (PX_FATAL_GOS); 549 } 550 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 551 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 552 553 return (ret); 554 } 555 556 /* 557 * This function checks and clears the secondary AER. 558 */ 559 static int 560 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { 561 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 562 uint32_t sev; 563 int ret = PX_NONFATAL; 564 565 /* Determine severity and clear the AER */ 566 switch (cs->msg_code) { 567 case PCIE_MSG_CODE_ERR_COR: 568 /* Ignore Correctable Errors */ 569 sev = 0; 570 break; 571 case PCIE_MSG_CODE_ERR_NONFATAL: 572 sev = ~(cs->aer_sev_sreg); 573 break; 574 case PCIE_MSG_CODE_ERR_FATAL: 575 sev = cs->aer_sev_sreg; 576 break; 577 default: 578 /* Major error force a panic */ 579 return (DDI_FM_FATAL); 580 } 581 off_reg = PCIE_AER_SUCE_STS; 582 chk_reg = px_fabric_die_sue; 583 chk_reg_gos = px_fabric_die_sue_gos; 584 reg = cs->aer_ue_sreg & sev; 585 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 586 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 587 588 return (ret); 589 } 590 591 static int 592 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) 593 { 594 pcie_req_id_t rid = cs->rid; 595 uint16_t cap_off = cs->cap_off; 596 uint16_t aer_off = cs->aer_off; 597 uint8_t hdr_type = cs->hdr_type; 598 uint16_t dev_type = cs->dev_type; 599 int ret = PX_NONFATAL; 600 601 if (hdr_type == PCI_HEADER_PPB) { 602 ret |= px_fabric_handle_ssts(cs); 603 } 604 605 if (!aer_off) { 606 ret |= px_fabric_handle_psts(cs); 607 } 608 609 if (aer_off) { 610 ret |= px_fabric_handle_paer(px_p, cs); 611 } 612 613 if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { 614 ret |= px_fabric_handle_saer(px_p, cs); 615 } 616 617 /* Clear the standard PCIe error registers */ 618 px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); 619 620 /* Clear the legacy error registers */ 621 px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); 622 623 /* Clear the legacy secondary error registers */ 624 if (hdr_type == PCI_HEADER_PPB) { 625 px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, 626 cs->sts_sreg); 627 } 628 629 return (ret); 630 } 631 632 static void 633 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) 634 { 635 uint16_t cap_off, aer_off; 636 pcie_req_id_t rid = cs->rid; 637 638 /* Gather Basic Device Information */ 639 cs->hdr_type = (px_fab_get(px_p, rid, 640 PCI_CONF_CACHE_LINESZ) >> 16) & 0xFF; 641 642 cs->cap_off = px_fabric_get_pciecap(px_p, rid); 643 cap_off = cs->cap_off; 644 if (!cap_off) 645 return; 646 647 cs->aer_off = px_fabric_get_aer(px_p, rid); 648 aer_off = cs->aer_off; 649 650 cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; 651 cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; 652 653 /* Get the Primary Sts Reg */ 654 cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); 655 656 /* If it is a bridge/switch get the Secondary Sts Reg */ 657 if (cs->hdr_type == PCI_HEADER_PPB) 658 cs->sts_sreg = px_fab_get(px_p, rid, 659 PCI_BCNF_IO_BASE_LOW); 660 661 /* Get the PCIe Dev Sts Reg */ 662 cs->dev_sts_reg = px_fab_get(px_p, rid, 663 cap_off + PCIE_DEVCTL); 664 665 if (!aer_off) 666 return; 667 668 /* Get the AER register information */ 669 cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); 670 cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); 671 cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); 672 cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); 673 cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); 674 cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); 675 cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); 676 677 if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) 678 return; 679 680 /* If this is a bridge check secondary aer */ 681 cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); 682 cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); 683 cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); 684 cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); 685 cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); 686 cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); 687 } 688 689 /* 690 * If a fabric intr occurs, query and clear the error registers on that device. 691 * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. 692 */ 693 static uint_t 694 px_fabric_check(px_t *px_p, msgcode_t msg_code, 695 pcie_req_id_t rid, ddi_fm_error_t *derr) 696 { 697 dev_info_t *dip = px_p->px_dip; 698 char buf[FM_MAX_CLASS]; 699 px_fabric_cfgspace_t cs; 700 int ret; 701 702 /* clear cs */ 703 bzero(&cs, sizeof (px_fabric_cfgspace_t)); 704 705 cs.msg_code = msg_code; 706 cs.rid = rid; 707 708 px_fabric_fill_cs(px_p, &cs); 709 if (cs.cap_off) 710 ret = px_fabric_handle(px_p, &cs); 711 else 712 ret = PX_FATAL_GOS; 713 714 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); 715 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 716 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 717 PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, 718 PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, 719 "cap_off", DATA_TYPE_UINT16, cs.cap_off, 720 "aer_off", DATA_TYPE_UINT16, cs.aer_off, 721 "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, 722 "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, 723 "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, 724 "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, 725 "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, 726 "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, 727 "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, 728 "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, 729 "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, 730 "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, 731 "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, 732 "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, 733 "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, 734 "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, 735 "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, 736 "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, 737 "severity", DATA_TYPE_UINT32, ret, 738 NULL); 739 740 /* Check for protected access */ 741 switch (derr->fme_flag) { 742 case DDI_FM_ERR_EXPECTED: 743 case DDI_FM_ERR_PEEK: 744 case DDI_FM_ERR_POKE: 745 ret &= PX_FATAL_GOS; 746 break; 747 } 748 749 750 if (px_fabric_die && 751 (ret & (PX_FATAL_GOS | PX_FATAL_SW))) 752 ret = DDI_FM_FATAL; 753 754 return (ret); 755 } 756 757 /* 758 * px_err_fabric_intr: 759 * Interrupt handler for PCIE fabric block. 760 * o lock 761 * o create derr 762 * o px_err_handle(leaf, with jbc) 763 * o send ereport(fire fmri, derr, payload = BDF) 764 * o dispatch (leaf) 765 * o unlock 766 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 767 */ 768 /* ARGSUSED */ 769 uint_t 770 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 771 pcie_req_id_t rid) 772 { 773 dev_info_t *rpdip = px_p->px_dip; 774 px_cb_t *cb_p = px_p->px_cb_p; 775 int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; 776 ddi_fm_error_t derr; 777 778 mutex_enter(&cb_p->xbc_fm_mutex); 779 780 /* Create the derr */ 781 bzero(&derr, sizeof (ddi_fm_error_t)); 782 derr.fme_version = DDI_FME_VERSION; 783 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 784 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 785 786 /* send ereport/handle/clear fire registers */ 787 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 788 789 /* Check and clear the fabric error */ 790 fab_err = px_fabric_check(px_p, msg_code, rid, &derr); 791 792 /* Check all child devices for errors */ 793 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 794 795 mutex_exit(&cb_p->xbc_fm_mutex); 796 797 /* 798 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 799 * therefore it does not cause panic. 800 */ 801 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || 802 (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) 803 fm_panic("Fatal PCIe Fabric Error has occurred\n"); 804 805 return (DDI_INTR_CLAIMED); 806 } 807 808 /* 809 * px_err_safeacc_check: 810 * Check to see if a peek/poke and cautious access is currently being 811 * done on a particular leaf. 812 * 813 * Safe access reads induced fire errors will be handled by cpu trap handler 814 * which will call px_fm_callback() which calls this function. In that 815 * case, the derr fields will be set by trap handler with the correct values. 816 * 817 * Safe access writes induced errors will be handled by px interrupt 818 * handlers, this function will fill in the derr fields. 819 * 820 * If a cpu trap does occur, it will quiesce all other interrupts allowing 821 * the cpu trap error handling to finish before Fire receives an interrupt. 822 * 823 * If fire does indeed have an error when a cpu trap occurs as a result of 824 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 825 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 826 * handler and this function will need to find if this error occured in the 827 * middle of a safe access operation. 828 * 829 * @param px_p leaf in which to check access 830 * @param derr fm err data structure to be updated 831 */ 832 void 833 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 834 { 835 px_pec_t *pec_p = px_p->px_pec_p; 836 px_cb_t *cb_p = px_p->px_cb_p; 837 int acctype = pec_p->pec_safeacc_type; 838 839 ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex)); 840 841 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 842 return; 843 } 844 845 /* safe access checking */ 846 switch (acctype) { 847 case DDI_FM_ERR_EXPECTED: 848 /* 849 * cautious access protection, protected from all err. 850 */ 851 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 852 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 853 DDI_FME_VERSION); 854 derr->fme_flag = acctype; 855 derr->fme_acc_handle = pec_p->pec_acc_hdl; 856 break; 857 case DDI_FM_ERR_POKE: 858 /* 859 * ddi_poke protection, check nexus and children for 860 * expected errors. 861 */ 862 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 863 membar_sync(); 864 derr->fme_flag = acctype; 865 break; 866 case DDI_FM_ERR_PEEK: 867 derr->fme_flag = acctype; 868 break; 869 } 870 } 871