1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * PX Fault Management Architecture 30 */ 31 #include <sys/types.h> 32 #include <sys/sunndi.h> 33 #include <sys/sunddi.h> 34 #include <sys/fm/protocol.h> 35 #include <sys/fm/util.h> 36 #include <sys/membar.h> 37 #include "px_obj.h" 38 39 typedef struct px_fabric_cfgspace { 40 /* Error information */ 41 msgcode_t msg_code; 42 pcie_req_id_t rid; 43 44 /* Config space header and device type */ 45 uint8_t hdr_type; 46 uint16_t dev_type; 47 48 /* Register pointers */ 49 uint16_t cap_off; 50 uint16_t aer_off; 51 52 /* PCI register values */ 53 uint32_t sts_reg; 54 uint32_t sts_sreg; 55 56 /* PCIE register values */ 57 uint32_t dev_sts_reg; 58 uint32_t aer_ce_reg; 59 uint32_t aer_ue_reg; 60 uint32_t aer_sev_reg; 61 uint32_t aer_ue_sreg; 62 uint32_t aer_sev_sreg; 63 64 /* PCIE Header Log Registers */ 65 uint32_t aer_h1; 66 uint32_t aer_h2; 67 uint32_t aer_h3; 68 uint32_t aer_h4; 69 uint32_t aer_sh1; 70 uint32_t aer_sh2; 71 uint32_t aer_sh3; 72 uint32_t aer_sh4; 73 } px_fabric_cfgspace_t; 74 75 static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); 76 static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); 77 static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); 78 static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); 79 static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); 80 static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); 81 static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); 82 static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); 83 static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, 84 pcie_req_id_t rid, ddi_fm_error_t *derr); 85 86 /* 87 * Initialize px FMA support 88 */ 89 int 90 px_fm_attach(px_t *px_p) 91 { 92 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 93 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 94 95 /* 96 * Initialize pci_target_queue for FMA handling of 97 * pci errors. 98 */ 99 pci_targetq_init(); 100 101 /* 102 * check parents' capability 103 */ 104 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 105 106 /* 107 * parents need to be ereport and error handling capable 108 */ 109 ASSERT(px_p->px_fm_cap && 110 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 111 112 /* 113 * Initialize lock to synchronize fabric error handling 114 */ 115 mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER, 116 (void *)px_p->px_fm_ibc); 117 118 /* 119 * register error callback in parent 120 */ 121 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 122 123 return (DDI_SUCCESS); 124 } 125 126 /* 127 * Deregister FMA 128 */ 129 void 130 px_fm_detach(px_t *px_p) 131 { 132 ddi_fm_handler_unregister(px_p->px_dip); 133 mutex_destroy(&px_p->px_fm_mutex); 134 ddi_fm_fini(px_p->px_dip); 135 } 136 137 /* 138 * Function used to setup access functions depending on level of desired 139 * protection. 140 */ 141 void 142 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 143 { 144 uchar_t fflag; 145 ddi_acc_hdl_t *hp; 146 ddi_acc_impl_t *ap; 147 148 hp = mp->map_handlep; 149 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 150 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 151 152 if (mp->map_op == DDI_MO_MAP_LOCKED) { 153 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 154 switch (fflag) { 155 case DDI_FLAGERR_ACC: 156 ap->ahi_get8 = i_ddi_prot_get8; 157 ap->ahi_get16 = i_ddi_prot_get16; 158 ap->ahi_get32 = i_ddi_prot_get32; 159 ap->ahi_get64 = i_ddi_prot_get64; 160 ap->ahi_put8 = i_ddi_prot_put8; 161 ap->ahi_put16 = i_ddi_prot_put16; 162 ap->ahi_put32 = i_ddi_prot_put32; 163 ap->ahi_put64 = i_ddi_prot_put64; 164 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 165 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 166 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 167 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 168 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 169 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 170 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 171 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 172 break; 173 case DDI_CAUTIOUS_ACC : 174 ap->ahi_get8 = i_ddi_caut_get8; 175 ap->ahi_get16 = i_ddi_caut_get16; 176 ap->ahi_get32 = i_ddi_caut_get32; 177 ap->ahi_get64 = i_ddi_caut_get64; 178 ap->ahi_put8 = i_ddi_caut_put8; 179 ap->ahi_put16 = i_ddi_caut_put16; 180 ap->ahi_put32 = i_ddi_caut_put32; 181 ap->ahi_put64 = i_ddi_caut_put64; 182 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 183 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 184 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 185 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 186 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 187 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 188 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 189 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 190 break; 191 default: 192 break; 193 } 194 } else if (mp->map_op == DDI_MO_UNMAP) { 195 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 196 } 197 } 198 199 /* 200 * Function used by PCI error handlers to check if captured address is stored 201 * in the DMA or ACC handle caches. 202 */ 203 int 204 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 205 { 206 int ret = ndi_fmc_error(dip, NULL, type, fme_ena, afar); 207 return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); 208 } 209 210 /* 211 * Function used to initialize FMA for our children nodes. Called 212 * through pci busops when child node calls ddi_fm_init. 213 */ 214 /*ARGSUSED*/ 215 int 216 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 217 ddi_iblock_cookie_t *ibc_p) 218 { 219 px_t *px_p = DIP_TO_STATE(dip); 220 221 ASSERT(ibc_p != NULL); 222 *ibc_p = px_p->px_fm_ibc; 223 224 return (px_p->px_fm_cap); 225 } 226 227 /* 228 * lock access for exclusive PCIe access 229 */ 230 void 231 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 232 { 233 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 234 235 /* 236 * Exclusive access has been used for cautious put/get, 237 * Both utilize i_ddi_ontrap which, on sparcv9, implements 238 * similar protection as what on_trap() does, and which calls 239 * membar #Sync to flush out all cpu deferred errors 240 * prior to get/put operation, so here we're not calling 241 * membar #Sync - a difference from what's in pci_bus_enter(). 242 */ 243 mutex_enter(&pec_p->pec_pokefault_mutex); 244 pec_p->pec_acc_hdl = handle; 245 } 246 247 /* 248 * unlock access for exclusive PCIe access 249 */ 250 /* ARGSUSED */ 251 void 252 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 253 { 254 px_t *px_p = DIP_TO_STATE(dip); 255 px_pec_t *pec_p = px_p->px_pec_p; 256 257 pec_p->pec_acc_hdl = NULL; 258 mutex_exit(&pec_p->pec_pokefault_mutex); 259 } 260 261 262 /* 263 * PCI error callback which is registered with our parent to call 264 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 265 * and PCI BERR/TO/UE 266 * 267 * Dispatch on all known leaves of this fire device because we cannot tell 268 * which side the error came from. 269 */ 270 /*ARGSUSED*/ 271 int 272 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 273 { 274 px_t *px_p = (px_t *)impl_data; 275 int err = PX_OK; 276 int fatal = 0; 277 int nonfatal = 0; 278 int unknown = 0; 279 int ret = DDI_FM_OK; 280 281 mutex_enter(&px_p->px_fm_mutex); 282 283 err = px_err_handle(px_p, derr, PX_TRAP_CALL, B_TRUE); 284 285 if (!px_lib_is_in_drain_state(px_p)) 286 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 287 288 mutex_exit(&px_p->px_fm_mutex); 289 290 switch (ret) { 291 case DDI_FM_FATAL: 292 fatal++; 293 break; 294 case DDI_FM_NONFATAL: 295 nonfatal++; 296 break; 297 case DDI_FM_UNKNOWN: 298 unknown++; 299 break; 300 default: 301 break; 302 } 303 304 ret = (fatal != 0) ? DDI_FM_FATAL : 305 ((nonfatal != 0) ? DDI_FM_NONFATAL : 306 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 307 308 /* fire fatal error overrides device error */ 309 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 310 ret = DDI_FM_FATAL; 311 /* if fire encounts no error, then take whatever device error */ 312 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 313 ret = DDI_FM_NONFATAL; 314 315 return (ret); 316 } 317 318 static uint16_t 319 px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) 320 { 321 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 322 uint16_t offset = PCIE_EXT_CAP; 323 int deadcount = 0; 324 325 /* Find the Advanced Error Register */ 326 hdr = px_fab_get(px_p, rid, offset); 327 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 328 PCIE_EXT_CAP_NEXT_PTR_MASK; 329 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 330 PCIE_EXT_CAP_ID_MASK; 331 332 while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && 333 (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { 334 offset = hdr_next_ptr; 335 hdr = px_fab_get(px_p, rid, offset); 336 hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & 337 PCIE_EXT_CAP_NEXT_PTR_MASK; 338 hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & 339 PCIE_EXT_CAP_ID_MASK; 340 341 if (deadcount++ > 100) 342 break; 343 } 344 345 if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) 346 return (offset); 347 348 return (0); 349 } 350 351 static uint16_t 352 px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) 353 { 354 uint32_t hdr, hdr_next_ptr, hdr_cap_id; 355 uint16_t offset = PCI_CONF_STAT; 356 int deadcount = 0; 357 358 hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; 359 if (!(hdr & PCI_STAT_CAP)) { 360 /* This is not a PCIE device */ 361 return (0); 362 } 363 364 hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); 365 hdr_next_ptr = hdr & 0xFF; 366 hdr_cap_id = 0; 367 368 while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && 369 (hdr_cap_id != PCI_CAP_ID_PCI_E)) { 370 offset = hdr_next_ptr; 371 372 if (hdr_next_ptr < 0x40) { 373 break; 374 } 375 376 hdr = px_fab_get(px_p, rid, hdr_next_ptr); 377 hdr_next_ptr = (hdr >> 8) & 0xFF; 378 hdr_cap_id = hdr & 0xFF; 379 380 if (deadcount++ > 100) 381 break; 382 } 383 384 if (hdr_cap_id == PCI_CAP_ID_PCI_E) 385 return (offset); 386 387 return (0); 388 } 389 390 /* 391 * This function checks the primary status registers. 392 * Take the PCI status register and translate it to PCIe equivalent. 393 */ 394 static int 395 px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { 396 uint16_t sts_reg = cs->sts_reg >> 16; 397 uint16_t pci_status; 398 uint32_t pcie_status; 399 int ret = PX_NONFATAL; 400 401 /* Parity Err == Send/Recv Poisoned TLP */ 402 pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; 403 pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; 404 if (sts_reg & pci_status) 405 ret |= PX_FABRIC_ERR_SEV(pcie_status, 406 px_fabric_die_ue, px_fabric_die_ue_gos); 407 408 /* Target Abort == Completer Abort */ 409 pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; 410 pcie_status = PCIE_AER_UCE_CA; 411 if (sts_reg & pci_status) 412 ret |= PX_FABRIC_ERR_SEV(pcie_status, 413 px_fabric_die_ue, px_fabric_die_ue_gos); 414 415 /* Master Abort == Unsupport Request */ 416 pci_status = PCI_STAT_R_MAST_AB; 417 pcie_status = PCIE_AER_UCE_UR; 418 if (sts_reg & pci_status) 419 ret |= PX_FABRIC_ERR_SEV(pcie_status, 420 px_fabric_die_ue, px_fabric_die_ue_gos); 421 422 /* System Error == Uncorrectable Error */ 423 pci_status = PCI_STAT_S_SYSERR; 424 pcie_status = -1; 425 if (sts_reg & pci_status) 426 ret |= PX_FABRIC_ERR_SEV(pcie_status, 427 px_fabric_die_ue, px_fabric_die_ue_gos); 428 429 return (ret); 430 } 431 432 /* 433 * This function checks the secondary status registers. 434 * Switches and Bridges have a different behavior. 435 */ 436 static int 437 px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { 438 uint16_t sts_reg = cs->sts_sreg >> 16; 439 int ret = PX_NONFATAL; 440 441 if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { 442 /* 443 * This is a PCIE-PCI bridge, but only check the severity 444 * if this device doesn't support AERs. 445 */ 446 if (!cs->aer_off) 447 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, 448 px_fabric_die_bdg_sts_gos); 449 } else { 450 /* This is most likely a PCIE switch */ 451 ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, 452 px_fabric_die_sw_sts_gos); 453 } 454 455 return (ret); 456 } 457 458 /* 459 * This function checks and clears the primary AER. 460 */ 461 static int 462 px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { 463 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 464 int ret = PX_NONFATAL; 465 466 /* Determine severity and clear the AER */ 467 switch (cs->msg_code) { 468 case PCIE_MSG_CODE_ERR_COR: 469 off_reg = PCIE_AER_CE_STS; 470 chk_reg = px_fabric_die_ce; 471 chk_reg_gos = px_fabric_die_ce_gos; 472 reg = cs->aer_ce_reg; 473 break; 474 case PCIE_MSG_CODE_ERR_NONFATAL: 475 off_reg = PCIE_AER_UCE_STS; 476 chk_reg = px_fabric_die_ue; 477 chk_reg_gos = px_fabric_die_ue_gos; 478 reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); 479 break; 480 case PCIE_MSG_CODE_ERR_FATAL: 481 off_reg = PCIE_AER_UCE_STS; 482 chk_reg = px_fabric_die_ue; 483 chk_reg_gos = px_fabric_die_ue_gos; 484 reg = cs->aer_ue_reg & cs->aer_sev_reg; 485 break; 486 default: 487 /* Major error force a panic */ 488 return (PX_FATAL_GOS); 489 } 490 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 491 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 492 493 return (ret); 494 } 495 496 /* 497 * This function checks and clears the secondary AER. 498 */ 499 static int 500 px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { 501 uint32_t chk_reg, chk_reg_gos, off_reg, reg; 502 uint32_t sev; 503 int ret = PX_NONFATAL; 504 505 /* Determine severity and clear the AER */ 506 switch (cs->msg_code) { 507 case PCIE_MSG_CODE_ERR_COR: 508 /* Ignore Correctable Errors */ 509 sev = 0; 510 break; 511 case PCIE_MSG_CODE_ERR_NONFATAL: 512 sev = ~(cs->aer_sev_sreg); 513 break; 514 case PCIE_MSG_CODE_ERR_FATAL: 515 sev = cs->aer_sev_sreg; 516 break; 517 default: 518 /* Major error force a panic */ 519 return (DDI_FM_FATAL); 520 } 521 off_reg = PCIE_AER_SUCE_STS; 522 chk_reg = px_fabric_die_sue; 523 chk_reg_gos = px_fabric_die_sue_gos; 524 reg = cs->aer_ue_sreg & sev; 525 px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); 526 ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); 527 528 return (ret); 529 } 530 531 static int 532 px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) 533 { 534 pcie_req_id_t rid = cs->rid; 535 uint16_t cap_off = cs->cap_off; 536 uint16_t aer_off = cs->aer_off; 537 uint8_t hdr_type = cs->hdr_type; 538 uint16_t dev_type = cs->dev_type; 539 int ret = PX_NONFATAL; 540 541 if (hdr_type == PCI_HEADER_PPB) { 542 ret |= px_fabric_handle_ssts(cs); 543 } 544 545 if (!aer_off) { 546 ret |= px_fabric_handle_psts(cs); 547 } 548 549 if (aer_off) { 550 ret |= px_fabric_handle_paer(px_p, cs); 551 } 552 553 if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { 554 ret |= px_fabric_handle_saer(px_p, cs); 555 } 556 557 /* Clear the standard PCIe error registers */ 558 px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); 559 560 /* Clear the legacy error registers */ 561 px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); 562 563 /* Clear the legacy secondary error registers */ 564 if (hdr_type == PCI_HEADER_PPB) { 565 px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, 566 cs->sts_sreg); 567 } 568 569 return (ret); 570 } 571 572 static void 573 px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) 574 { 575 uint16_t cap_off, aer_off; 576 pcie_req_id_t rid = cs->rid; 577 578 /* Gather Basic Device Information */ 579 cs->hdr_type = (px_fab_get(px_p, rid, PCI_CONF_CACHE_LINESZ) >> 16) & 580 PCI_HEADER_TYPE_M; 581 582 cs->cap_off = px_fabric_get_pciecap(px_p, rid); 583 cap_off = cs->cap_off; 584 if (!cap_off) 585 return; 586 587 cs->aer_off = px_fabric_get_aer(px_p, rid); 588 aer_off = cs->aer_off; 589 590 cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; 591 cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; 592 593 /* Get the Primary Sts Reg */ 594 cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); 595 596 /* If it is a bridge/switch get the Secondary Sts Reg */ 597 if (cs->hdr_type == PCI_HEADER_PPB) 598 cs->sts_sreg = px_fab_get(px_p, rid, 599 PCI_BCNF_IO_BASE_LOW); 600 601 /* Get the PCIe Dev Sts Reg */ 602 cs->dev_sts_reg = px_fab_get(px_p, rid, 603 cap_off + PCIE_DEVCTL); 604 605 if (!aer_off) 606 return; 607 608 /* Get the AER register information */ 609 cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); 610 cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); 611 cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); 612 cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); 613 cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); 614 cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); 615 cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); 616 617 if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) 618 return; 619 620 /* If this is a bridge check secondary aer */ 621 cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); 622 cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); 623 cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); 624 cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); 625 cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); 626 cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); 627 } 628 629 /* 630 * If a fabric intr occurs, query and clear the error registers on that device. 631 * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. 632 */ 633 static uint_t 634 px_fabric_check(px_t *px_p, msgcode_t msg_code, 635 pcie_req_id_t rid, ddi_fm_error_t *derr) 636 { 637 dev_info_t *dip = px_p->px_dip; 638 char buf[FM_MAX_CLASS]; 639 px_fabric_cfgspace_t cs; 640 int ret; 641 642 /* clear cs */ 643 bzero(&cs, sizeof (px_fabric_cfgspace_t)); 644 645 cs.msg_code = msg_code; 646 cs.rid = rid; 647 648 px_fabric_fill_cs(px_p, &cs); 649 if (cs.cap_off) 650 ret = px_fabric_handle(px_p, &cs); 651 else 652 ret = PX_FATAL_GOS; 653 654 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); 655 ddi_fm_ereport_post(dip, buf, derr->fme_ena, 656 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, 657 PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, 658 PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, 659 "cap_off", DATA_TYPE_UINT16, cs.cap_off, 660 "aer_off", DATA_TYPE_UINT16, cs.aer_off, 661 "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, 662 "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, 663 "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, 664 "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, 665 "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, 666 "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, 667 "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, 668 "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, 669 "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, 670 "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, 671 "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, 672 "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, 673 "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, 674 "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, 675 "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, 676 "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, 677 "severity", DATA_TYPE_UINT32, ret, 678 NULL); 679 680 /* Check for protected access */ 681 switch (derr->fme_flag) { 682 case DDI_FM_ERR_EXPECTED: 683 case DDI_FM_ERR_PEEK: 684 case DDI_FM_ERR_POKE: 685 ret &= PX_FATAL_GOS; 686 break; 687 } 688 689 690 if (px_fabric_die && 691 (ret & (PX_FATAL_GOS | PX_FATAL_SW))) 692 ret = DDI_FM_FATAL; 693 return (ret); 694 } 695 696 /* 697 * px_err_fabric_intr: 698 * Interrupt handler for PCIE fabric block. 699 * o lock 700 * o create derr 701 * o px_err_handle(leaf, with jbc) 702 * o send ereport(fire fmri, derr, payload = BDF) 703 * o dispatch (leaf) 704 * o unlock 705 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 706 */ 707 /* ARGSUSED */ 708 uint_t 709 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 710 pcie_req_id_t rid) 711 { 712 dev_info_t *rpdip = px_p->px_dip; 713 int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; 714 ddi_fm_error_t derr; 715 716 mutex_enter(&px_p->px_fm_mutex); 717 718 /* Create the derr */ 719 bzero(&derr, sizeof (ddi_fm_error_t)); 720 derr.fme_version = DDI_FME_VERSION; 721 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 722 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 723 724 /* send ereport/handle/clear fire registers */ 725 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 726 727 /* Check and clear the fabric error */ 728 fab_err = px_fabric_check(px_p, msg_code, rid, &derr); 729 730 /* Check all child devices for errors */ 731 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 732 733 mutex_exit(&px_p->px_fm_mutex); 734 735 /* 736 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 737 * therefore it does not cause panic. 738 */ 739 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || 740 (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) 741 PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" 742 "(%x,%x,%x)\n", ddi_driver_name(rpdip), 743 ddi_get_instance(rpdip), err, fab_err, ret); 744 745 return (DDI_INTR_CLAIMED); 746 } 747 748 /* 749 * px_err_safeacc_check: 750 * Check to see if a peek/poke and cautious access is currently being 751 * done on a particular leaf. 752 * 753 * Safe access reads induced fire errors will be handled by cpu trap handler 754 * which will call px_fm_callback() which calls this function. In that 755 * case, the derr fields will be set by trap handler with the correct values. 756 * 757 * Safe access writes induced errors will be handled by px interrupt 758 * handlers, this function will fill in the derr fields. 759 * 760 * If a cpu trap does occur, it will quiesce all other interrupts allowing 761 * the cpu trap error handling to finish before Fire receives an interrupt. 762 * 763 * If fire does indeed have an error when a cpu trap occurs as a result of 764 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 765 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 766 * handler and this function will need to find if this error occured in the 767 * middle of a safe access operation. 768 * 769 * @param px_p leaf in which to check access 770 * @param derr fm err data structure to be updated 771 */ 772 void 773 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 774 { 775 px_pec_t *pec_p = px_p->px_pec_p; 776 int acctype = pec_p->pec_safeacc_type; 777 778 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); 779 780 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 781 return; 782 } 783 784 /* safe access checking */ 785 switch (acctype) { 786 case DDI_FM_ERR_EXPECTED: 787 /* 788 * cautious access protection, protected from all err. 789 */ 790 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 791 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 792 DDI_FME_VERSION); 793 derr->fme_flag = acctype; 794 derr->fme_acc_handle = pec_p->pec_acc_hdl; 795 break; 796 case DDI_FM_ERR_POKE: 797 /* 798 * ddi_poke protection, check nexus and children for 799 * expected errors. 800 */ 801 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 802 membar_sync(); 803 derr->fme_flag = acctype; 804 break; 805 case DDI_FM_ERR_PEEK: 806 derr->fme_flag = acctype; 807 break; 808 } 809 } 810