1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/sysmacros.h> 27 #include <sys/types.h> 28 #include <sys/kmem.h> 29 #include <sys/modctl.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/sunndi.h> 33 #include <sys/fm/protocol.h> 34 #include <sys/fm/util.h> 35 #include <sys/fm/io/ddi.h> 36 #include <sys/fm/io/pci.h> 37 #include <sys/promif.h> 38 #include <sys/disp.h> 39 #include <sys/atomic.h> 40 #include <sys/pcie.h> 41 #include <sys/pci_cap.h> 42 #include <sys/pcie_impl.h> 43 44 #define PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \ 45 PCIE_DEVSTS_CE_DETECTED) 46 47 #define PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \ 48 PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR) 49 50 #define PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\ 51 PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP) 52 #define PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \ 53 PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR) 54 55 #define PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \ 56 PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \ 57 PCIE_AER_SUCE_SERR_ASSERT) 58 #define PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \ 59 PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \ 60 PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \ 61 PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \ 62 PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR) 63 64 #define PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR) 65 66 #define PF_FIRST_AER_ERR(bit, adv) \ 67 (bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK))) 68 69 #define HAS_AER_LOGS(pfd_p, bit) \ 70 (PCIE_HAS_AER(pfd_p->pe_bus_p) && \ 71 PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p))) 72 73 #define PF_FIRST_SAER_ERR(bit, adv) \ 74 (bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK))) 75 76 #define HAS_SAER_LOGS(pfd_p, bit) \ 77 (PCIE_HAS_AER(pfd_p->pe_bus_p) && \ 78 PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p))) 79 80 #define GET_SAER_CMD(pfd_p) \ 81 ((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \ 82 PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK) 83 84 #define CE_ADVISORY(pfd_p) \ 85 (PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE) 86 87 /* PCIe Fault Fabric Error analysis table */ 88 typedef struct pf_fab_err_tbl { 89 uint32_t bit; /* Error bit */ 90 int (*handler)(); /* Error handling fuction */ 91 } pf_fab_err_tbl_t; 92 93 static pcie_bus_t *pf_is_ready(dev_info_t *); 94 /* Functions for scanning errors */ 95 static int pf_default_hdl(dev_info_t *, pf_impl_t *); 96 static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t); 97 static boolean_t pf_in_bus_range(pcie_bus_t *, pcie_req_id_t); 98 static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t); 99 100 static int pf_pci_decode(pf_data_t *, uint16_t *); 101 102 /* Functions for gathering errors */ 103 static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs, 104 pcie_bus_t *bus_p, boolean_t bdg); 105 static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p); 106 static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p); 107 static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p); 108 static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *); 109 static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p); 110 111 /* Functions for analysing errors */ 112 static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *); 113 static void pf_adjust_for_no_aer(pf_data_t *); 114 static void pf_adjust_for_no_saer(pf_data_t *); 115 static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t); 116 static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *); 117 static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *, 118 uint32_t); 119 static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *, 120 pf_data_t *, const pf_fab_err_tbl_t *, uint32_t); 121 static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t, 122 pf_data_t *, pf_data_t *); 123 static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t, 124 pf_data_t *, pf_data_t *); 125 static int pf_analyse_pci(ddi_fm_error_t *, uint32_t, 126 pf_data_t *, pf_data_t *); 127 static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t, 128 pf_data_t *, pf_data_t *); 129 static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t, 130 pf_data_t *, pf_data_t *); 131 static int pf_analyse_sc(ddi_fm_error_t *, uint32_t, 132 pf_data_t *, pf_data_t *); 133 static int pf_analyse_to(ddi_fm_error_t *, uint32_t, 134 pf_data_t *, pf_data_t *); 135 static int pf_analyse_uc(ddi_fm_error_t *, uint32_t, 136 pf_data_t *, pf_data_t *); 137 static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t, 138 pf_data_t *, pf_data_t *); 139 static int pf_no_panic(ddi_fm_error_t *, uint32_t, 140 pf_data_t *, pf_data_t *); 141 static int pf_panic(ddi_fm_error_t *, uint32_t, 142 pf_data_t *, pf_data_t *); 143 static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *); 144 static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr); 145 146 /* PCIe Fabric Handle Lookup Support Functions. */ 147 static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t, 148 uint64_t, pcie_req_id_t); 149 static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t, 150 pcie_req_id_t, ndi_fmc_t *); 151 static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *, 152 boolean_t); 153 154 static int pf_handler_enter(dev_info_t *, pf_impl_t *); 155 static void pf_handler_exit(dev_info_t *); 156 157 boolean_t pcie_full_scan = B_FALSE; /* Force to always do a full scan */ 158 int pcie_disable_scan = 0; /* Disable fabric scan */ 159 160 /* 161 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The 162 * caller may create a local pf_data_t with the "root fault" 163 * information populated to either do a precise or full scan. More 164 * than one pf_data_t maybe linked together if there are multiple 165 * errors. Only a PCIe compliant Root Port device may pass in NULL 166 * for the root_pfd_p. 167 * 168 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as 169 * the rdip. PCIe Root ports should call pf_scan_fabric using it's parent as 170 * the rdip. 171 * 172 * Scan fabric initiated from RCs are likely due to a fabric message, traps or 173 * any RC detected errors that propagated to/from the fabric. 174 * 175 * This code assumes that by the time pf_scan_fabric is 176 * called, pf_handler_enter has NOT been called on the rdip. 177 */ 178 int 179 pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p) 180 { 181 pf_impl_t impl; 182 pf_data_t *pfd_p, *pfd_head_p, *pfd_tail_p; 183 int scan_flag = PF_SCAN_SUCCESS; 184 int analyse_flag = PF_ERR_NO_ERROR; 185 boolean_t full_scan = pcie_full_scan; 186 187 if (pcie_disable_scan) 188 return (analyse_flag); 189 190 /* Find the head and tail of this link list */ 191 pfd_head_p = root_pfd_p; 192 for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next; 193 pfd_tail_p = pfd_tail_p->pe_next) 194 ; 195 196 /* Save head/tail */ 197 impl.pf_total = 0; 198 impl.pf_derr = derr; 199 impl.pf_dq_head_p = pfd_head_p; 200 impl.pf_dq_tail_p = pfd_tail_p; 201 202 /* If scan is initiated from RP then RP itself must be scanned. */ 203 if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) && 204 !root_pfd_p) { 205 scan_flag = pf_handler_enter(rdip, &impl); 206 if (scan_flag & PF_SCAN_DEADLOCK) 207 goto done; 208 209 scan_flag = pf_default_hdl(rdip, &impl); 210 if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD) 211 goto done; 212 } 213 214 /* 215 * Scan the fabric using the scan_bdf and scan_addr in error q. 216 * scan_bdf will be valid in the following cases: 217 * - Fabric message 218 * - Poisoned TLP 219 * - Signaled UR/CA 220 * - Received UR/CA 221 * - PIO load failures 222 */ 223 for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p); 224 pfd_p = pfd_p->pe_next) { 225 impl.pf_fault = PCIE_ROOT_FAULT(pfd_p); 226 227 if (impl.pf_fault->full_scan) 228 full_scan = B_TRUE; 229 230 if (full_scan || 231 PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) || 232 impl.pf_fault->scan_addr) 233 scan_flag |= pf_dispatch(rdip, &impl, full_scan); 234 235 if (full_scan) 236 break; 237 } 238 239 done: 240 /* 241 * If this is due to safe access, don't analyze the errors and return 242 * success regardless of how scan fabric went. 243 */ 244 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 245 analyse_flag = PF_ERR_NO_PANIC; 246 } else { 247 analyse_flag = pf_analyse_error(derr, &impl); 248 } 249 250 pf_send_ereport(derr, &impl); 251 252 /* 253 * Check if any hardened driver's callback reported a panic or scan 254 * fabric was unable to gather all the information needed. If so panic. 255 */ 256 if (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_BAD_RESPONSE)) 257 analyse_flag |= PF_ERR_PANIC; 258 259 /* 260 * If a deadlock was detected, panic the system as error analysis has 261 * been compromised. 262 */ 263 if (scan_flag & PF_SCAN_DEADLOCK) 264 analyse_flag |= PF_ERR_PANIC_DEADLOCK; 265 266 derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag); 267 268 return (analyse_flag); 269 } 270 271 void 272 pcie_force_fullscan() { 273 pcie_full_scan = B_TRUE; 274 } 275 276 /* 277 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device 278 * falls in the error path. 279 * 280 * Returns PF_SCAN_* flags 281 */ 282 static int 283 pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan) 284 { 285 dev_info_t *dip; 286 pcie_req_id_t rid = impl->pf_fault->scan_bdf; 287 pcie_bus_t *bus_p; 288 int scan_flag = PF_SCAN_SUCCESS; 289 290 for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) { 291 /* Make sure dip is attached and ready */ 292 if (!(bus_p = pf_is_ready(dip))) 293 continue; 294 295 scan_flag |= pf_handler_enter(dip, impl); 296 if (scan_flag & PF_SCAN_DEADLOCK) 297 break; 298 299 /* 300 * Handle this device if it is a: 301 * o Full Scan 302 * o PCI/PCI-X Device 303 * o Fault BDF = Device BDF 304 * o BDF/ADDR is in range of the Bridge/Switch 305 */ 306 if (full_scan || 307 (bus_p->bus_bdf == rid) || 308 pf_in_bus_range(bus_p, rid) || 309 pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) { 310 int hdl_flag = pf_default_hdl(dip, impl); 311 scan_flag |= hdl_flag; 312 313 /* 314 * If pf_default_hdl was not able gather error 315 * information, it means this device wasn't added to the 316 * error q list. In that case exit the lock now, 317 * otherwise it'll be locked forever. 318 */ 319 if (hdl_flag & PF_SCAN_BAD_RESPONSE) 320 pf_handler_exit(dip); 321 322 /* 323 * A bridge may have detected no errors in which case 324 * there is no need to scan further down. 325 */ 326 if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD) 327 continue; 328 } else { 329 pf_handler_exit(dip); 330 continue; 331 } 332 333 /* match or in bridge bus-range */ 334 switch (bus_p->bus_dev_type) { 335 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: 336 case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE: 337 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 338 break; 339 case PCIE_PCIECAP_DEV_TYPE_UP: 340 case PCIE_PCIECAP_DEV_TYPE_DOWN: 341 case PCIE_PCIECAP_DEV_TYPE_ROOT: 342 { 343 pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p); 344 pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p); 345 pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p); 346 /* 347 * Continue if the fault BDF != the switch or there is a 348 * parity error 349 */ 350 if ((bus_p->bus_bdf != rid) || 351 (err_p->pci_err_status & PF_PCI_PARITY_ERR) || 352 (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR)) 353 scan_flag |= pf_dispatch(dip, impl, full_scan); 354 break; 355 } 356 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: 357 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: 358 /* 359 * Reached a PCIe end point so stop. Note dev_type 360 * PCI_DEV is just a PCIe device that requires IO Space 361 */ 362 break; 363 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO: 364 if (PCIE_IS_BDG(bus_p)) 365 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 366 break; 367 default: 368 ASSERT(B_FALSE); 369 } 370 } 371 return (scan_flag); 372 } 373 374 /* Returns whether the "bdf" is in the bus range of a switch/bridge */ 375 static boolean_t 376 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf) 377 { 378 pci_bus_range_t *br_p = &bus_p->bus_bus_range; 379 uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >> 380 PCIE_REQ_ID_BUS_SHIFT; 381 382 /* check if given bdf falls within bridge's bus range */ 383 if (PCIE_IS_BDG(bus_p) && 384 ((bus_no >= br_p->lo) && (bus_no <= br_p->hi))) 385 return (B_TRUE); 386 else 387 return (B_FALSE); 388 } 389 390 /* 391 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the 392 * "addr" is in the assigned addr of a device. 393 */ 394 static boolean_t 395 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr) 396 { 397 uint_t i; 398 uint64_t low, hi; 399 ppb_ranges_t *ranges_p = bus_p->bus_addr_ranges; 400 pci_regspec_t *assign_p = bus_p->bus_assigned_addr; 401 402 /* check if given address belongs to this device */ 403 for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) { 404 low = assign_p->pci_phys_low; 405 hi = low + assign_p->pci_size_low; 406 if ((addr < hi) && (addr >= low)) 407 return (B_TRUE); 408 } 409 410 /* check if given address belongs to a child below this device */ 411 if (!PCIE_IS_BDG(bus_p)) 412 return (B_FALSE); 413 414 for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) { 415 switch (ranges_p->child_high & PCI_ADDR_MASK) { 416 case PCI_ADDR_IO: 417 case PCI_ADDR_MEM32: 418 low = ranges_p->child_low; 419 hi = ranges_p->size_low + low; 420 if ((addr < hi) && (addr >= low)) 421 return (B_TRUE); 422 break; 423 case PCI_ADDR_MEM64: 424 low = ((uint64_t)ranges_p->child_mid << 32) | 425 (uint64_t)ranges_p->child_low; 426 hi = (((uint64_t)ranges_p->size_high << 32) | 427 (uint64_t)ranges_p->size_low) + low; 428 if ((addr < hi) && (addr >= low)) 429 return (B_TRUE); 430 break; 431 } 432 } 433 return (B_FALSE); 434 } 435 436 static pcie_bus_t * 437 pf_is_ready(dev_info_t *dip) 438 { 439 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 440 if (!bus_p) 441 return (NULL); 442 443 if (!(bus_p->bus_fm_flags & PF_FM_READY)) 444 return (NULL); 445 return (bus_p); 446 } 447 448 static void 449 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs, 450 pcie_bus_t *bus_p, boolean_t bdg) 451 { 452 if (bdg) { 453 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 454 PCI_PCIX_BDG_ECC_STATUS); 455 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 456 PCI_PCIX_BDG_ECC_FST_AD); 457 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 458 PCI_PCIX_BDG_ECC_SEC_AD); 459 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 460 PCI_PCIX_BDG_ECC_ATTR); 461 } else { 462 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 463 PCI_PCIX_ECC_STATUS); 464 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 465 PCI_PCIX_ECC_FST_AD); 466 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 467 PCI_PCIX_ECC_SEC_AD); 468 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 469 PCI_PCIX_ECC_ATTR); 470 } 471 } 472 473 474 static void 475 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 476 { 477 /* 478 * For PCI-X device PCI-X Capability only exists for Type 0 Headers. 479 * PCI-X Bridge Capability only exists for Type 1 Headers. 480 * Both capabilities do not exist at the same time. 481 */ 482 if (PCIE_IS_BDG(bus_p)) { 483 pf_pcix_bdg_err_regs_t *pcix_bdg_regs; 484 485 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p); 486 487 pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p, 488 PCI_PCIX_SEC_STATUS); 489 pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p, 490 PCI_PCIX_BDG_STATUS); 491 492 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 493 /* 494 * PCI Express to PCI-X bridges only implement the 495 * secondary side of the PCI-X ECC registers, bit one is 496 * read-only so we make sure we do not write to it. 497 */ 498 if (!PCIE_IS_PCIE_BDG(bus_p)) { 499 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 500 0); 501 pf_pcix_ecc_regs_gather( 502 PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE); 503 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 504 1); 505 } 506 pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0), 507 bus_p, B_TRUE); 508 } 509 } else { 510 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 511 512 pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p, 513 PCI_PCIX_COMMAND); 514 pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p, 515 PCI_PCIX_STATUS); 516 if (PCIX_ECC_VERSION_CHECK(bus_p)) 517 pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p, 518 B_TRUE); 519 } 520 } 521 522 static void 523 pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 524 { 525 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 526 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 527 528 pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS); 529 pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL); 530 pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP); 531 532 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 533 pf_pcix_regs_gather(pfd_p, bus_p); 534 535 if (PCIE_IS_ROOT(bus_p)) { 536 pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p); 537 538 pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p, 539 PCIE_ROOTSTS); 540 pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p, 541 PCIE_ROOTCTL); 542 } 543 544 if (!PCIE_HAS_AER(bus_p)) 545 return; 546 547 /* Gather UE AERs */ 548 pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p, 549 PCIE_AER_CTL); 550 pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p, 551 PCIE_AER_UCE_STS); 552 pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p, 553 PCIE_AER_UCE_MASK); 554 pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p, 555 PCIE_AER_UCE_SERV); 556 PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 557 PCIE_AER_HDR_LOG); 558 PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 559 PCIE_AER_HDR_LOG + 0x4); 560 PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 561 PCIE_AER_HDR_LOG + 0x8); 562 PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 563 PCIE_AER_HDR_LOG + 0xc); 564 565 /* Gather CE AERs */ 566 pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p, 567 PCIE_AER_CE_STS); 568 pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p, 569 PCIE_AER_CE_MASK); 570 571 /* 572 * If pci express to pci bridge then grab the bridge 573 * error registers. 574 */ 575 if (PCIE_IS_PCIE_BDG(bus_p)) { 576 pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs = 577 PCIE_ADV_BDG_REG(pfd_p); 578 579 pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p, 580 PCIE_AER_SCTL); 581 pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p, 582 PCIE_AER_SUCE_STS); 583 pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p, 584 PCIE_AER_SUCE_MASK); 585 pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p, 586 PCIE_AER_SUCE_SERV); 587 PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 588 PCIE_AER_SHDR_LOG); 589 PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 590 PCIE_AER_SHDR_LOG + 0x4); 591 PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 592 PCIE_AER_SHDR_LOG + 0x8); 593 PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 594 PCIE_AER_SHDR_LOG + 0xc); 595 } 596 597 /* 598 * If PCI Express root port then grab the root port 599 * error registers. 600 */ 601 if (PCIE_IS_ROOT(bus_p)) { 602 pf_pcie_adv_rp_err_regs_t *pcie_rp_regs = 603 PCIE_ADV_RP_REG(pfd_p); 604 605 pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p, 606 PCIE_AER_RE_CMD); 607 pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p, 608 PCIE_AER_RE_STS); 609 pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p, 610 PCIE_AER_CE_SRC_ID); 611 pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p, 612 PCIE_AER_ERR_SRC_ID); 613 } 614 } 615 616 static void 617 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 618 { 619 pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p); 620 621 /* 622 * Start by reading all the error registers that are available for 623 * pci and pci express and for leaf devices and bridges/switches 624 */ 625 pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT); 626 pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM); 627 628 /* 629 * If pci-pci bridge grab PCI bridge specific error registers. 630 */ 631 if (PCIE_IS_BDG(bus_p)) { 632 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p); 633 pci_bdg_regs->pci_bdg_sec_stat = 634 PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS); 635 pci_bdg_regs->pci_bdg_ctrl = 636 PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL); 637 } 638 639 /* 640 * If pci express device grab pci express error registers and 641 * check for advanced error reporting features and grab them if 642 * available. 643 */ 644 if (PCIE_IS_PCIE(bus_p)) 645 pf_pcie_regs_gather(pfd_p, bus_p); 646 else if (PCIE_IS_PCIX(bus_p)) 647 pf_pcix_regs_gather(pfd_p, bus_p); 648 649 } 650 651 static void 652 pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 653 { 654 if (PCIE_IS_BDG(bus_p)) { 655 pf_pcix_bdg_err_regs_t *pcix_bdg_regs; 656 657 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p); 658 659 PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS, 660 pcix_bdg_regs->pcix_bdg_sec_stat); 661 662 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS, 663 pcix_bdg_regs->pcix_bdg_stat); 664 665 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 666 pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs; 667 /* 668 * PCI Express to PCI-X bridges only implement the 669 * secondary side of the PCI-X ECC registers. For 670 * clearing, there is no need to "select" the ECC 671 * register, just write what was originally read. 672 */ 673 if (!PCIE_IS_PCIE_BDG(bus_p)) { 674 pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0); 675 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 676 pcix_bdg_ecc_regs->pcix_ecc_ctlstat); 677 678 } 679 pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1); 680 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 681 pcix_bdg_ecc_regs->pcix_ecc_ctlstat); 682 } 683 } else { 684 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 685 686 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS, 687 pcix_regs->pcix_status); 688 689 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 690 pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p); 691 692 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS, 693 pcix_ecc_regs->pcix_ecc_ctlstat); 694 } 695 } 696 } 697 698 static void 699 pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 700 { 701 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 702 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 703 704 PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status); 705 706 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 707 pf_pcix_regs_clear(pfd_p, bus_p); 708 709 if (!PCIE_HAS_AER(bus_p)) 710 return; 711 712 PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS, 713 pcie_adv_regs->pcie_ue_status); 714 715 PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS, 716 pcie_adv_regs->pcie_ce_status); 717 718 if (PCIE_IS_PCIE_BDG(bus_p)) { 719 pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs = 720 PCIE_ADV_BDG_REG(pfd_p); 721 722 PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS, 723 pcie_bdg_regs->pcie_sue_status); 724 } 725 726 /* 727 * If PCI Express root complex then clear the root complex 728 * error registers. 729 */ 730 if (PCIE_IS_ROOT(bus_p)) { 731 pf_pcie_adv_rp_err_regs_t *pcie_rp_regs; 732 733 pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p); 734 735 PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS, 736 pcie_rp_regs->pcie_rp_err_status); 737 } 738 } 739 740 static void 741 pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 742 { 743 if (PCIE_IS_PCIE(bus_p)) 744 pf_pcie_regs_clear(pfd_p, bus_p); 745 else if (PCIE_IS_PCIX(bus_p)) 746 pf_pcix_regs_clear(pfd_p, bus_p); 747 748 PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status); 749 750 if (PCIE_IS_BDG(bus_p)) { 751 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p); 752 PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS, 753 pci_bdg_regs->pci_bdg_sec_stat); 754 } 755 } 756 757 /* ARGSUSED */ 758 void 759 pcie_clear_errors(dev_info_t *dip) 760 { 761 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 762 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 763 764 ASSERT(bus_p); 765 766 pf_pci_regs_gather(pfd_p, bus_p); 767 pf_pci_regs_clear(pfd_p, bus_p); 768 } 769 770 /* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */ 771 static void 772 pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p) 773 { 774 pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p); 775 pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p); 776 uint32_t root_err = rp_regs->pcie_rp_err_status; 777 uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status; 778 int num_faults = 0; 779 780 /* Since this data structure is reused, make sure to reset it */ 781 root_fault->full_scan = B_FALSE; 782 root_fault->scan_bdf = PCIE_INVALID_BDF; 783 root_fault->scan_addr = 0; 784 785 if (!PCIE_HAS_AER(bus_p) && 786 (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) { 787 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE; 788 return; 789 } 790 791 /* 792 * Check to see if an error has been received that 793 * requires a scan of the fabric. Count the number of 794 * faults seen. If MUL CE/FE_NFE that counts for 795 * atleast 2 faults, so just return with full_scan. 796 */ 797 if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) || 798 (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) { 799 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE; 800 return; 801 } 802 803 if (root_err & PCIE_AER_RE_STS_CE_RCVD) 804 num_faults++; 805 806 if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) 807 num_faults++; 808 809 if (ue_err & PCIE_AER_UCE_CA) 810 num_faults++; 811 812 if (ue_err & PCIE_AER_UCE_UR) 813 num_faults++; 814 815 /* If no faults just return */ 816 if (num_faults == 0) 817 return; 818 819 /* If faults > 1 do full scan */ 820 if (num_faults > 1) { 821 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE; 822 return; 823 } 824 825 /* By this point, there is only 1 fault detected */ 826 if (root_err & PCIE_AER_RE_STS_CE_RCVD) { 827 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id; 828 num_faults--; 829 } else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) { 830 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id; 831 num_faults--; 832 } else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) || 833 HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) && 834 (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) == 835 DDI_SUCCESS)) { 836 PCIE_ROOT_FAULT(pfd_p)->scan_addr = 837 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr; 838 num_faults--; 839 } 840 841 /* 842 * This means an error did occur, but we couldn't extract the fault BDF 843 */ 844 if (num_faults > 0) 845 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE; 846 847 } 848 849 850 /* 851 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue 852 * 853 * Returns a scan flag. 854 * o PF_SCAN_SUCCESS - Error gathered and cleared sucessfuly, data added to 855 * Fault Q 856 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item not added to fault Q 857 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal. 858 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further 859 * unnecessary scanning 860 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this 861 * time. 862 */ 863 static int 864 pf_default_hdl(dev_info_t *dip, pf_impl_t *impl) 865 { 866 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 867 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 868 int cb_sts, scan_flag = PF_SCAN_SUCCESS; 869 870 /* Make sure this device hasn't already been snapshotted and cleared */ 871 if (pfd_p->pe_valid == B_TRUE) { 872 scan_flag |= PF_SCAN_IN_DQ; 873 goto done; 874 } 875 876 /* 877 * Read vendor/device ID and check with cached data, if it doesn't match 878 * could very well be a device that isn't responding anymore. Just 879 * stop. Save the basic info in the error q for post mortem debugging 880 * purposes. 881 */ 882 if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) { 883 char buf[FM_MAX_CLASS]; 884 885 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", 886 PCI_ERROR_SUBCLASS, PCI_NR); 887 ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1), 888 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL); 889 890 return (PF_SCAN_BAD_RESPONSE); 891 } 892 893 pf_pci_regs_gather(pfd_p, bus_p); 894 pf_pci_regs_clear(pfd_p, bus_p); 895 if (PCIE_IS_RP(bus_p)) 896 pf_pci_find_rp_fault(pfd_p, bus_p); 897 898 cb_sts = pf_fm_callback(dip, impl->pf_derr); 899 900 if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN) 901 scan_flag |= PF_SCAN_CB_FAILURE; 902 903 /* Add the snapshot to the error q */ 904 pf_en_dq(pfd_p, impl); 905 906 done: 907 /* 908 * If a bridge does not have any error no need to scan any further down. 909 * For PCIe devices, check the PCIe device status and PCI secondary 910 * status. 911 * - Some non-compliant PCIe devices do not utilize PCIe 912 * error registers. If so rely on legacy PCI error registers. 913 * For PCI devices, check the PCI secondary status. 914 */ 915 if (PCIE_IS_PCIE_BDG(bus_p) && 916 !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) && 917 !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) 918 scan_flag |= PF_SCAN_NO_ERR_IN_CHILD; 919 920 if (PCIE_IS_PCI_BDG(bus_p) && 921 !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) 922 scan_flag |= PF_SCAN_NO_ERR_IN_CHILD; 923 924 pfd_p->pe_valid = B_TRUE; 925 return (scan_flag); 926 } 927 928 /* 929 * Called during postattach to initialize a device's error handling 930 * capabilities. If the devices has already been hardened, then there isn't 931 * much needed. Otherwise initialize the device's default FMA capabilities. 932 * 933 * In a future project where PCIe support is removed from pcifm, several 934 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be 935 * created here so that the PCI/PCIe eversholt rules will work properly. 936 */ 937 void 938 pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd) 939 { 940 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 941 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 942 boolean_t need_cb_register = B_FALSE; 943 944 if (!bus_p) { 945 cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n", 946 ddi_driver_name(dip), ddi_get_instance(dip)); 947 return; 948 } 949 950 if (fmhdl) { 951 /* 952 * If device is only ereport capable and not callback capable 953 * make it callback capable. The only downside is that the 954 * "fm-errcb-capable" property is not created for this device 955 * which should be ok since it's not used anywhere. 956 */ 957 if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE)) 958 need_cb_register = B_TRUE; 959 } else { 960 int cap; 961 /* 962 * fm-capable in driver.conf can be used to set fm_capabilities. 963 * If fm-capable is not defined, set the default 964 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE. 965 */ 966 cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 967 DDI_PROP_DONTPASS, "fm-capable", 968 DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE); 969 cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE); 970 971 bus_p->bus_fm_flags |= PF_FM_IS_NH; 972 973 if (cmd == DDI_ATTACH) { 974 ddi_fm_init(dip, &cap, &ibc); 975 pci_ereport_setup(dip); 976 } 977 978 if (cap & DDI_FM_ERRCB_CAPABLE) 979 need_cb_register = B_TRUE; 980 981 fmhdl = DEVI(dip)->devi_fmhdl; 982 } 983 984 /* If ddi_fm_init fails for any reason RETURN */ 985 if (!fmhdl) { 986 bus_p->bus_fm_flags = 0; 987 return; 988 } 989 990 fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE; 991 if (cmd == DDI_ATTACH) { 992 if (need_cb_register) 993 ddi_fm_handler_register(dip, pf_dummy_cb, NULL); 994 } 995 996 bus_p->bus_fm_flags |= PF_FM_READY; 997 } 998 999 /* undo FMA lock, called at predetach */ 1000 void 1001 pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd) 1002 { 1003 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 1004 1005 if (!bus_p) 1006 return; 1007 1008 /* Don't fini anything if device isn't FM Ready */ 1009 if (!(bus_p->bus_fm_flags & PF_FM_READY)) 1010 return; 1011 1012 /* no other code should set the flag to false */ 1013 bus_p->bus_fm_flags &= ~PF_FM_READY; 1014 1015 /* 1016 * Grab the mutex to make sure device isn't in the middle of 1017 * error handling. Setting the bus_fm_flag to ~PF_FM_READY 1018 * should prevent this device from being error handled after 1019 * the mutex has been released. 1020 */ 1021 (void) pf_handler_enter(dip, NULL); 1022 pf_handler_exit(dip); 1023 1024 /* undo non-hardened drivers */ 1025 if (bus_p->bus_fm_flags & PF_FM_IS_NH) { 1026 if (cmd == DDI_DETACH) { 1027 bus_p->bus_fm_flags &= ~PF_FM_IS_NH; 1028 pci_ereport_teardown(dip); 1029 /* 1030 * ddi_fini itself calls ddi_handler_unregister, 1031 * so no need to explicitly call unregister. 1032 */ 1033 ddi_fm_fini(dip); 1034 } 1035 } 1036 } 1037 1038 /*ARGSUSED*/ 1039 static int 1040 pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used) 1041 { 1042 return (DDI_FM_OK); 1043 } 1044 1045 /* 1046 * Add PFD to queue. If it is an RC add it to the beginning, 1047 * otherwise add it to the end. 1048 */ 1049 static void 1050 pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl) 1051 { 1052 pf_data_t *head_p = impl->pf_dq_head_p; 1053 pf_data_t *tail_p = impl->pf_dq_tail_p; 1054 1055 impl->pf_total++; 1056 1057 if (!head_p) { 1058 ASSERT(PFD_IS_ROOT(pfd_p)); 1059 impl->pf_dq_head_p = pfd_p; 1060 impl->pf_dq_tail_p = pfd_p; 1061 pfd_p->pe_prev = NULL; 1062 pfd_p->pe_next = NULL; 1063 return; 1064 } 1065 1066 /* Check if this is a Root Port eprt */ 1067 if (PFD_IS_ROOT(pfd_p)) { 1068 pf_data_t *root_p, *last_p = NULL; 1069 1070 /* The first item must be a RP */ 1071 root_p = head_p; 1072 for (last_p = head_p; last_p && PFD_IS_ROOT(last_p); 1073 last_p = last_p->pe_next) 1074 root_p = last_p; 1075 1076 /* root_p is the last RP pfd. last_p is the first non-RP pfd. */ 1077 root_p->pe_next = pfd_p; 1078 pfd_p->pe_prev = root_p; 1079 pfd_p->pe_next = last_p; 1080 1081 if (last_p) 1082 last_p->pe_prev = pfd_p; 1083 else 1084 tail_p = pfd_p; 1085 } else { 1086 tail_p->pe_next = pfd_p; 1087 pfd_p->pe_prev = tail_p; 1088 pfd_p->pe_next = NULL; 1089 tail_p = pfd_p; 1090 } 1091 1092 impl->pf_dq_head_p = head_p; 1093 impl->pf_dq_tail_p = tail_p; 1094 } 1095 1096 /* 1097 * Ignore: 1098 * - TRAINING: as leaves do not have children 1099 * - SD: as leaves do not have children 1100 */ 1101 const pf_fab_err_tbl_t pcie_pcie_tbl[] = { 1102 PCIE_AER_UCE_DLP, pf_panic, 1103 PCIE_AER_UCE_PTLP, pf_analyse_ptlp, 1104 PCIE_AER_UCE_FCP, pf_panic, 1105 PCIE_AER_UCE_TO, pf_analyse_to, 1106 PCIE_AER_UCE_CA, pf_analyse_ca_ur, 1107 PCIE_AER_UCE_UC, pf_analyse_uc, 1108 PCIE_AER_UCE_RO, pf_panic, 1109 PCIE_AER_UCE_MTLP, pf_panic, 1110 PCIE_AER_UCE_ECRC, pf_panic, 1111 PCIE_AER_UCE_UR, pf_analyse_ca_ur, 1112 NULL, NULL 1113 }; 1114 1115 const pf_fab_err_tbl_t pcie_rp_tbl[] = { 1116 PCIE_AER_UCE_TRAINING, pf_no_panic, 1117 PCIE_AER_UCE_DLP, pf_panic, 1118 PCIE_AER_UCE_SD, pf_no_panic, 1119 PCIE_AER_UCE_PTLP, pf_analyse_ptlp, 1120 PCIE_AER_UCE_FCP, pf_panic, 1121 PCIE_AER_UCE_TO, pf_panic, 1122 PCIE_AER_UCE_CA, pf_no_panic, 1123 PCIE_AER_UCE_UC, pf_analyse_uc, 1124 PCIE_AER_UCE_RO, pf_panic, 1125 PCIE_AER_UCE_MTLP, pf_panic, 1126 PCIE_AER_UCE_ECRC, pf_panic, 1127 PCIE_AER_UCE_UR, pf_no_panic, 1128 NULL, NULL 1129 }; 1130 1131 const pf_fab_err_tbl_t pcie_sw_tbl[] = { 1132 PCIE_AER_UCE_TRAINING, pf_no_panic, 1133 PCIE_AER_UCE_DLP, pf_panic, 1134 PCIE_AER_UCE_SD, pf_no_panic, 1135 PCIE_AER_UCE_PTLP, pf_analyse_ptlp, 1136 PCIE_AER_UCE_FCP, pf_panic, 1137 PCIE_AER_UCE_TO, pf_analyse_to, 1138 PCIE_AER_UCE_CA, pf_analyse_ca_ur, 1139 PCIE_AER_UCE_UC, pf_analyse_uc, 1140 PCIE_AER_UCE_RO, pf_panic, 1141 PCIE_AER_UCE_MTLP, pf_panic, 1142 PCIE_AER_UCE_ECRC, pf_panic, 1143 PCIE_AER_UCE_UR, pf_analyse_ca_ur, 1144 NULL, NULL 1145 }; 1146 1147 const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = { 1148 PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc, 1149 PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc, 1150 PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta, 1151 PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta, 1152 PCIE_AER_SUCE_USC_ERR, pf_panic, 1153 PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta, 1154 PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data, 1155 PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic, 1156 PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic, 1157 PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic, 1158 PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert, 1159 PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic, 1160 PCIE_AER_SUCE_INTERNAL_ERR, pf_panic, 1161 NULL, NULL 1162 }; 1163 1164 const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = { 1165 PCI_STAT_PERROR, pf_analyse_pci, 1166 PCI_STAT_S_PERROR, pf_analyse_pci, 1167 PCI_STAT_S_SYSERR, pf_panic, 1168 PCI_STAT_R_MAST_AB, pf_analyse_pci, 1169 PCI_STAT_R_TARG_AB, pf_analyse_pci, 1170 PCI_STAT_S_TARG_AB, pf_analyse_pci, 1171 NULL, NULL 1172 }; 1173 1174 const pf_fab_err_tbl_t pcie_pci_tbl[] = { 1175 PCI_STAT_PERROR, pf_analyse_pci, 1176 PCI_STAT_S_PERROR, pf_analyse_pci, 1177 PCI_STAT_S_SYSERR, pf_panic, 1178 PCI_STAT_R_MAST_AB, pf_analyse_pci, 1179 PCI_STAT_R_TARG_AB, pf_analyse_pci, 1180 PCI_STAT_S_TARG_AB, pf_analyse_pci, 1181 NULL, NULL 1182 }; 1183 1184 #define PF_MASKED_AER_ERR(pfd_p) \ 1185 (PCIE_ADV_REG(pfd_p)->pcie_ue_status & \ 1186 ((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF)) 1187 #define PF_MASKED_SAER_ERR(pfd_p) \ 1188 (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \ 1189 ((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF)) 1190 /* 1191 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt 1192 * Queue. 1193 */ 1194 static int 1195 pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl) 1196 { 1197 int sts_flags, error_flags = 0; 1198 pf_data_t *pfd_p; 1199 1200 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 1201 sts_flags = 0; 1202 1203 switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) { 1204 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: 1205 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: 1206 if (PCIE_DEVSTS_CE_DETECTED & 1207 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1208 sts_flags |= PF_ERR_CE; 1209 1210 pf_adjust_for_no_aer(pfd_p); 1211 sts_flags |= pf_analyse_error_tbl(derr, impl, 1212 pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p)); 1213 break; 1214 case PCIE_PCIECAP_DEV_TYPE_ROOT: 1215 pf_adjust_for_no_aer(pfd_p); 1216 sts_flags |= pf_analyse_error_tbl(derr, impl, 1217 pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p)); 1218 break; 1219 case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO: 1220 /* no adjust_for_aer for pseudo RC */ 1221 sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p, 1222 pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p)); 1223 break; 1224 case PCIE_PCIECAP_DEV_TYPE_UP: 1225 case PCIE_PCIECAP_DEV_TYPE_DOWN: 1226 if (PCIE_DEVSTS_CE_DETECTED & 1227 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1228 sts_flags |= PF_ERR_CE; 1229 1230 pf_adjust_for_no_aer(pfd_p); 1231 sts_flags |= pf_analyse_error_tbl(derr, impl, 1232 pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p)); 1233 break; 1234 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: 1235 if (PCIE_DEVSTS_CE_DETECTED & 1236 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1237 sts_flags |= PF_ERR_CE; 1238 1239 pf_adjust_for_no_aer(pfd_p); 1240 pf_adjust_for_no_saer(pfd_p); 1241 sts_flags |= pf_analyse_error_tbl(derr, 1242 impl, pfd_p, pcie_pcie_tbl, 1243 PF_MASKED_AER_ERR(pfd_p)); 1244 sts_flags |= pf_analyse_error_tbl(derr, 1245 impl, pfd_p, pcie_pcie_bdg_tbl, 1246 PF_MASKED_SAER_ERR(pfd_p)); 1247 /* 1248 * Some non-compliant PCIe devices do not utilize PCIe 1249 * error registers. So fallthrough and rely on legacy 1250 * PCI error registers. 1251 */ 1252 if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED) 1253 & PCIE_ERR_REG(pfd_p)->pcie_err_status) 1254 break; 1255 /* FALLTHROUGH */ 1256 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO: 1257 sts_flags |= pf_analyse_error_tbl(derr, impl, 1258 pfd_p, pcie_pci_tbl, 1259 PCI_ERR_REG(pfd_p)->pci_err_status); 1260 1261 if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) 1262 break; 1263 1264 sts_flags |= pf_analyse_error_tbl(derr, 1265 impl, pfd_p, pcie_pci_bdg_tbl, 1266 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat); 1267 } 1268 1269 pfd_p->pe_severity_flags = sts_flags; 1270 error_flags |= pfd_p->pe_severity_flags; 1271 } 1272 1273 return (error_flags); 1274 } 1275 1276 static int 1277 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl, 1278 pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg) { 1279 const pf_fab_err_tbl_t *row; 1280 int err = 0; 1281 1282 for (row = tbl; err_reg && (row->bit != NULL) && !(err & PF_ERR_PANIC); 1283 row++) { 1284 if (err_reg & row->bit) 1285 err |= row->handler(derr, row->bit, impl->pf_dq_head_p, 1286 pfd_p); 1287 } 1288 1289 if (!err) 1290 err = PF_ERR_NO_ERROR; 1291 1292 return (err); 1293 } 1294 1295 /* 1296 * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device 1297 * issues a CA/UR a corresponding Received CA/UR should have been seen in the 1298 * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then 1299 * this error may be safely ignored. If not check the logs and see if an 1300 * associated handler for this transaction can be found. 1301 */ 1302 /* ARGSUSED */ 1303 static int 1304 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1305 pf_data_t *pfd_p) 1306 { 1307 uint32_t abort_type; 1308 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1309 1310 /* If UR's are masked forgive this error */ 1311 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1312 (bit == PCIE_AER_UCE_UR)) 1313 return (PF_ERR_NO_PANIC); 1314 1315 /* 1316 * If a RP has an CA/UR it means a leaf sent a bad request to the RP 1317 * such as a config read or a bad DMA address. 1318 */ 1319 if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p))) 1320 goto handle_lookup; 1321 1322 if (bit == PCIE_AER_UCE_UR) 1323 abort_type = PCI_STAT_R_MAST_AB; 1324 else 1325 abort_type = PCI_STAT_R_TARG_AB; 1326 1327 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type)) 1328 return (PF_ERR_MATCHED_RC); 1329 1330 handle_lookup: 1331 if (HAS_AER_LOGS(pfd_p, bit) && 1332 pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND) 1333 return (PF_ERR_MATCHED_DEVICE); 1334 1335 return (PF_ERR_PANIC); 1336 } 1337 1338 /* 1339 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe 1340 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in 1341 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so 1342 * then this error may be safely ignored. If not check the logs and see if an 1343 * associated handler for this transaction can be found. 1344 */ 1345 /* ARGSUSED */ 1346 static int 1347 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1348 pf_data_t *pfd_p) 1349 { 1350 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1351 uint32_t abort_type; 1352 1353 /* If UR's are masked forgive this error */ 1354 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1355 (bit == PCIE_AER_SUCE_RCVD_MA)) 1356 return (PF_ERR_NO_PANIC); 1357 1358 if (bit == PCIE_AER_SUCE_RCVD_MA) 1359 abort_type = PCI_STAT_R_MAST_AB; 1360 else 1361 abort_type = PCI_STAT_R_TARG_AB; 1362 1363 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type)) 1364 return (PF_ERR_MATCHED_RC); 1365 1366 if (!HAS_SAER_LOGS(pfd_p, bit)) 1367 return (PF_ERR_PANIC); 1368 1369 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND) 1370 return (PF_ERR_MATCHED_DEVICE); 1371 1372 return (PF_ERR_PANIC); 1373 } 1374 1375 /* 1376 * Generic PCI error analyser. This function is used for Parity Errors, 1377 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts. 1378 * In general PCI devices do not have error logs, it is very difficult to figure 1379 * out what transaction caused the error. Instead find the nearest PCIe-PCI 1380 * Bridge and check to see if it has logs and if it has an error associated with 1381 * this PCI Device. 1382 */ 1383 /* ARGSUSED */ 1384 static int 1385 pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1386 pf_data_t *pfd_p) 1387 { 1388 pf_data_t *parent_pfd_p; 1389 uint16_t cmd; 1390 uint32_t aer_ue_status; 1391 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p); 1392 pf_pcie_adv_bdg_err_regs_t *parent_saer_p; 1393 1394 if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR) 1395 return (PF_ERR_PANIC); 1396 1397 /* If UR's are masked forgive this error */ 1398 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1399 (bit == PCI_STAT_R_MAST_AB)) 1400 return (PF_ERR_NO_PANIC); 1401 1402 1403 if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) { 1404 aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT; 1405 } else { 1406 aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC | 1407 PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | 1408 PCIE_AER_SUCE_RCVD_MA); 1409 } 1410 1411 parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p); 1412 if (parent_pfd_p == NULL) 1413 return (PF_ERR_PANIC); 1414 1415 /* Check if parent bridge has seen this error */ 1416 parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p); 1417 if (!(parent_saer_p->pcie_sue_status & aer_ue_status) || 1418 !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status)) 1419 return (PF_ERR_PANIC); 1420 1421 /* 1422 * If the addr or bdf from the parent PCIe bridge logs belong to this 1423 * PCI device, assume the PCIe bridge's error handling has already taken 1424 * care of this PCI device's error. 1425 */ 1426 if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS) 1427 return (PF_ERR_PANIC); 1428 1429 if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) || 1430 pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr)) 1431 return (PF_ERR_MATCHED_PARENT); 1432 1433 /* 1434 * If this device is a PCI-PCI bridge, check if the bdf in the parent 1435 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges. 1436 * If they are, then assume the PCIe bridge's error handling has already 1437 * taken care of this PCI-PCI bridge device's error. 1438 */ 1439 if (PCIE_IS_BDG(bus_p) && 1440 pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf)) 1441 return (PF_ERR_MATCHED_PARENT); 1442 1443 return (PF_ERR_PANIC); 1444 } 1445 1446 /* 1447 * PCIe Bridge transactions associated with PERR. 1448 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe 1449 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe 1450 * o Bridge received a poisoned Completion on a Split Transction from PCIe 1451 * o Bridge received a poisoned Completion on a Delayed Transction from PCIe 1452 * 1453 * Check for non-poisoned PCIe transactions that got forwarded to the secondary 1454 * side and detects a PERR#. Except for delayed read completions, a poisoned 1455 * TLP will be forwarded to the secondary bus and PERR# will be asserted. 1456 */ 1457 /* ARGSUSED */ 1458 static int 1459 pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1460 pf_data_t *pfd_p) 1461 { 1462 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1463 uint16_t cmd; 1464 int hdl_sts = PF_HDL_NOTFOUND; 1465 int err = PF_ERR_NO_ERROR; 1466 pf_pcie_adv_bdg_err_regs_t *saer_p; 1467 1468 1469 if (HAS_SAER_LOGS(pfd_p, bit)) { 1470 saer_p = PCIE_ADV_BDG_REG(pfd_p); 1471 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1472 return (PF_ERR_PANIC); 1473 1474 cmd_switch: 1475 switch (cmd) { 1476 case PCI_PCIX_CMD_IOWR: 1477 case PCI_PCIX_CMD_MEMWR: 1478 case PCI_PCIX_CMD_MEMWR_BL: 1479 case PCI_PCIX_CMD_MEMWRBL: 1480 /* Posted Writes Transactions */ 1481 if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO) 1482 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1483 B_FALSE); 1484 break; 1485 case PCI_PCIX_CMD_CFWR: 1486 /* 1487 * Check to see if it is a non-posted write. If so, a 1488 * UR Completion would have been sent. 1489 */ 1490 if (pf_matched_in_rc(dq_head_p, pfd_p, 1491 PCI_STAT_R_MAST_AB)) { 1492 hdl_sts = PF_HDL_FOUND; 1493 err = PF_ERR_MATCHED_RC; 1494 goto done; 1495 } 1496 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1497 B_FALSE); 1498 break; 1499 case PCI_PCIX_CMD_SPL: 1500 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1501 B_FALSE); 1502 break; 1503 case PCI_PCIX_CMD_DADR: 1504 cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 1505 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 1506 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 1507 if (cmd != PCI_PCIX_CMD_DADR) 1508 goto cmd_switch; 1509 /* FALLTHROUGH */ 1510 default: 1511 /* Unexpected situation, panic */ 1512 hdl_sts = PF_HDL_NOTFOUND; 1513 } 1514 1515 if (hdl_sts == PF_HDL_FOUND) 1516 err = PF_ERR_MATCHED_DEVICE; 1517 else 1518 err = PF_ERR_PANIC; 1519 } else { 1520 /* 1521 * Check to see if it is a non-posted write. If so, a UR 1522 * Completion would have been sent. 1523 */ 1524 if ((PCIE_ERR_REG(pfd_p)->pcie_err_status & 1525 PCIE_DEVSTS_UR_DETECTED) && 1526 pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB)) 1527 err = PF_ERR_MATCHED_RC; 1528 1529 /* Check for posted writes. Transaction is lost. */ 1530 if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & 1531 PCI_STAT_S_PERROR) 1532 err = PF_ERR_PANIC; 1533 1534 /* 1535 * All other scenarios are due to read completions. Check for 1536 * PERR on the primary side. If found the primary side error 1537 * handling will take care of this error. 1538 */ 1539 if (err == PF_ERR_NO_ERROR) { 1540 if (PCI_ERR_REG(pfd_p)->pci_err_status & 1541 PCI_STAT_PERROR) 1542 err = PF_ERR_MATCHED_PARENT; 1543 else 1544 err = PF_ERR_PANIC; 1545 } 1546 } 1547 1548 done: 1549 return (err); 1550 } 1551 1552 /* 1553 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, 1554 * check the logs and see if an associated handler for this transaction can be 1555 * found. 1556 */ 1557 /* ARGSUSED */ 1558 static int 1559 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1560 pf_data_t *pfd_p) 1561 { 1562 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1563 1564 /* 1565 * If AERs are supported find the logs in this device, otherwise look in 1566 * it's parent's logs. 1567 */ 1568 if (HAS_AER_LOGS(pfd_p, bit)) { 1569 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0); 1570 1571 /* 1572 * Double check that the log contains a poisoned TLP. 1573 * Some devices like PLX switch do not log poison TLP headers. 1574 */ 1575 if (hdr->ep) { 1576 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == 1577 PF_HDL_FOUND) 1578 return (PF_ERR_MATCHED_DEVICE); 1579 } 1580 1581 /* 1582 * If an address is found and hdl lookup failed panic. 1583 * Otherwise check parents to see if there was enough 1584 * information recover. 1585 */ 1586 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr) 1587 return (PF_ERR_PANIC); 1588 } 1589 1590 /* 1591 * Check to see if the rc has already handled this error or a parent has 1592 * already handled this error. 1593 * 1594 * If the error info in the RC wasn't enough to find the fault device, 1595 * such as if the faulting device lies behind a PCIe-PCI bridge from a 1596 * poisoned completion, check to see if the PCIe-PCI bridge has enough 1597 * info to recover. For completion TLP's, the AER header logs only 1598 * contain the faulting BDF in the Root Port. For PCIe device the fault 1599 * BDF is the fault device. But if the fault device is behind a 1600 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI 1601 * bridge's secondary bus number. 1602 */ 1603 if (!PFD_IS_ROOT(pfd_p)) { 1604 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p)); 1605 pf_data_t *parent_pfd_p; 1606 1607 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) { 1608 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1609 return (PF_ERR_MATCHED_RC); 1610 } 1611 1612 parent_pfd_p = PCIE_DIP2PFD(pdip); 1613 1614 if (HAS_AER_LOGS(parent_pfd_p, bit)) 1615 return (PF_ERR_MATCHED_PARENT); 1616 } else { 1617 pf_data_t *bdg_pfd_p; 1618 pcie_req_id_t secbus; 1619 1620 /* 1621 * Looking for a pcie bridge only makes sense if the BDF 1622 * Dev/Func = 0/0 1623 */ 1624 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1625 goto done; 1626 1627 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf; 1628 1629 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF)) 1630 goto done; 1631 1632 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus); 1633 1634 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p, 1635 PCIE_AER_SUCE_PERR_ASSERT)) { 1636 return pf_analyse_perr_assert(derr, 1637 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p); 1638 } 1639 } 1640 done: 1641 return (PF_ERR_PANIC); 1642 } 1643 1644 /* 1645 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split 1646 * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an 1647 * associated handler for this transaction can be found. 1648 */ 1649 /* ARGSUSED */ 1650 static int 1651 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1652 pf_data_t *pfd_p) 1653 { 1654 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1655 uint16_t cmd; 1656 int sts = PF_HDL_NOTFOUND; 1657 1658 if (!HAS_SAER_LOGS(pfd_p, bit)) 1659 return (PF_ERR_PANIC); 1660 1661 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1662 return (PF_ERR_PANIC); 1663 1664 if (cmd == PCI_PCIX_CMD_SPL) 1665 sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE); 1666 1667 if (sts == PF_HDL_FOUND) 1668 return (PF_ERR_MATCHED_DEVICE); 1669 1670 return (PF_ERR_PANIC); 1671 } 1672 1673 /* 1674 * PCIe Timeout error analyser. This error can be forgiven if it is marked as 1675 * CE Advisory. If it is marked as advisory, this means the HW can recover 1676 * and/or retry the transaction automatically. 1677 */ 1678 /* ARGSUSED */ 1679 static int 1680 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1681 pf_data_t *pfd_p) 1682 { 1683 if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p)) 1684 return (PF_ERR_NO_PANIC); 1685 1686 return (PF_ERR_PANIC); 1687 } 1688 1689 /* 1690 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by 1691 * matching the device BDF with the TLP Log. If misrouting panic, otherwise 1692 * don't panic. 1693 */ 1694 /* ARGSUSED */ 1695 static int 1696 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1697 pf_data_t *pfd_p) 1698 { 1699 if (HAS_AER_LOGS(pfd_p, bit) && 1700 (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16))) 1701 return (PF_ERR_NO_PANIC); 1702 1703 return (PF_ERR_PANIC); 1704 } 1705 1706 /* 1707 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data 1708 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for 1709 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a 1710 * Poisoned TLP. 1711 * 1712 * Non-Posted Writes will also generate a UR in the completion status, which the 1713 * RC should also see. 1714 */ 1715 /* ARGSUSED */ 1716 static int 1717 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1718 pf_data_t *pfd_p) 1719 { 1720 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1721 1722 if (!HAS_SAER_LOGS(pfd_p, bit)) 1723 return (PF_ERR_PANIC); 1724 1725 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1726 return (PF_ERR_MATCHED_RC); 1727 1728 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND) 1729 return (PF_ERR_MATCHED_DEVICE); 1730 1731 return (PF_ERR_PANIC); 1732 } 1733 1734 /* ARGSUSED */ 1735 static int 1736 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1737 pf_data_t *pfd_p) 1738 { 1739 return (PF_ERR_NO_PANIC); 1740 } 1741 1742 /* ARGSUSED */ 1743 static int 1744 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1745 pf_data_t *pfd_p) 1746 { 1747 return (PF_ERR_PANIC); 1748 } 1749 1750 /* 1751 * If a PCIe device does not support AER, assume all AER statuses have been set, 1752 * unless other registers do not indicate a certain error occuring. 1753 */ 1754 static void 1755 pf_adjust_for_no_aer(pf_data_t *pfd_p) 1756 { 1757 uint32_t aer_ue = 0; 1758 uint16_t status; 1759 1760 if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1761 return; 1762 1763 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED) 1764 aer_ue = PF_AER_FATAL_ERR; 1765 1766 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) { 1767 aer_ue = PF_AER_NON_FATAL_ERR; 1768 status = PCI_ERR_REG(pfd_p)->pci_err_status; 1769 1770 /* Check if the device received a PTLP */ 1771 if (!(status & PCI_STAT_PERROR)) 1772 aer_ue &= ~PCIE_AER_UCE_PTLP; 1773 1774 /* Check if the device signaled a CA */ 1775 if (!(status & PCI_STAT_S_TARG_AB)) 1776 aer_ue &= ~PCIE_AER_UCE_CA; 1777 1778 /* Check if the device sent a UR */ 1779 if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status & 1780 PCIE_DEVSTS_UR_DETECTED)) 1781 aer_ue &= ~PCIE_AER_UCE_UR; 1782 1783 /* 1784 * Ignore ECRCs as it is optional and will manefest itself as 1785 * another error like PTLP and MFP 1786 */ 1787 aer_ue &= ~PCIE_AER_UCE_ECRC; 1788 1789 /* 1790 * Generally if NFE is set, SERR should also be set. Exception: 1791 * When certain non-fatal errors are masked, and some of them 1792 * happened to be the cause of the NFE, SERR will not be set and 1793 * they can not be the source of this interrupt. 1794 * 1795 * On x86, URs are masked (NFE + UR can be set), if any other 1796 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did 1797 * occur, SERR should be set since they are not masked. So if 1798 * SERR is not set, none of them occurred. 1799 */ 1800 if (!(status & PCI_STAT_S_SYSERR)) 1801 aer_ue &= ~PCIE_AER_UCE_TO; 1802 } 1803 1804 if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) { 1805 aer_ue &= ~PCIE_AER_UCE_TRAINING; 1806 aer_ue &= ~PCIE_AER_UCE_SD; 1807 } 1808 1809 PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue; 1810 } 1811 1812 static void 1813 pf_adjust_for_no_saer(pf_data_t *pfd_p) 1814 { 1815 uint32_t s_aer_ue = 0; 1816 uint16_t status; 1817 1818 if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1819 return; 1820 1821 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED) 1822 s_aer_ue = PF_SAER_FATAL_ERR; 1823 1824 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) { 1825 s_aer_ue = PF_SAER_NON_FATAL_ERR; 1826 status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat; 1827 1828 /* Check if the device received a UC_DATA */ 1829 if (!(status & PCI_STAT_PERROR)) 1830 s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR; 1831 1832 /* Check if the device received a RCVD_MA/MA_ON_SC */ 1833 if (!(status & (PCI_STAT_R_MAST_AB))) { 1834 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA; 1835 s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC; 1836 } 1837 1838 /* Check if the device received a RCVD_TA/TA_ON_SC */ 1839 if (!(status & (PCI_STAT_R_TARG_AB))) { 1840 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA; 1841 s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC; 1842 } 1843 } 1844 1845 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue; 1846 } 1847 1848 /* Find the PCIe-PCI bridge based on secondary bus number */ 1849 static pf_data_t * 1850 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus) 1851 { 1852 pf_data_t *bdg_pfd_p; 1853 1854 /* Search down for the PCIe-PCI device. */ 1855 for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p; 1856 bdg_pfd_p = bdg_pfd_p->pe_next) { 1857 if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) && 1858 PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus) 1859 return (bdg_pfd_p); 1860 } 1861 1862 return (NULL); 1863 } 1864 1865 /* Find the PCIe-PCI bridge of a PCI device */ 1866 static pf_data_t * 1867 pf_get_parent_pcie_bridge(pf_data_t *pfd_p) 1868 { 1869 dev_info_t *dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1870 1871 /* This only makes sense if the device is a PCI device */ 1872 if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p))) 1873 return (NULL); 1874 1875 /* 1876 * Search up for the PCIe-PCI device. Watchout for x86 where pci 1877 * devices hang directly off of NPE. 1878 */ 1879 for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) { 1880 if (dip == rp_dip) 1881 dip = NULL; 1882 1883 if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip))) 1884 return (PCIE_DIP2PFD(dip)); 1885 } 1886 1887 return (NULL); 1888 } 1889 1890 /* 1891 * See if a leaf error was bubbled up to the Root Complex (RC) and handled. 1892 * As of right now only RC's have enough information to have errors found in the 1893 * fabric to be matched to the RC. Note that Root Port's (RP) do not carry 1894 * enough information. Currently known RC's are SPARC Fire architecture and 1895 * it's equivalents, and x86's NPE. 1896 * SPARC Fire architectures have a plethora of error registers, while currently 1897 * NPE only have the address of a failed load. 1898 * 1899 * Check if the RC logged an error with the appropriate status type/abort type. 1900 * Ex: Parity Error, Received Master/Target Abort 1901 * Check if either the fault address found in the rc matches the device's 1902 * assigned address range (PIO's only) or the fault BDF in the rc matches the 1903 * device's BDF or Secondary Bus/Bus Range. 1904 */ 1905 static boolean_t 1906 pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p, 1907 uint32_t abort_type) 1908 { 1909 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p); 1910 pf_data_t *rc_pfd_p; 1911 pcie_req_id_t fault_bdf; 1912 1913 for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p); 1914 rc_pfd_p = rc_pfd_p->pe_next) { 1915 /* Only root complex's have enough information to match */ 1916 if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p))) 1917 continue; 1918 1919 /* If device and rc abort type does not match continue */ 1920 if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type)) 1921 continue; 1922 1923 fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf; 1924 1925 /* The Fault BDF = Device's BDF */ 1926 if (fault_bdf == bus_p->bus_bdf) 1927 return (B_TRUE); 1928 1929 /* The Fault Addr is in device's address range */ 1930 if (pf_in_addr_range(bus_p, 1931 PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr)) 1932 return (B_TRUE); 1933 1934 /* The Fault BDF is from PCIe-PCI Bridge's secondary bus */ 1935 if (PCIE_IS_PCIE_BDG(bus_p) && 1936 pf_in_bus_range(bus_p, fault_bdf)) 1937 return (B_TRUE); 1938 } 1939 1940 return (B_FALSE); 1941 } 1942 1943 /* 1944 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then 1945 * it is a DMA, otherwise it's a PIO 1946 */ 1947 static void 1948 pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type, 1949 pcie_req_id_t *bdf) { 1950 pf_data_t *rc_pfd_p; 1951 1952 /* Could be DMA or PIO. Find out by look at error type. */ 1953 switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) { 1954 case PCIE_AER_SUCE_TA_ON_SC: 1955 case PCIE_AER_SUCE_MA_ON_SC: 1956 *trans_type = PF_ADDR_DMA; 1957 return; 1958 case PCIE_AER_SUCE_RCVD_TA: 1959 case PCIE_AER_SUCE_RCVD_MA: 1960 *bdf = PCIE_INVALID_BDF; 1961 *trans_type = PF_ADDR_PIO; 1962 return; 1963 case PCIE_AER_SUCE_USC_ERR: 1964 case PCIE_AER_SUCE_UC_DATA_ERR: 1965 case PCIE_AER_SUCE_PERR_ASSERT: 1966 break; 1967 default: 1968 *addr = 0; 1969 *bdf = PCIE_INVALID_BDF; 1970 *trans_type = 0; 1971 return; 1972 } 1973 1974 *bdf = PCIE_INVALID_BDF; 1975 *trans_type = PF_ADDR_PIO; 1976 for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p; 1977 rc_pfd_p = rc_pfd_p->pe_prev) { 1978 if (PFD_IS_ROOT(rc_pfd_p) && 1979 (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & 1980 PCI_STAT_PERROR)) { 1981 *trans_type = PF_ADDR_DMA; 1982 return; 1983 } 1984 } 1985 } 1986 1987 /* 1988 * pf_pci_decode function decodes the secondary aer transaction logs in 1989 * PCIe-PCI bridges. 1990 * 1991 * The log is 128 bits long and arranged in this manner. 1992 * [0:35] Transaction Attribute (s_aer_h0-saer_h1) 1993 * [36:39] Transaction lower command (saer_h1) 1994 * [40:43] Transaction upper command (saer_h1) 1995 * [44:63] Reserved 1996 * [64:127] Address (saer_h2-saer_h3) 1997 */ 1998 /* ARGSUSED */ 1999 static int 2000 pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) { 2001 pcix_attr_t *attr; 2002 uint64_t addr; 2003 uint32_t trans_type; 2004 pcie_req_id_t bdf = PCIE_INVALID_BDF; 2005 2006 attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0); 2007 *cmd = GET_SAER_CMD(pfd_p); 2008 2009 cmd_switch: 2010 switch (*cmd) { 2011 case PCI_PCIX_CMD_IORD: 2012 case PCI_PCIX_CMD_IOWR: 2013 /* IO Access should always be down stream */ 2014 addr = PCIE_ADV_BDG_HDR(pfd_p, 2); 2015 bdf = attr->rid; 2016 trans_type = PF_ADDR_PIO; 2017 break; 2018 case PCI_PCIX_CMD_MEMRD_DW: 2019 case PCI_PCIX_CMD_MEMRD_BL: 2020 case PCI_PCIX_CMD_MEMRDBL: 2021 case PCI_PCIX_CMD_MEMWR: 2022 case PCI_PCIX_CMD_MEMWR_BL: 2023 case PCI_PCIX_CMD_MEMWRBL: 2024 addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) << 2025 PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2); 2026 bdf = attr->rid; 2027 2028 pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf); 2029 break; 2030 case PCI_PCIX_CMD_CFRD: 2031 case PCI_PCIX_CMD_CFWR: 2032 /* 2033 * CFG Access should always be down stream. Match the BDF in 2034 * the address phase. 2035 */ 2036 addr = 0; 2037 bdf = attr->rid; 2038 trans_type = PF_ADDR_CFG; 2039 break; 2040 case PCI_PCIX_CMD_SPL: 2041 /* 2042 * Check for DMA read completions. The requesting BDF is in the 2043 * Address phase. 2044 */ 2045 addr = 0; 2046 bdf = attr->rid; 2047 trans_type = PF_ADDR_DMA; 2048 break; 2049 case PCI_PCIX_CMD_DADR: 2050 /* 2051 * For Dual Address Cycles the transaction command is in the 2nd 2052 * address phase. 2053 */ 2054 *cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 2055 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 2056 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 2057 if (*cmd != PCI_PCIX_CMD_DADR) 2058 goto cmd_switch; 2059 /* FALLTHROUGH */ 2060 default: 2061 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0; 2062 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF; 2063 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0; 2064 return (DDI_FAILURE); 2065 } 2066 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type; 2067 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf; 2068 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr; 2069 return (DDI_SUCCESS); 2070 } 2071 2072 /* 2073 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler. 2074 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 2075 */ 2076 int 2077 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr, 2078 pcie_req_id_t bdf) 2079 { 2080 ddi_fm_error_t derr; 2081 2082 /* If we don't know the addr or rid just return with NOTFOUND */ 2083 if ((addr == NULL) && !PCIE_CHECK_VALID_BDF(bdf)) 2084 return (PF_HDL_NOTFOUND); 2085 2086 if (!(flag & (PF_ADDR_DMA | PF_ADDR_PIO | PF_ADDR_CFG))) { 2087 return (PF_HDL_NOTFOUND); 2088 } 2089 2090 bzero(&derr, sizeof (ddi_fm_error_t)); 2091 derr.fme_version = DDI_FME_VERSION; 2092 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 2093 derr.fme_ena = ena; 2094 2095 return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf)); 2096 } 2097 2098 static int 2099 pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2100 uint64_t addr, pcie_req_id_t bdf) 2101 { 2102 int status = PF_HDL_NOTFOUND; 2103 ndi_fmc_t *fcp = NULL; 2104 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 2105 pcie_req_id_t dip_bdf; 2106 boolean_t have_lock = B_FALSE; 2107 pcie_bus_t *bus_p; 2108 dev_info_t *cdip; 2109 2110 if (!(bus_p = pf_is_ready(dip))) { 2111 return (status); 2112 } 2113 2114 ASSERT(fmhdl); 2115 if (!i_ddi_fm_handler_owned(dip)) { 2116 /* 2117 * pf_handler_enter always returns SUCCESS if the 'impl' arg is 2118 * NULL. 2119 */ 2120 (void) pf_handler_enter(dip, NULL); 2121 have_lock = B_TRUE; 2122 } 2123 2124 dip_bdf = PCI_GET_BDF(dip); 2125 2126 /* Check if dip and BDF match, if not recurse to it's children. */ 2127 if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) || 2128 dip_bdf == bdf)) { 2129 if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap)) 2130 fcp = fmhdl->fh_dma_cache; 2131 else 2132 fcp = NULL; 2133 2134 if (fcp) 2135 status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr, 2136 bdf, fcp); 2137 2138 2139 if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) && 2140 DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap)) 2141 fcp = fmhdl->fh_acc_cache; 2142 else 2143 fcp = NULL; 2144 2145 if (fcp) 2146 status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr, 2147 bdf, fcp); 2148 } 2149 2150 /* If we found the handler or know it's this device, we're done */ 2151 if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) || 2152 (status == PF_HDL_FOUND))) 2153 goto done; 2154 2155 /* 2156 * If the current devuce us a PCIe-PCI bridge need to check for special 2157 * cases: 2158 * 2159 * If it is a PIO and we don't have an address or this is a DMA, check 2160 * to see if the BDF = secondary bus. If so stop. The BDF isn't a real 2161 * BDF and the fault device could have come from any device in the PCI 2162 * bus. 2163 */ 2164 if (PCIE_IS_PCIE_BDG(bus_p) && 2165 ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) && 2166 ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf)) 2167 goto done; 2168 2169 2170 /* If we can't find the handler check it's children */ 2171 for (cdip = ddi_get_child(dip); cdip; 2172 cdip = ddi_get_next_sibling(cdip)) { 2173 if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL) 2174 continue; 2175 2176 if (pf_in_bus_range(bus_p, bdf) || 2177 pf_in_addr_range(bus_p, addr)) 2178 status = pf_hdl_child_lookup(cdip, derr, flag, addr, 2179 bdf); 2180 2181 if (status == PF_HDL_FOUND) 2182 goto done; 2183 } 2184 2185 done: 2186 if (have_lock == B_TRUE) 2187 pf_handler_exit(dip); 2188 2189 return (status); 2190 } 2191 2192 static int 2193 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2194 uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) { 2195 ndi_fmcentry_t *fep; 2196 int found = 0; 2197 int status; 2198 2199 mutex_enter(&fcp->fc_lock); 2200 for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) { 2201 ddi_fmcompare_t compare_func; 2202 2203 /* 2204 * Compare captured error state with handle 2205 * resources. During the comparison and 2206 * subsequent error handling, we block 2207 * attempts to free the cache entry. 2208 */ 2209 compare_func = (flag == ACC_HANDLE) ? 2210 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t) 2211 fep->fce_resource) : 2212 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t) 2213 fep->fce_resource); 2214 2215 status = compare_func(dip, fep->fce_resource, 2216 (void *)&addr, (void *)&bdf); 2217 2218 if (status == DDI_FM_NONFATAL) { 2219 found++; 2220 2221 /* Set the error for this resource handle */ 2222 if (flag == ACC_HANDLE) { 2223 ddi_acc_handle_t ap = fep->fce_resource; 2224 2225 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status, 2226 DDI_FM_ERR_UNEXPECTED); 2227 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); 2228 derr->fme_acc_handle = ap; 2229 } else { 2230 ddi_dma_handle_t dp = fep->fce_resource; 2231 2232 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status, 2233 DDI_FM_ERR_UNEXPECTED); 2234 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); 2235 derr->fme_dma_handle = dp; 2236 } 2237 } 2238 } 2239 mutex_exit(&fcp->fc_lock); 2240 2241 /* 2242 * If a handler isn't found and we know this is the right device mark 2243 * them all failed. 2244 */ 2245 if ((addr != NULL) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) { 2246 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp); 2247 if (status == PF_HDL_FOUND) 2248 found++; 2249 } 2250 2251 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND); 2252 } 2253 2254 /* 2255 * Automatically decode AER header logs and does a handling look up based on the 2256 * AER header decoding. 2257 * 2258 * For this function only the Primary/Secondary AER Header Logs need to be valid 2259 * in the pfd (PCIe Fault Data) arg. 2260 * 2261 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 2262 */ 2263 static int 2264 pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p, 2265 boolean_t is_primary) 2266 { 2267 int lookup = PF_HDL_NOTFOUND; 2268 2269 if (is_primary) { 2270 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p); 2271 if (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), reg_p) == DDI_SUCCESS) { 2272 lookup = pf_hdl_lookup(rpdip, derr->fme_ena, 2273 reg_p->pcie_ue_tgt_trans, 2274 reg_p->pcie_ue_tgt_addr, 2275 reg_p->pcie_ue_tgt_bdf); 2276 } 2277 } else { 2278 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p); 2279 uint16_t cmd; 2280 if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) { 2281 lookup = pf_hdl_lookup(rpdip, derr->fme_ena, 2282 reg_p->pcie_sue_tgt_trans, 2283 reg_p->pcie_sue_tgt_addr, 2284 reg_p->pcie_sue_tgt_bdf); 2285 } 2286 } 2287 2288 return (lookup); 2289 } 2290 2291 /* 2292 * Decodes the TLP and returns the BDF of the handler, address and transaction 2293 * type if known. 2294 * 2295 * Types of TLP logs seen in RC, and what to extract: 2296 * 2297 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR 2298 * Memory(PIO) - address, PF_PIO_ADDR 2299 * CFG - Should not occur and result in UR 2300 * Completion(DMA) - Requester BDF, PF_DMA_ADDR 2301 * Completion(PIO) - Requester BDF, PF_PIO_ADDR 2302 * 2303 * Types of TLP logs seen in SW/Leaf, and what to extract: 2304 * 2305 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR 2306 * Memory(PIO) - address, PF_PIO_ADDR 2307 * CFG - Destined BDF, address, PF_CFG_ADDR 2308 * Completion(DMA) - Requester BDF, PF_DMA_ADDR 2309 * Completion(PIO) - Requester BDF, PF_PIO_ADDR 2310 * 2311 * The adv_reg_p must be passed in separately for use with SPARC RPs. A 2312 * SPARC RP could have multiple AER header logs which cannot be directly 2313 * accessed via the bus_p. 2314 */ 2315 int 2316 pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) { 2317 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr; 2318 pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF; 2319 uint64_t flt_addr = 0; 2320 uint32_t flt_trans_type = 0; 2321 2322 adv_reg_p->pcie_ue_tgt_addr = 0; 2323 adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF; 2324 adv_reg_p->pcie_ue_tgt_trans = 0; 2325 2326 my_bdf = bus_p->bus_bdf; 2327 switch (tlp_hdr->type) { 2328 case PCIE_TLP_TYPE_IO: 2329 case PCIE_TLP_TYPE_MEM: 2330 case PCIE_TLP_TYPE_MEMLK: 2331 /* Grab the 32/64bit fault address */ 2332 if (tlp_hdr->fmt & 0x1) { 2333 flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32); 2334 flt_addr |= adv_reg_p->pcie_ue_hdr[3]; 2335 } else { 2336 flt_addr = adv_reg_p->pcie_ue_hdr[2]; 2337 } 2338 2339 tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16); 2340 2341 /* 2342 * If the req bdf >= this.bdf, then it means the request is this 2343 * device or came from a device below it. Unless this device is 2344 * a PCIe root port then it means is a DMA, otherwise PIO. 2345 */ 2346 if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) { 2347 flt_trans_type = PF_ADDR_DMA; 2348 flt_bdf = tlp_bdf; 2349 } else if (PCIE_IS_ROOT(bus_p) && 2350 (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) || 2351 (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) { 2352 flt_trans_type = PF_ADDR_DMA; 2353 flt_bdf = tlp_bdf; 2354 } else { 2355 flt_trans_type = PF_ADDR_PIO; 2356 flt_bdf = PCIE_INVALID_BDF; 2357 } 2358 break; 2359 case PCIE_TLP_TYPE_CFG0: 2360 case PCIE_TLP_TYPE_CFG1: 2361 flt_addr = 0; 2362 flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16); 2363 flt_trans_type = PF_ADDR_CFG; 2364 break; 2365 case PCIE_TLP_TYPE_CPL: 2366 case PCIE_TLP_TYPE_CPLLK: 2367 { 2368 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)adv_reg_p->pcie_ue_hdr; 2369 2370 flt_addr = NULL; 2371 flt_bdf = cpl_tlp->rid; 2372 2373 /* 2374 * If the cpl bdf < this.bdf, then it means the request is this 2375 * device or came from a device below it. Unless this device is 2376 * a PCIe root port then it means is a DMA, otherwise PIO. 2377 */ 2378 if (cpl_tlp->rid > cpl_tlp->cid) { 2379 flt_trans_type = PF_ADDR_DMA; 2380 } else { 2381 flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG; 2382 } 2383 break; 2384 } 2385 default: 2386 return (DDI_FAILURE); 2387 } 2388 2389 adv_reg_p->pcie_ue_tgt_addr = flt_addr; 2390 adv_reg_p->pcie_ue_tgt_bdf = flt_bdf; 2391 adv_reg_p->pcie_ue_tgt_trans = flt_trans_type; 2392 2393 return (DDI_SUCCESS); 2394 } 2395 2396 #define PCIE_EREPORT DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC 2397 static int 2398 pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport, 2399 nvlist_t **detector, errorq_elem_t **eqep) 2400 { 2401 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 2402 char device_path[MAXPATHLEN]; 2403 nv_alloc_t *nva; 2404 2405 *eqep = errorq_reserve(fmhdl->fh_errorq); 2406 if (*eqep == NULL) { 2407 atomic_add_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64, 1); 2408 return (DDI_FAILURE); 2409 } 2410 2411 *ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep); 2412 nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep); 2413 2414 ASSERT(*ereport); 2415 ASSERT(nva); 2416 2417 /* 2418 * Use the dev_path/devid for this device instance. 2419 */ 2420 *detector = fm_nvlist_create(nva); 2421 if (dip == ddi_root_node()) { 2422 device_path[0] = '/'; 2423 device_path[1] = '\0'; 2424 } else { 2425 (void) ddi_pathname(dip, device_path); 2426 } 2427 2428 fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL, 2429 device_path, NULL); 2430 2431 if (ena == 0) 2432 ena = fm_ena_generate(0, FM_ENA_FMT1); 2433 2434 fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL); 2435 2436 return (DDI_SUCCESS); 2437 } 2438 2439 /* ARGSUSED */ 2440 static void 2441 pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector, 2442 errorq_elem_t **eqep) 2443 { 2444 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 2445 2446 errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC); 2447 } 2448 2449 static void 2450 pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl) 2451 { 2452 nvlist_t *ereport; 2453 nvlist_t *detector; 2454 errorq_elem_t *eqep; 2455 pcie_bus_t *bus_p; 2456 pf_data_t *pfd_p; 2457 uint32_t total = impl->pf_total; 2458 2459 /* 2460 * Ereports need to be sent in a top down fashion. The fabric translator 2461 * expects the ereports from the Root first. This is needed to tell if 2462 * the system contains a PCIe complaint RC/RP. 2463 */ 2464 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 2465 bus_p = PCIE_PFD2BUS(pfd_p); 2466 pfd_p->pe_valid = B_FALSE; 2467 2468 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED || 2469 PFD_IS_RC(pfd_p) || 2470 !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p)))) 2471 continue; 2472 2473 if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena, 2474 &ereport, &detector, &eqep) != DDI_SUCCESS) 2475 continue; 2476 2477 /* Generic PCI device information */ 2478 fm_payload_set(ereport, 2479 "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf, 2480 "device_id", DATA_TYPE_UINT16, 2481 (bus_p->bus_dev_ven_id >> 16), 2482 "vendor_id", DATA_TYPE_UINT16, 2483 (bus_p->bus_dev_ven_id & 0xFFFF), 2484 "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id, 2485 "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type, 2486 "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off, 2487 "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off, 2488 "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off, 2489 "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver, 2490 NULL); 2491 2492 /* PCI registers */ 2493 fm_payload_set(ereport, 2494 "pci_status", DATA_TYPE_UINT16, 2495 PCI_ERR_REG(pfd_p)->pci_err_status, 2496 "pci_command", DATA_TYPE_UINT16, 2497 PCI_ERR_REG(pfd_p)->pci_cfg_comm, 2498 NULL); 2499 2500 /* PCI bridge registers */ 2501 if (PCIE_IS_BDG(bus_p)) { 2502 fm_payload_set(ereport, 2503 "pci_bdg_sec_status", DATA_TYPE_UINT16, 2504 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat, 2505 "pci_bdg_ctrl", DATA_TYPE_UINT16, 2506 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl, 2507 NULL); 2508 } 2509 2510 /* PCIx registers */ 2511 if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) { 2512 fm_payload_set(ereport, 2513 "pcix_status", DATA_TYPE_UINT32, 2514 PCIX_ERR_REG(pfd_p)->pcix_status, 2515 "pcix_command", DATA_TYPE_UINT16, 2516 PCIX_ERR_REG(pfd_p)->pcix_command, 2517 NULL); 2518 } 2519 2520 /* PCIx ECC Registers */ 2521 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 2522 pf_pcix_ecc_regs_t *ecc_bdg_reg; 2523 pf_pcix_ecc_regs_t *ecc_reg; 2524 2525 if (PCIE_IS_BDG(bus_p)) 2526 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0); 2527 ecc_reg = PCIX_ECC_REG(pfd_p); 2528 fm_payload_set(ereport, 2529 "pcix_ecc_control_0", DATA_TYPE_UINT16, 2530 PCIE_IS_BDG(bus_p) ? 2531 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) : 2532 (ecc_reg->pcix_ecc_ctlstat >> 16), 2533 "pcix_ecc_status_0", DATA_TYPE_UINT16, 2534 PCIE_IS_BDG(bus_p) ? 2535 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) : 2536 (ecc_reg->pcix_ecc_ctlstat & 0xFFFF), 2537 "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32, 2538 PCIE_IS_BDG(bus_p) ? 2539 ecc_bdg_reg->pcix_ecc_fstaddr : 2540 ecc_reg->pcix_ecc_fstaddr, 2541 "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32, 2542 PCIE_IS_BDG(bus_p) ? 2543 ecc_bdg_reg->pcix_ecc_secaddr : 2544 ecc_reg->pcix_ecc_secaddr, 2545 "pcix_ecc_attr_0", DATA_TYPE_UINT32, 2546 PCIE_IS_BDG(bus_p) ? 2547 ecc_bdg_reg->pcix_ecc_attr : 2548 ecc_reg->pcix_ecc_attr, 2549 NULL); 2550 } 2551 2552 /* PCIx ECC Bridge Registers */ 2553 if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) { 2554 pf_pcix_ecc_regs_t *ecc_bdg_reg; 2555 2556 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1); 2557 fm_payload_set(ereport, 2558 "pcix_ecc_control_1", DATA_TYPE_UINT16, 2559 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16), 2560 "pcix_ecc_status_1", DATA_TYPE_UINT16, 2561 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF), 2562 "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32, 2563 ecc_bdg_reg->pcix_ecc_fstaddr, 2564 "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32, 2565 ecc_bdg_reg->pcix_ecc_secaddr, 2566 "pcix_ecc_attr_1", DATA_TYPE_UINT32, 2567 ecc_bdg_reg->pcix_ecc_attr, 2568 NULL); 2569 } 2570 2571 /* PCIx Bridge */ 2572 if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) { 2573 fm_payload_set(ereport, 2574 "pcix_bdg_status", DATA_TYPE_UINT32, 2575 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat, 2576 "pcix_bdg_sec_status", DATA_TYPE_UINT16, 2577 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat, 2578 NULL); 2579 } 2580 2581 /* PCIe registers */ 2582 if (PCIE_IS_PCIE(bus_p)) { 2583 fm_payload_set(ereport, 2584 "pcie_status", DATA_TYPE_UINT16, 2585 PCIE_ERR_REG(pfd_p)->pcie_err_status, 2586 "pcie_command", DATA_TYPE_UINT16, 2587 PCIE_ERR_REG(pfd_p)->pcie_err_ctl, 2588 "pcie_dev_cap", DATA_TYPE_UINT32, 2589 PCIE_ERR_REG(pfd_p)->pcie_dev_cap, 2590 NULL); 2591 } 2592 2593 /* PCIe AER registers */ 2594 if (PCIE_HAS_AER(bus_p)) { 2595 fm_payload_set(ereport, 2596 "pcie_adv_ctl", DATA_TYPE_UINT32, 2597 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl, 2598 "pcie_ue_status", DATA_TYPE_UINT32, 2599 PCIE_ADV_REG(pfd_p)->pcie_ue_status, 2600 "pcie_ue_mask", DATA_TYPE_UINT32, 2601 PCIE_ADV_REG(pfd_p)->pcie_ue_mask, 2602 "pcie_ue_sev", DATA_TYPE_UINT32, 2603 PCIE_ADV_REG(pfd_p)->pcie_ue_sev, 2604 "pcie_ue_hdr0", DATA_TYPE_UINT32, 2605 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0], 2606 "pcie_ue_hdr1", DATA_TYPE_UINT32, 2607 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1], 2608 "pcie_ue_hdr2", DATA_TYPE_UINT32, 2609 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2], 2610 "pcie_ue_hdr3", DATA_TYPE_UINT32, 2611 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3], 2612 "pcie_ce_status", DATA_TYPE_UINT32, 2613 PCIE_ADV_REG(pfd_p)->pcie_ce_status, 2614 "pcie_ce_mask", DATA_TYPE_UINT32, 2615 PCIE_ADV_REG(pfd_p)->pcie_ce_mask, 2616 NULL); 2617 } 2618 2619 /* PCIe AER decoded header */ 2620 if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) { 2621 fm_payload_set(ereport, 2622 "pcie_ue_tgt_trans", DATA_TYPE_UINT32, 2623 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans, 2624 "pcie_ue_tgt_addr", DATA_TYPE_UINT64, 2625 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr, 2626 "pcie_ue_tgt_bdf", DATA_TYPE_UINT16, 2627 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf, 2628 NULL); 2629 /* Clear these values as they no longer valid */ 2630 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0; 2631 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0; 2632 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF; 2633 } 2634 2635 /* PCIe BDG AER registers */ 2636 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) { 2637 fm_payload_set(ereport, 2638 "pcie_sue_adv_ctl", DATA_TYPE_UINT32, 2639 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl, 2640 "pcie_sue_status", DATA_TYPE_UINT32, 2641 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status, 2642 "pcie_sue_mask", DATA_TYPE_UINT32, 2643 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask, 2644 "pcie_sue_sev", DATA_TYPE_UINT32, 2645 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev, 2646 "pcie_sue_hdr0", DATA_TYPE_UINT32, 2647 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0], 2648 "pcie_sue_hdr1", DATA_TYPE_UINT32, 2649 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1], 2650 "pcie_sue_hdr2", DATA_TYPE_UINT32, 2651 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2], 2652 "pcie_sue_hdr3", DATA_TYPE_UINT32, 2653 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3], 2654 NULL); 2655 } 2656 2657 /* PCIe BDG AER decoded header */ 2658 if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p, 2659 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) { 2660 fm_payload_set(ereport, 2661 "pcie_sue_tgt_trans", DATA_TYPE_UINT32, 2662 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans, 2663 "pcie_sue_tgt_addr", DATA_TYPE_UINT64, 2664 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr, 2665 "pcie_sue_tgt_bdf", DATA_TYPE_UINT16, 2666 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf, 2667 NULL); 2668 /* Clear these values as they no longer valid */ 2669 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0; 2670 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0; 2671 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = 2672 PCIE_INVALID_BDF; 2673 } 2674 2675 /* PCIe RP registers */ 2676 if (PCIE_IS_RP(bus_p)) { 2677 fm_payload_set(ereport, 2678 "pcie_rp_status", DATA_TYPE_UINT32, 2679 PCIE_RP_REG(pfd_p)->pcie_rp_status, 2680 "pcie_rp_control", DATA_TYPE_UINT16, 2681 PCIE_RP_REG(pfd_p)->pcie_rp_ctl, 2682 NULL); 2683 } 2684 2685 /* PCIe RP AER registers */ 2686 if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) { 2687 fm_payload_set(ereport, 2688 "pcie_adv_rp_status", DATA_TYPE_UINT32, 2689 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status, 2690 "pcie_adv_rp_command", DATA_TYPE_UINT32, 2691 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd, 2692 "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16, 2693 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id, 2694 "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16, 2695 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id, 2696 NULL); 2697 } 2698 2699 /* Misc ereport information */ 2700 fm_payload_set(ereport, 2701 "remainder", DATA_TYPE_UINT32, total--, 2702 "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags, 2703 NULL); 2704 2705 pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector, 2706 &eqep); 2707 } 2708 2709 /* Unlock all the devices in the queue */ 2710 for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) { 2711 if (pfd_p->pe_lock) { 2712 pf_handler_exit(PCIE_PFD2DIP(pfd_p)); 2713 } 2714 } 2715 } 2716 2717 /* 2718 * pf_handler_enter must be called to serial access to each device's pf_data_t. 2719 * Once error handling is finished with the device call pf_handler_exit to allow 2720 * other threads to access it. The same thread may call pf_handler_enter 2721 * several times without any consequences. 2722 * 2723 * The "impl" variable is passed in during scan fabric to double check that 2724 * there is not a recursive algorithm and to ensure only one thread is doing a 2725 * fabric scan at all times. 2726 * 2727 * In some cases "impl" is not available, such as "child lookup" being called 2728 * from outside of scan fabric, just pass in NULL for this variable and this 2729 * extra check will be skipped. 2730 */ 2731 static int 2732 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl) 2733 { 2734 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 2735 2736 ASSERT(pfd_p); 2737 2738 /* 2739 * Check to see if the lock has already been taken by this 2740 * thread. If so just return and don't take lock again. 2741 */ 2742 if (!pfd_p->pe_lock || !impl) { 2743 i_ddi_fm_handler_enter(dip); 2744 pfd_p->pe_lock = B_TRUE; 2745 return (PF_SCAN_SUCCESS); 2746 } 2747 2748 /* Check to see that this dip is already in the "impl" error queue */ 2749 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 2750 if (PCIE_PFD2DIP(pfd_p) == dip) { 2751 return (PF_SCAN_SUCCESS); 2752 } 2753 } 2754 2755 return (PF_SCAN_DEADLOCK); 2756 } 2757 2758 static void 2759 pf_handler_exit(dev_info_t *dip) 2760 { 2761 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 2762 2763 ASSERT(pfd_p); 2764 2765 ASSERT(pfd_p->pe_lock == B_TRUE); 2766 i_ddi_fm_handler_exit(dip); 2767 pfd_p->pe_lock = B_FALSE; 2768 } 2769 2770 /* 2771 * This function calls the driver's callback function (if it's FMA hardened 2772 * and callback capable). This function relies on the current thread already 2773 * owning the driver's fmhdl lock. 2774 */ 2775 static int 2776 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr) 2777 { 2778 int cb_sts = DDI_FM_OK; 2779 2780 if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) { 2781 dev_info_t *pdip = ddi_get_parent(dip); 2782 struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl; 2783 struct i_ddi_fmtgt *tgt = hdl->fh_tgts; 2784 struct i_ddi_errhdl *errhdl; 2785 while (tgt != NULL) { 2786 if (dip == tgt->ft_dip) { 2787 errhdl = tgt->ft_errhdl; 2788 cb_sts = errhdl->eh_func(dip, derr, 2789 errhdl->eh_impl); 2790 break; 2791 } 2792 tgt = tgt->ft_next; 2793 } 2794 } 2795 return (cb_sts); 2796 } 2797