/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>
#include <sys/fm/io/pci.h>
#include <sys/promif.h>
#include <sys/disp.h>
#include <sys/atomic.h>
#include <sys/pcie.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>

/* PCIe Device Status bits that indicate a bridge detected an error */
#define PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
    PCIE_DEVSTS_CE_DETECTED)

/* Legacy PCI secondary status bits that indicate a bridge error */
#define PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
    PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable errors treated as fatal vs non-fatal */
#define PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
    PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
    PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* Secondary (PCIe-to-PCI bridge) AER errors, fatal vs non-fatal */
#define PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
    PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
    PCIE_AER_SUCE_SERR_ASSERT)
#define PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
    PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
    PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
    PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

/* Parity error on either the primary or secondary side */
#define PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/*
 * True when "bit" is the error recorded in the AER First Error Pointer;
 * only then are the captured AER header logs valid for that error.
 */
#define PF_FIRST_AER_ERR(bit, adv) \
    (bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

#define HAS_AER_LOGS(pfd_p, bit) \
    (PCIE_HAS_AER(pfd_p->pe_bus_p) && \
    PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/* Same check against the secondary (bridge) AER first error pointer */
#define PF_FIRST_SAER_ERR(bit, adv) \
    (bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

#define HAS_SAER_LOGS(pfd_p, bit) \
    (PCIE_HAS_AER(pfd_p->pe_bus_p) && \
    PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the PCI command from the secondary AER header log */
#define GET_SAER_CMD(pfd_p) \
    ((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
    PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* True when the correctable error is only an Advisory Non-Fatal */
#define CE_ADVISORY(pfd_p) \
    (PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)

/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
    uint32_t bit;		/* Error bit */
    int (*handler)();		/* Error handling function */
} pf_fab_err_tbl_t;

static pcie_bus_t *pf_is_ready(dev_info_t *);
/* Functions for scanning errors */
static int pf_default_hdl(dev_info_t *, pf_impl_t *);
static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
static boolean_t pf_in_bus_range(pcie_bus_t *, pcie_req_id_t);
static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);

static int pf_pci_decode(pf_data_t *, uint16_t *);

/* Functions for gathering errors */
static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg);
static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);

/* Functions for analysing errors */
static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
    uint32_t);
static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
    pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_no_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);

/* PCIe Fabric Handle Lookup Support Functions. */
static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
    uint64_t, pcie_req_id_t);
static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
    pcie_req_id_t, ndi_fmc_t *);
static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
    boolean_t);

static int pf_handler_enter(dev_info_t *, pf_impl_t *);
static void pf_handler_exit(dev_info_t *);

boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
int pcie_disable_scan = 0;		/* Disable fabric scan */

/*
 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The
 * caller may create a local pf_data_t with the "root fault"
 * information populated to either do a precise or full scan. More
 * than one pf_data_t maybe linked together if there are multiple
 * errors. Only a PCIe compliant Root Port device may pass in NULL
 * for the root_pfd_p.
 *
 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
 * the rdip. PCIe Root ports should call pf_scan_fabric using it's parent as
 * the rdip.
 *
 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
 * any RC detected errors that propagated to/from the fabric.
 *
 * This code assumes that by the time pf_scan_fabric is
 * called, pf_handler_enter has NOT been called on the rdip.
177 */ 178 int 179 pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p) 180 { 181 pf_impl_t impl; 182 pf_data_t *pfd_p, *pfd_head_p, *pfd_tail_p; 183 int scan_flag = PF_SCAN_SUCCESS; 184 int analyse_flag = PF_ERR_NO_ERROR; 185 boolean_t full_scan = pcie_full_scan; 186 187 if (pcie_disable_scan) 188 return (analyse_flag); 189 190 /* Find the head and tail of this link list */ 191 pfd_head_p = root_pfd_p; 192 for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next; 193 pfd_tail_p = pfd_tail_p->pe_next) 194 ; 195 196 /* Save head/tail */ 197 impl.pf_total = 0; 198 impl.pf_derr = derr; 199 impl.pf_dq_head_p = pfd_head_p; 200 impl.pf_dq_tail_p = pfd_tail_p; 201 202 /* If scan is initiated from RP then RP itself must be scanned. */ 203 if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) && 204 !root_pfd_p) { 205 scan_flag = pf_handler_enter(rdip, &impl); 206 if (scan_flag & PF_SCAN_DEADLOCK) 207 goto done; 208 209 scan_flag = pf_default_hdl(rdip, &impl); 210 if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD) 211 goto done; 212 } 213 214 /* 215 * Scan the fabric using the scan_bdf and scan_addr in error q. 216 * scan_bdf will be valid in the following cases: 217 * - Fabric message 218 * - Poisoned TLP 219 * - Signaled UR/CA 220 * - Received UR/CA 221 * - PIO load failures 222 */ 223 for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p); 224 pfd_p = pfd_p->pe_next) { 225 impl.pf_fault = PCIE_ROOT_FAULT(pfd_p); 226 227 if (impl.pf_fault->full_scan) 228 full_scan = B_TRUE; 229 230 if (full_scan || 231 PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) || 232 impl.pf_fault->scan_addr) 233 scan_flag |= pf_dispatch(rdip, &impl, full_scan); 234 235 if (full_scan) 236 break; 237 } 238 239 done: 240 /* 241 * If this is due to safe access, don't analyze the errors and return 242 * success regardless of how scan fabric went. 
243 */ 244 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 245 analyse_flag = PF_ERR_NO_PANIC; 246 } else { 247 analyse_flag = pf_analyse_error(derr, &impl); 248 } 249 250 pf_send_ereport(derr, &impl); 251 252 /* 253 * Check if any hardened driver's callback reported a panic or scan 254 * fabric was unable to gather all the information needed. If so panic. 255 */ 256 if (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_BAD_RESPONSE)) 257 analyse_flag |= PF_ERR_PANIC; 258 259 /* 260 * If a deadlock was detected, panic the system as error analysis has 261 * been compromised. 262 */ 263 if (scan_flag & PF_SCAN_DEADLOCK) 264 analyse_flag |= PF_ERR_PANIC_DEADLOCK; 265 266 derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag); 267 268 return (analyse_flag); 269 } 270 271 /* 272 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device 273 * falls in the error path. 274 * 275 * Returns PF_SCAN_* flags 276 */ 277 static int 278 pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan) 279 { 280 dev_info_t *dip; 281 pcie_req_id_t rid = impl->pf_fault->scan_bdf; 282 pcie_bus_t *bus_p; 283 int scan_flag = PF_SCAN_SUCCESS; 284 285 for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) { 286 /* Make sure dip is attached and ready */ 287 if (!(bus_p = pf_is_ready(dip))) 288 continue; 289 290 scan_flag |= pf_handler_enter(dip, impl); 291 if (scan_flag & PF_SCAN_DEADLOCK) 292 break; 293 294 /* 295 * Handle this device if it is a: 296 * o Full Scan 297 * o PCI/PCI-X Device 298 * o Fault BDF = Device BDF 299 * o BDF/ADDR is in range of the Bridge/Switch 300 */ 301 if (full_scan || 302 (bus_p->bus_bdf == rid) || 303 pf_in_bus_range(bus_p, rid) || 304 pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) { 305 int hdl_flag = pf_default_hdl(dip, impl); 306 scan_flag |= hdl_flag; 307 308 /* 309 * If pf_default_hdl was not able gather error 310 * information, it means this device wasn't added to the 311 * error q list. 
In that case exit the lock now, 312 * otherwise it'll be locked forever. 313 */ 314 if (hdl_flag & PF_SCAN_BAD_RESPONSE) 315 pf_handler_exit(dip); 316 317 /* 318 * A bridge may have detected no errors in which case 319 * there is no need to scan further down. 320 */ 321 if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD) 322 continue; 323 } else { 324 pf_handler_exit(dip); 325 continue; 326 } 327 328 /* match or in bridge bus-range */ 329 switch (bus_p->bus_dev_type) { 330 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: 331 case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE: 332 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 333 break; 334 case PCIE_PCIECAP_DEV_TYPE_UP: 335 case PCIE_PCIECAP_DEV_TYPE_DOWN: 336 case PCIE_PCIECAP_DEV_TYPE_ROOT: 337 { 338 pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p); 339 pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p); 340 pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p); 341 /* 342 * Continue if the fault BDF != the switch or there is a 343 * parity error 344 */ 345 if ((bus_p->bus_bdf != rid) || 346 (err_p->pci_err_status & PF_PCI_PARITY_ERR) || 347 (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR)) 348 scan_flag |= pf_dispatch(dip, impl, full_scan); 349 break; 350 } 351 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: 352 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: 353 /* 354 * Reached a PCIe end point so stop. 
Note dev_type 355 * PCI_DEV is just a PCIe device that requires IO Space 356 */ 357 break; 358 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO: 359 if (PCIE_IS_BDG(bus_p)) 360 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 361 break; 362 default: 363 ASSERT(B_FALSE); 364 } 365 } 366 return (scan_flag); 367 } 368 369 /* Returns whether the "bdf" is in the bus range of a switch/bridge */ 370 static boolean_t 371 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf) 372 { 373 pci_bus_range_t *br_p = &bus_p->bus_bus_range; 374 uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >> 375 PCIE_REQ_ID_BUS_SHIFT; 376 377 /* check if given bdf falls within bridge's bus range */ 378 if (PCIE_IS_BDG(bus_p) && 379 ((bus_no >= br_p->lo) && (bus_no <= br_p->hi))) 380 return (B_TRUE); 381 else 382 return (B_FALSE); 383 } 384 385 /* 386 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the 387 * "addr" is in the assigned addr of a device. 388 */ 389 static boolean_t 390 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr) 391 { 392 uint_t i; 393 uint64_t low, hi; 394 ppb_ranges_t *ranges_p = bus_p->bus_addr_ranges; 395 pci_regspec_t *assign_p = bus_p->bus_assigned_addr; 396 397 /* check if given address belongs to this device */ 398 for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) { 399 low = assign_p->pci_phys_low; 400 hi = low + assign_p->pci_size_low; 401 if ((addr < hi) && (addr >= low)) 402 return (B_TRUE); 403 } 404 405 /* check if given address belongs to a child below this device */ 406 if (!PCIE_IS_BDG(bus_p)) 407 return (B_FALSE); 408 409 for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) { 410 switch (ranges_p->child_high & PCI_ADDR_MASK) { 411 case PCI_ADDR_IO: 412 case PCI_ADDR_MEM32: 413 low = ranges_p->child_low; 414 hi = ranges_p->size_low + low; 415 if ((addr < hi) && (addr >= low)) 416 return (B_TRUE); 417 break; 418 case PCI_ADDR_MEM64: 419 low = ((uint64_t)ranges_p->child_mid << 32) | 420 (uint64_t)ranges_p->child_low; 421 hi = 
(((uint64_t)ranges_p->size_high << 32) | 422 (uint64_t)ranges_p->size_low) + low; 423 if ((addr < hi) && (addr >= low)) 424 return (B_TRUE); 425 break; 426 } 427 } 428 return (B_FALSE); 429 } 430 431 static pcie_bus_t * 432 pf_is_ready(dev_info_t *dip) 433 { 434 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 435 if (!bus_p) 436 return (NULL); 437 438 if (!(bus_p->bus_fm_flags & PF_FM_READY)) 439 return (NULL); 440 return (bus_p); 441 } 442 443 static void 444 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs, 445 pcie_bus_t *bus_p, boolean_t bdg) 446 { 447 if (bdg) { 448 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 449 PCI_PCIX_BDG_ECC_STATUS); 450 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 451 PCI_PCIX_BDG_ECC_FST_AD); 452 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 453 PCI_PCIX_BDG_ECC_SEC_AD); 454 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 455 PCI_PCIX_BDG_ECC_ATTR); 456 } else { 457 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 458 PCI_PCIX_ECC_STATUS); 459 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 460 PCI_PCIX_ECC_FST_AD); 461 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 462 PCI_PCIX_ECC_SEC_AD); 463 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 464 PCI_PCIX_ECC_ATTR); 465 } 466 } 467 468 469 static void 470 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 471 { 472 /* 473 * For PCI-X device PCI-X Capability only exists for Type 0 Headers. 474 * PCI-X Bridge Capability only exists for Type 1 Headers. 475 * Both capabilities do not exist at the same time. 
476 */ 477 if (PCIE_IS_BDG(bus_p)) { 478 pf_pcix_bdg_err_regs_t *pcix_bdg_regs; 479 480 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p); 481 482 pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p, 483 PCI_PCIX_SEC_STATUS); 484 pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p, 485 PCI_PCIX_BDG_STATUS); 486 487 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 488 /* 489 * PCI Express to PCI-X bridges only implement the 490 * secondary side of the PCI-X ECC registers, bit one is 491 * read-only so we make sure we do not write to it. 492 */ 493 if (!PCIE_IS_PCIE_BDG(bus_p)) { 494 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 495 0); 496 pf_pcix_ecc_regs_gather( 497 PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE); 498 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 499 1); 500 } 501 pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0), 502 bus_p, B_TRUE); 503 } 504 } else { 505 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 506 507 pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p, 508 PCI_PCIX_COMMAND); 509 pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p, 510 PCI_PCIX_STATUS); 511 if (PCIX_ECC_VERSION_CHECK(bus_p)) 512 pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p, 513 B_TRUE); 514 } 515 } 516 517 static void 518 pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 519 { 520 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 521 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 522 523 pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS); 524 pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL); 525 pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP); 526 527 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 528 pf_pcix_regs_gather(pfd_p, bus_p); 529 530 if (PCIE_IS_ROOT(bus_p)) { 531 pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p); 532 533 pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p, 534 PCIE_ROOTSTS); 535 pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p, 536 PCIE_ROOTCTL); 537 } 538 539 if 
(!PCIE_HAS_AER(bus_p)) 540 return; 541 542 /* Gather UE AERs */ 543 pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p, 544 PCIE_AER_CTL); 545 pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p, 546 PCIE_AER_UCE_STS); 547 pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p, 548 PCIE_AER_UCE_MASK); 549 pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p, 550 PCIE_AER_UCE_SERV); 551 PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 552 PCIE_AER_HDR_LOG); 553 PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 554 PCIE_AER_HDR_LOG + 0x4); 555 PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 556 PCIE_AER_HDR_LOG + 0x8); 557 PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 558 PCIE_AER_HDR_LOG + 0xc); 559 560 /* Gather CE AERs */ 561 pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p, 562 PCIE_AER_CE_STS); 563 pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p, 564 PCIE_AER_CE_MASK); 565 566 /* 567 * If pci express to pci bridge then grab the bridge 568 * error registers. 569 */ 570 if (PCIE_IS_PCIE_BDG(bus_p)) { 571 pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs = 572 PCIE_ADV_BDG_REG(pfd_p); 573 574 pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p, 575 PCIE_AER_SCTL); 576 pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p, 577 PCIE_AER_SUCE_STS); 578 pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p, 579 PCIE_AER_SUCE_MASK); 580 pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p, 581 PCIE_AER_SUCE_SERV); 582 PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 583 PCIE_AER_SHDR_LOG); 584 PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 585 PCIE_AER_SHDR_LOG + 0x4); 586 PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 587 PCIE_AER_SHDR_LOG + 0x8); 588 PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 589 PCIE_AER_SHDR_LOG + 0xc); 590 } 591 592 /* 593 * If PCI Express root port then grab the root port 594 * error registers. 
595 */ 596 if (PCIE_IS_ROOT(bus_p)) { 597 pf_pcie_adv_rp_err_regs_t *pcie_rp_regs = 598 PCIE_ADV_RP_REG(pfd_p); 599 600 pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p, 601 PCIE_AER_RE_CMD); 602 pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p, 603 PCIE_AER_RE_STS); 604 pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p, 605 PCIE_AER_CE_SRC_ID); 606 pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p, 607 PCIE_AER_ERR_SRC_ID); 608 } 609 } 610 611 static void 612 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 613 { 614 pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p); 615 616 /* 617 * Start by reading all the error registers that are available for 618 * pci and pci express and for leaf devices and bridges/switches 619 */ 620 pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT); 621 pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM); 622 623 /* 624 * If pci-pci bridge grab PCI bridge specific error registers. 625 */ 626 if (PCIE_IS_BDG(bus_p)) { 627 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p); 628 pci_bdg_regs->pci_bdg_sec_stat = 629 PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS); 630 pci_bdg_regs->pci_bdg_ctrl = 631 PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL); 632 } 633 634 /* 635 * If pci express device grab pci express error registers and 636 * check for advanced error reporting features and grab them if 637 * available. 
638 */ 639 if (PCIE_IS_PCIE(bus_p)) 640 pf_pcie_regs_gather(pfd_p, bus_p); 641 else if (PCIE_IS_PCIX(bus_p)) 642 pf_pcix_regs_gather(pfd_p, bus_p); 643 644 } 645 646 static void 647 pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 648 { 649 if (PCIE_IS_BDG(bus_p)) { 650 pf_pcix_bdg_err_regs_t *pcix_bdg_regs; 651 652 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p); 653 654 PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS, 655 pcix_bdg_regs->pcix_bdg_sec_stat); 656 657 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS, 658 pcix_bdg_regs->pcix_bdg_stat); 659 660 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 661 pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs; 662 /* 663 * PCI Express to PCI-X bridges only implement the 664 * secondary side of the PCI-X ECC registers. For 665 * clearing, there is no need to "select" the ECC 666 * register, just write what was originally read. 667 */ 668 if (!PCIE_IS_PCIE_BDG(bus_p)) { 669 pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0); 670 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 671 pcix_bdg_ecc_regs->pcix_ecc_ctlstat); 672 673 } 674 pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1); 675 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 676 pcix_bdg_ecc_regs->pcix_ecc_ctlstat); 677 } 678 } else { 679 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 680 681 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS, 682 pcix_regs->pcix_status); 683 684 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 685 pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p); 686 687 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS, 688 pcix_ecc_regs->pcix_ecc_ctlstat); 689 } 690 } 691 } 692 693 static void 694 pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 695 { 696 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 697 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 698 699 PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status); 700 701 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 702 pf_pcix_regs_clear(pfd_p, bus_p); 703 704 if (!PCIE_HAS_AER(bus_p)) 705 return; 706 707 
PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS, 708 pcie_adv_regs->pcie_ue_status); 709 710 PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS, 711 pcie_adv_regs->pcie_ce_status); 712 713 if (PCIE_IS_PCIE_BDG(bus_p)) { 714 pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs = 715 PCIE_ADV_BDG_REG(pfd_p); 716 717 PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS, 718 pcie_bdg_regs->pcie_sue_status); 719 } 720 721 /* 722 * If PCI Express root complex then clear the root complex 723 * error registers. 724 */ 725 if (PCIE_IS_ROOT(bus_p)) { 726 pf_pcie_adv_rp_err_regs_t *pcie_rp_regs; 727 728 pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p); 729 730 PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS, 731 pcie_rp_regs->pcie_rp_err_status); 732 } 733 } 734 735 static void 736 pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p) 737 { 738 if (PCIE_IS_PCIE(bus_p)) 739 pf_pcie_regs_clear(pfd_p, bus_p); 740 else if (PCIE_IS_PCIX(bus_p)) 741 pf_pcix_regs_clear(pfd_p, bus_p); 742 743 PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status); 744 745 if (PCIE_IS_BDG(bus_p)) { 746 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p); 747 PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS, 748 pci_bdg_regs->pci_bdg_sec_stat); 749 } 750 } 751 752 /* ARGSUSED */ 753 void 754 pcie_clear_errors(dev_info_t *dip) 755 { 756 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 757 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 758 759 ASSERT(bus_p); 760 761 pf_pci_regs_gather(pfd_p, bus_p); 762 pf_pci_regs_clear(pfd_p, bus_p); 763 } 764 765 /* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. 
 */
static void
pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
    pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
    pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
    uint32_t root_err = rp_regs->pcie_rp_err_status;
    uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
    int num_faults = 0;

    /* Since this data structure is reused, make sure to reset it */
    root_fault->full_scan = B_FALSE;
    root_fault->scan_bdf = PCIE_INVALID_BDF;
    root_fault->scan_addr = 0;

    /* Without AER a secondary-status error cannot be localized: full scan */
    if (!PCIE_HAS_AER(bus_p) &&
        (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
        PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
        return;
    }

    /*
     * Check to see if an error has been received that
     * requires a scan of the fabric. Count the number of
     * faults seen. If MUL CE/FE_NFE that counts for
     * at least 2 faults, so just return with full_scan.
     */
    if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
        (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
        PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
        return;
    }

    if (root_err & PCIE_AER_RE_STS_CE_RCVD)
        num_faults++;

    if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
        num_faults++;

    if (ue_err & PCIE_AER_UCE_CA)
        num_faults++;

    if (ue_err & PCIE_AER_UCE_UR)
        num_faults++;

    /* If no faults just return */
    if (num_faults == 0)
        return;

    /* If faults > 1 do full scan */
    if (num_faults > 1) {
        PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
        return;
    }

    /* By this point, there is only 1 fault detected */
    if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
        /* CE received: source BDF comes from the CE source-ID register */
        PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
        num_faults--;
    } else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
        /* FE/NFE received: source BDF comes from the UE source-ID register */
        PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
        num_faults--;
    } else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
        HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
        (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
        DDI_SUCCESS)) {
        /* CA/UR with valid header logs: decode the fault addr from the TLP */
        PCIE_ROOT_FAULT(pfd_p)->scan_addr =
            PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
        num_faults--;
    }

    /*
     * This means an error did occur, but we couldn't extract the fault BDF
     */
    if (num_faults > 0)
        PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;

}


/*
 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
 *
 * Returns a scan flag.
 * o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
 *   Fault Q
 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item not added to fault Q
 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
 *   unnecessary scanning
 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
 *   time.
 */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
    pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
    pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
    int cb_sts, scan_flag = PF_SCAN_SUCCESS;

    /* Make sure this device hasn't already been snapshotted and cleared */
    if (pfd_p->pe_valid == B_TRUE) {
        scan_flag |= PF_SCAN_IN_DQ;
        goto done;
    }

    /*
     * Read vendor/device ID and check with cached data, if it doesn't match
     * could very well be a device that isn't responding anymore. Just
     * stop. Save the basic info in the error q for post mortem debugging
     * purposes.
     */
    if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
        char buf[FM_MAX_CLASS];

        (void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
            PCI_ERROR_SUBCLASS, PCI_NR);
        ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
            DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

        /* Caller must drop the handler lock on a bad response */
        return (PF_SCAN_BAD_RESPONSE);
    }

    pf_pci_regs_gather(pfd_p, bus_p);
    pf_pci_regs_clear(pfd_p, bus_p);
    if (PCIE_IS_RP(bus_p))
        pf_pci_find_rp_fault(pfd_p, bus_p);

    cb_sts = pf_fm_callback(dip, impl->pf_derr);

    if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
        scan_flag |= PF_SCAN_CB_FAILURE;

    /* Add the snapshot to the error q */
    pf_en_dq(pfd_p, impl);

done:
    /*
     * If a bridge does not have any error no need to scan any further down.
     * For PCIe devices, check the PCIe device status and PCI secondary
     * status.
     * - Some non-compliant PCIe devices do not utilize PCIe
     *   error registers. If so rely on legacy PCI error registers.
     * For PCI devices, check the PCI secondary status.
     */
    if (PCIE_IS_PCIE_BDG(bus_p) &&
        !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
        !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
        scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

    if (PCIE_IS_PCI_BDG(bus_p) &&
        !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
        scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

    pfd_p->pe_valid = B_TRUE;
    return (scan_flag);
}

/*
 * Called during postattach to initialize a device's error handling
 * capabilities. If the devices has already been hardened, then there isn't
 * much needed. Otherwise initialize the device's default FMA capabilities.
 *
 * In a future project where PCIe support is removed from pcifm, several
 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
 * created here so that the PCI/PCIe eversholt rules will work properly.
 */
void
pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
{
    pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
    struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
    boolean_t need_cb_register = B_FALSE;

    if (!bus_p) {
        cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
            ddi_driver_name(dip), ddi_get_instance(dip));
        return;
    }

    if (fmhdl) {
        /*
         * If device is only ereport capable and not callback capable
         * make it callback capable. The only downside is that the
         * "fm-errcb-capable" property is not created for this device
         * which should be ok since it's not used anywhere.
         */
        if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
            need_cb_register = B_TRUE;
    } else {
        int cap;
        /*
         * fm-capable in driver.conf can be used to set fm_capabilities.
         * If fm-capable is not defined, set the default
         * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
         */
        cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
            DDI_PROP_DONTPASS, "fm-capable",
            DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
        cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);

        /* Mark as a non-hardened driver so pf_fini can undo this setup */
        bus_p->bus_fm_flags |= PF_FM_IS_NH;

        if (cmd == DDI_ATTACH) {
            ddi_fm_init(dip, &cap, &ibc);
            pci_ereport_setup(dip);
        }

        if (cap & DDI_FM_ERRCB_CAPABLE)
            need_cb_register = B_TRUE;

        fmhdl = DEVI(dip)->devi_fmhdl;
    }

    /* If ddi_fm_init fails for any reason RETURN */
    if (!fmhdl) {
        bus_p->bus_fm_flags = 0;
        return;
    }

    fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE;
    if (cmd == DDI_ATTACH) {
        if (need_cb_register)
            ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
    }

    /* Device may now participate in fabric error scans */
    bus_p->bus_fm_flags |= PF_FM_READY;
}

/* undo FMA lock, called at predetach */
void
pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
    pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

    if (!bus_p)
        return;

    /* Don't fini anything if device isn't FM Ready */
    if (!(bus_p->bus_fm_flags & PF_FM_READY))
        return;

    /* no other code should set the flag to false */
    bus_p->bus_fm_flags &= ~PF_FM_READY;

    /*
     * Grab the mutex to make sure device isn't in the middle of
     * error handling. Setting the bus_fm_flag to ~PF_FM_READY
     * should prevent this device from being error handled after
     * the mutex has been released.
     */
    (void) pf_handler_enter(dip, NULL);
    pf_handler_exit(dip);

    /* undo non-hardened drivers */
    if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
        if (cmd == DDI_DETACH) {
            bus_p->bus_fm_flags &= ~PF_FM_IS_NH;
            pci_ereport_teardown(dip);
            /*
             * ddi_fini itself calls ddi_handler_unregister,
             * so no need to explicitly call unregister.
             */
            ddi_fm_fini(dip);
        }
    }
}

/*
 * Placeholder error callback registered for non-hardened drivers; always
 * reports OK so fabric scanning drives the analysis instead.
 */
/*ARGSUSED*/
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
    return (DDI_FM_OK);
}

/*
 * Add PFD to queue. If it is an RC add it to the beginning,
 * otherwise add it to the end.
 */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
    pf_data_t *head_p = impl->pf_dq_head_p;
    pf_data_t *tail_p = impl->pf_dq_tail_p;

    impl->pf_total++;

    /* Empty queue: pfd_p becomes both head and tail */
    if (!head_p) {
        ASSERT(PFD_IS_ROOT(pfd_p));
        impl->pf_dq_head_p = pfd_p;
        impl->pf_dq_tail_p = pfd_p;
        pfd_p->pe_prev = NULL;
        pfd_p->pe_next = NULL;
        return;
    }

    /* Check if this is a Root Port eprt */
    if (PFD_IS_ROOT(pfd_p)) {
        pf_data_t *root_p, *last_p = NULL;

        /* The first item must be a RP */
        root_p = head_p;
        for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
            last_p = last_p->pe_next)
            root_p = last_p;

        /* root_p is the last RP pfd. last_p is the first non-RP pfd. */
        root_p->pe_next = pfd_p;
        pfd_p->pe_prev = root_p;
        pfd_p->pe_next = last_p;

        if (last_p)
            last_p->pe_prev = pfd_p;
        else
            tail_p = pfd_p;
    } else {
        /* Non-RP pfd: simply append at the tail */
        tail_p->pe_next = pfd_p;
        pfd_p->pe_prev = tail_p;
        pfd_p->pe_next = NULL;
        tail_p = pfd_p;
    }

    impl->pf_dq_head_p = head_p;
    impl->pf_dq_tail_p = tail_p;
}

/*
 * Ignore:
 * - TRAINING: as leaves do not have children
 * - SD: as leaves do not have children
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
    PCIE_AER_UCE_DLP,		pf_panic,
    PCIE_AER_UCE_PTLP,		pf_analyse_ptlp,
    PCIE_AER_UCE_FCP,		pf_panic,
    PCIE_AER_UCE_TO,		pf_analyse_to,
    PCIE_AER_UCE_CA,		pf_analyse_ca_ur,
    PCIE_AER_UCE_UC,		pf_analyse_uc,
    PCIE_AER_UCE_RO,		pf_panic,
    PCIE_AER_UCE_MTLP,		pf_panic,
    PCIE_AER_UCE_ECRC,		pf_panic,
    PCIE_AER_UCE_UR,		pf_analyse_ca_ur,
    NULL, NULL
};

/* Uncorrectable-error dispatch table for Root Ports */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
    PCIE_AER_UCE_TRAINING,	pf_no_panic,
    PCIE_AER_UCE_DLP,		pf_panic,
    PCIE_AER_UCE_SD,		pf_no_panic,
    PCIE_AER_UCE_PTLP,		pf_analyse_ptlp,
    PCIE_AER_UCE_FCP,		pf_panic,
    PCIE_AER_UCE_TO,		pf_panic,
    PCIE_AER_UCE_CA,		pf_no_panic,
    PCIE_AER_UCE_UC,		pf_analyse_uc,
    PCIE_AER_UCE_RO,		pf_panic,
    PCIE_AER_UCE_MTLP,		pf_panic,
    PCIE_AER_UCE_ECRC,		pf_panic,
    PCIE_AER_UCE_UR,		pf_no_panic,
    NULL, NULL
};

/* Uncorrectable-error dispatch table for switch ports */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
    PCIE_AER_UCE_TRAINING,	pf_no_panic,
    PCIE_AER_UCE_DLP,		pf_panic,
    PCIE_AER_UCE_SD,		pf_no_panic,
    PCIE_AER_UCE_PTLP,		pf_analyse_ptlp,
    PCIE_AER_UCE_FCP,		pf_panic,
    PCIE_AER_UCE_TO,		pf_analyse_to,
    PCIE_AER_UCE_CA,		pf_analyse_ca_ur,
    PCIE_AER_UCE_UC,		pf_analyse_uc,
    PCIE_AER_UCE_RO,		pf_panic,
    PCIE_AER_UCE_MTLP,		pf_panic,
    PCIE_AER_UCE_ECRC,		pf_panic,
    PCIE_AER_UCE_UR,		pf_analyse_ca_ur,
    NULL, NULL
};

const pf_fab_err_tbl_t
pcie_pcie_bdg_tbl[] = { 1143 PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc, 1144 PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc, 1145 PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta, 1146 PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta, 1147 PCIE_AER_SUCE_USC_ERR, pf_panic, 1148 PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta, 1149 PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data, 1150 PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic, 1151 PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic, 1152 PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic, 1153 PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert, 1154 PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic, 1155 PCIE_AER_SUCE_INTERNAL_ERR, pf_panic, 1156 NULL, NULL 1157 }; 1158 1159 const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = { 1160 PCI_STAT_PERROR, pf_analyse_pci, 1161 PCI_STAT_S_PERROR, pf_analyse_pci, 1162 PCI_STAT_S_SYSERR, pf_panic, 1163 PCI_STAT_R_MAST_AB, pf_analyse_pci, 1164 PCI_STAT_R_TARG_AB, pf_analyse_pci, 1165 PCI_STAT_S_TARG_AB, pf_analyse_pci, 1166 NULL, NULL 1167 }; 1168 1169 const pf_fab_err_tbl_t pcie_pci_tbl[] = { 1170 PCI_STAT_PERROR, pf_analyse_pci, 1171 PCI_STAT_S_PERROR, pf_analyse_pci, 1172 PCI_STAT_S_SYSERR, pf_panic, 1173 PCI_STAT_R_MAST_AB, pf_analyse_pci, 1174 PCI_STAT_R_TARG_AB, pf_analyse_pci, 1175 PCI_STAT_S_TARG_AB, pf_analyse_pci, 1176 NULL, NULL 1177 }; 1178 1179 #define PF_MASKED_AER_ERR(pfd_p) \ 1180 (PCIE_ADV_REG(pfd_p)->pcie_ue_status & \ 1181 ((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF)) 1182 #define PF_MASKED_SAER_ERR(pfd_p) \ 1183 (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \ 1184 ((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF)) 1185 /* 1186 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt 1187 * Queue. 
1188 */ 1189 static int 1190 pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl) 1191 { 1192 int sts_flags, error_flags = 0; 1193 pf_data_t *pfd_p; 1194 1195 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 1196 sts_flags = 0; 1197 1198 switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) { 1199 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: 1200 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: 1201 if (PCIE_DEVSTS_CE_DETECTED & 1202 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1203 sts_flags |= PF_ERR_CE; 1204 1205 pf_adjust_for_no_aer(pfd_p); 1206 sts_flags |= pf_analyse_error_tbl(derr, impl, 1207 pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p)); 1208 break; 1209 case PCIE_PCIECAP_DEV_TYPE_ROOT: 1210 pf_adjust_for_no_aer(pfd_p); 1211 sts_flags |= pf_analyse_error_tbl(derr, impl, 1212 pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p)); 1213 break; 1214 case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO: 1215 /* no adjust_for_aer for pseudo RC */ 1216 sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p, 1217 pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p)); 1218 break; 1219 case PCIE_PCIECAP_DEV_TYPE_UP: 1220 case PCIE_PCIECAP_DEV_TYPE_DOWN: 1221 if (PCIE_DEVSTS_CE_DETECTED & 1222 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1223 sts_flags |= PF_ERR_CE; 1224 1225 pf_adjust_for_no_aer(pfd_p); 1226 sts_flags |= pf_analyse_error_tbl(derr, impl, 1227 pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p)); 1228 break; 1229 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: 1230 if (PCIE_DEVSTS_CE_DETECTED & 1231 PCIE_ERR_REG(pfd_p)->pcie_err_status) 1232 sts_flags |= PF_ERR_CE; 1233 1234 pf_adjust_for_no_aer(pfd_p); 1235 pf_adjust_for_no_saer(pfd_p); 1236 sts_flags |= pf_analyse_error_tbl(derr, 1237 impl, pfd_p, pcie_pcie_tbl, 1238 PF_MASKED_AER_ERR(pfd_p)); 1239 sts_flags |= pf_analyse_error_tbl(derr, 1240 impl, pfd_p, pcie_pcie_bdg_tbl, 1241 PF_MASKED_SAER_ERR(pfd_p)); 1242 /* 1243 * Some non-compliant PCIe devices do not utilize PCIe 1244 * error registers. So fallthrough and rely on legacy 1245 * PCI error registers. 
1246 */ 1247 if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED) 1248 & PCIE_ERR_REG(pfd_p)->pcie_err_status) 1249 break; 1250 /* FALLTHROUGH */ 1251 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO: 1252 sts_flags |= pf_analyse_error_tbl(derr, impl, 1253 pfd_p, pcie_pci_tbl, 1254 PCI_ERR_REG(pfd_p)->pci_err_status); 1255 1256 if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) 1257 break; 1258 1259 sts_flags |= pf_analyse_error_tbl(derr, 1260 impl, pfd_p, pcie_pci_bdg_tbl, 1261 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat); 1262 } 1263 1264 pfd_p->pe_severity_flags = sts_flags; 1265 error_flags |= pfd_p->pe_severity_flags; 1266 } 1267 1268 return (error_flags); 1269 } 1270 1271 static int 1272 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl, 1273 pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg) { 1274 const pf_fab_err_tbl_t *row; 1275 int err = 0; 1276 1277 for (row = tbl; err_reg && (row->bit != NULL) && !(err & PF_ERR_PANIC); 1278 row++) { 1279 if (err_reg & row->bit) 1280 err |= row->handler(derr, row->bit, impl->pf_dq_head_p, 1281 pfd_p); 1282 } 1283 1284 if (!err) 1285 err = PF_ERR_NO_ERROR; 1286 1287 return (err); 1288 } 1289 1290 /* 1291 * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device 1292 * issues a CA/UR a corresponding Received CA/UR should have been seen in the 1293 * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then 1294 * this error may be safely ignored. If not check the logs and see if an 1295 * associated handler for this transaction can be found. 
1296 */ 1297 /* ARGSUSED */ 1298 static int 1299 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1300 pf_data_t *pfd_p) 1301 { 1302 uint32_t abort_type; 1303 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1304 1305 /* If UR's are masked forgive this error */ 1306 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1307 (bit == PCIE_AER_UCE_UR)) 1308 return (PF_ERR_NO_PANIC); 1309 1310 /* 1311 * If a RP has an CA/UR it means a leaf sent a bad request to the RP 1312 * such as a config read or a bad DMA address. 1313 */ 1314 if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p))) 1315 goto handle_lookup; 1316 1317 if (bit == PCIE_AER_UCE_UR) 1318 abort_type = PCI_STAT_R_MAST_AB; 1319 else 1320 abort_type = PCI_STAT_R_TARG_AB; 1321 1322 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type)) 1323 return (PF_ERR_MATCHED_RC); 1324 1325 handle_lookup: 1326 if (HAS_AER_LOGS(pfd_p, bit) && 1327 pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND) 1328 return (PF_ERR_MATCHED_DEVICE); 1329 1330 return (PF_ERR_PANIC); 1331 } 1332 1333 /* 1334 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe 1335 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in 1336 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so 1337 * then this error may be safely ignored. If not check the logs and see if an 1338 * associated handler for this transaction can be found. 
1339 */ 1340 /* ARGSUSED */ 1341 static int 1342 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1343 pf_data_t *pfd_p) 1344 { 1345 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1346 uint32_t abort_type; 1347 1348 /* If UR's are masked forgive this error */ 1349 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1350 (bit == PCIE_AER_SUCE_RCVD_MA)) 1351 return (PF_ERR_NO_PANIC); 1352 1353 if (bit == PCIE_AER_SUCE_RCVD_MA) 1354 abort_type = PCI_STAT_R_MAST_AB; 1355 else 1356 abort_type = PCI_STAT_R_TARG_AB; 1357 1358 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type)) 1359 return (PF_ERR_MATCHED_RC); 1360 1361 if (!HAS_SAER_LOGS(pfd_p, bit)) 1362 return (PF_ERR_PANIC); 1363 1364 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND) 1365 return (PF_ERR_MATCHED_DEVICE); 1366 1367 return (PF_ERR_PANIC); 1368 } 1369 1370 /* 1371 * Generic PCI error analyser. This function is used for Parity Errors, 1372 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts. 1373 * In general PCI devices do not have error logs, it is very difficult to figure 1374 * out what transaction caused the error. Instead find the nearest PCIe-PCI 1375 * Bridge and check to see if it has logs and if it has an error associated with 1376 * this PCI Device. 
1377 */ 1378 /* ARGSUSED */ 1379 static int 1380 pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1381 pf_data_t *pfd_p) 1382 { 1383 pf_data_t *parent_pfd_p; 1384 uint16_t cmd; 1385 uint32_t aer_ue_status; 1386 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p); 1387 pf_pcie_adv_bdg_err_regs_t *parent_saer_p; 1388 1389 if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR) 1390 return (PF_ERR_PANIC); 1391 1392 /* If UR's are masked forgive this error */ 1393 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) && 1394 (bit == PCI_STAT_R_MAST_AB)) 1395 return (PF_ERR_NO_PANIC); 1396 1397 1398 if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) { 1399 aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT; 1400 } else { 1401 aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC | 1402 PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | 1403 PCIE_AER_SUCE_RCVD_MA); 1404 } 1405 1406 parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p); 1407 if (parent_pfd_p == NULL) 1408 return (PF_ERR_PANIC); 1409 1410 /* Check if parent bridge has seen this error */ 1411 parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p); 1412 if (!(parent_saer_p->pcie_sue_status & aer_ue_status) || 1413 !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status)) 1414 return (PF_ERR_PANIC); 1415 1416 /* 1417 * If the addr or bdf from the parent PCIe bridge logs belong to this 1418 * PCI device, assume the PCIe bridge's error handling has already taken 1419 * care of this PCI device's error. 1420 */ 1421 if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS) 1422 return (PF_ERR_PANIC); 1423 1424 if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) || 1425 pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr)) 1426 return (PF_ERR_MATCHED_PARENT); 1427 1428 /* 1429 * If this device is a PCI-PCI bridge, check if the bdf in the parent 1430 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges. 1431 * If they are, then assume the PCIe bridge's error handling has already 1432 * taken care of this PCI-PCI bridge device's error. 
1433 */ 1434 if (PCIE_IS_BDG(bus_p) && 1435 pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf)) 1436 return (PF_ERR_MATCHED_PARENT); 1437 1438 return (PF_ERR_PANIC); 1439 } 1440 1441 /* 1442 * PCIe Bridge transactions associated with PERR. 1443 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe 1444 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe 1445 * o Bridge received a poisoned Completion on a Split Transction from PCIe 1446 * o Bridge received a poisoned Completion on a Delayed Transction from PCIe 1447 * 1448 * Check for non-poisoned PCIe transactions that got forwarded to the secondary 1449 * side and detects a PERR#. Except for delayed read completions, a poisoned 1450 * TLP will be forwarded to the secondary bus and PERR# will be asserted. 1451 */ 1452 /* ARGSUSED */ 1453 static int 1454 pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1455 pf_data_t *pfd_p) 1456 { 1457 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1458 uint16_t cmd; 1459 int hdl_sts = PF_HDL_NOTFOUND; 1460 int err = PF_ERR_NO_ERROR; 1461 pf_pcie_adv_bdg_err_regs_t *saer_p; 1462 1463 1464 if (HAS_SAER_LOGS(pfd_p, bit)) { 1465 saer_p = PCIE_ADV_BDG_REG(pfd_p); 1466 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1467 return (PF_ERR_PANIC); 1468 1469 cmd_switch: 1470 switch (cmd) { 1471 case PCI_PCIX_CMD_IOWR: 1472 case PCI_PCIX_CMD_MEMWR: 1473 case PCI_PCIX_CMD_MEMWR_BL: 1474 case PCI_PCIX_CMD_MEMWRBL: 1475 /* Posted Writes Transactions */ 1476 if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO) 1477 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1478 B_FALSE); 1479 break; 1480 case PCI_PCIX_CMD_CFWR: 1481 /* 1482 * Check to see if it is a non-posted write. If so, a 1483 * UR Completion would have been sent. 
1484 */ 1485 if (pf_matched_in_rc(dq_head_p, pfd_p, 1486 PCI_STAT_R_MAST_AB)) { 1487 hdl_sts = PF_HDL_FOUND; 1488 err = PF_ERR_MATCHED_RC; 1489 goto done; 1490 } 1491 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1492 B_FALSE); 1493 break; 1494 case PCI_PCIX_CMD_SPL: 1495 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1496 B_FALSE); 1497 break; 1498 case PCI_PCIX_CMD_DADR: 1499 cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 1500 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 1501 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 1502 if (cmd != PCI_PCIX_CMD_DADR) 1503 goto cmd_switch; 1504 /* FALLTHROUGH */ 1505 default: 1506 /* Unexpected situation, panic */ 1507 hdl_sts = PF_HDL_NOTFOUND; 1508 } 1509 1510 if (hdl_sts == PF_HDL_FOUND) 1511 err = PF_ERR_MATCHED_DEVICE; 1512 else 1513 err = PF_ERR_PANIC; 1514 } else { 1515 /* 1516 * Check to see if it is a non-posted write. If so, a UR 1517 * Completion would have been sent. 1518 */ 1519 if ((PCIE_ERR_REG(pfd_p)->pcie_err_status & 1520 PCIE_DEVSTS_UR_DETECTED) && 1521 pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB)) 1522 err = PF_ERR_MATCHED_RC; 1523 1524 /* Check for posted writes. Transaction is lost. */ 1525 if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & 1526 PCI_STAT_S_PERROR) 1527 err = PF_ERR_PANIC; 1528 1529 /* 1530 * All other scenarios are due to read completions. Check for 1531 * PERR on the primary side. If found the primary side error 1532 * handling will take care of this error. 1533 */ 1534 if (err == PF_ERR_NO_ERROR) { 1535 if (PCI_ERR_REG(pfd_p)->pci_err_status & 1536 PCI_STAT_PERROR) 1537 err = PF_ERR_MATCHED_PARENT; 1538 else 1539 err = PF_ERR_PANIC; 1540 } 1541 } 1542 1543 done: 1544 return (err); 1545 } 1546 1547 /* 1548 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, 1549 * check the logs and see if an associated handler for this transaction can be 1550 * found. 
1551 */ 1552 /* ARGSUSED */ 1553 static int 1554 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1555 pf_data_t *pfd_p) 1556 { 1557 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1558 1559 /* 1560 * If AERs are supported find the logs in this device, otherwise look in 1561 * it's parent's logs. 1562 */ 1563 if (HAS_AER_LOGS(pfd_p, bit)) { 1564 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0); 1565 1566 /* 1567 * Double check that the log contains a poisoned TLP. 1568 * Some devices like PLX switch do not log poison TLP headers. 1569 */ 1570 if (hdr->ep) { 1571 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == 1572 PF_HDL_FOUND) 1573 return (PF_ERR_MATCHED_DEVICE); 1574 } 1575 1576 /* 1577 * If an address is found and hdl lookup failed panic. 1578 * Otherwise check parents to see if there was enough 1579 * information recover. 1580 */ 1581 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr) 1582 return (PF_ERR_PANIC); 1583 } 1584 1585 /* 1586 * Check to see if the rc has already handled this error or a parent has 1587 * already handled this error. 1588 * 1589 * If the error info in the RC wasn't enough to find the fault device, 1590 * such as if the faulting device lies behind a PCIe-PCI bridge from a 1591 * poisoned completion, check to see if the PCIe-PCI bridge has enough 1592 * info to recover. For completion TLP's, the AER header logs only 1593 * contain the faulting BDF in the Root Port. For PCIe device the fault 1594 * BDF is the fault device. But if the fault device is behind a 1595 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI 1596 * bridge's secondary bus number. 
1597 */ 1598 if (!PFD_IS_ROOT(pfd_p)) { 1599 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p)); 1600 pf_data_t *parent_pfd_p; 1601 1602 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) { 1603 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1604 return (PF_ERR_MATCHED_RC); 1605 } 1606 1607 parent_pfd_p = PCIE_DIP2PFD(pdip); 1608 1609 if (HAS_AER_LOGS(parent_pfd_p, bit)) 1610 return (PF_ERR_MATCHED_PARENT); 1611 } else { 1612 pf_data_t *bdg_pfd_p; 1613 pcie_req_id_t secbus; 1614 1615 /* 1616 * Looking for a pcie bridge only makes sense if the BDF 1617 * Dev/Func = 0/0 1618 */ 1619 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1620 goto done; 1621 1622 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf; 1623 1624 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF)) 1625 goto done; 1626 1627 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus); 1628 1629 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p, 1630 PCIE_AER_SUCE_PERR_ASSERT)) { 1631 return pf_analyse_perr_assert(derr, 1632 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p); 1633 } 1634 } 1635 done: 1636 return (PF_ERR_PANIC); 1637 } 1638 1639 /* 1640 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split 1641 * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an 1642 * associated handler for this transaction can be found. 1643 */ 1644 /* ARGSUSED */ 1645 static int 1646 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1647 pf_data_t *pfd_p) 1648 { 1649 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1650 uint16_t cmd; 1651 int sts = PF_HDL_NOTFOUND; 1652 1653 if (!HAS_SAER_LOGS(pfd_p, bit)) 1654 return (PF_ERR_PANIC); 1655 1656 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1657 return (PF_ERR_PANIC); 1658 1659 if (cmd == PCI_PCIX_CMD_SPL) 1660 sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE); 1661 1662 if (sts == PF_HDL_FOUND) 1663 return (PF_ERR_MATCHED_DEVICE); 1664 1665 return (PF_ERR_PANIC); 1666 } 1667 1668 /* 1669 * PCIe Timeout error analyser. 
This error can be forgiven if it is marked as 1670 * CE Advisory. If it is marked as advisory, this means the HW can recover 1671 * and/or retry the transaction automatically. 1672 */ 1673 /* ARGSUSED */ 1674 static int 1675 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1676 pf_data_t *pfd_p) 1677 { 1678 if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p)) 1679 return (PF_ERR_NO_PANIC); 1680 1681 return (PF_ERR_PANIC); 1682 } 1683 1684 /* 1685 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by 1686 * matching the device BDF with the TLP Log. If misrouting panic, otherwise 1687 * don't panic. 1688 */ 1689 /* ARGSUSED */ 1690 static int 1691 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1692 pf_data_t *pfd_p) 1693 { 1694 if (HAS_AER_LOGS(pfd_p, bit) && 1695 (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16))) 1696 return (PF_ERR_NO_PANIC); 1697 1698 return (PF_ERR_PANIC); 1699 } 1700 1701 /* 1702 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data 1703 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for 1704 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a 1705 * Poisoned TLP. 1706 * 1707 * Non-Posted Writes will also generate a UR in the completion status, which the 1708 * RC should also see. 
1709 */ 1710 /* ARGSUSED */ 1711 static int 1712 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1713 pf_data_t *pfd_p) 1714 { 1715 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1716 1717 if (!HAS_SAER_LOGS(pfd_p, bit)) 1718 return (PF_ERR_PANIC); 1719 1720 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1721 return (PF_ERR_MATCHED_RC); 1722 1723 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND) 1724 return (PF_ERR_MATCHED_DEVICE); 1725 1726 return (PF_ERR_PANIC); 1727 } 1728 1729 /* ARGSUSED */ 1730 static int 1731 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1732 pf_data_t *pfd_p) 1733 { 1734 return (PF_ERR_NO_PANIC); 1735 } 1736 1737 /* ARGSUSED */ 1738 static int 1739 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1740 pf_data_t *pfd_p) 1741 { 1742 return (PF_ERR_PANIC); 1743 } 1744 1745 /* 1746 * If a PCIe device does not support AER, assume all AER statuses have been set, 1747 * unless other registers do not indicate a certain error occuring. 
1748 */ 1749 static void 1750 pf_adjust_for_no_aer(pf_data_t *pfd_p) 1751 { 1752 uint32_t aer_ue = 0; 1753 uint16_t status; 1754 1755 if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1756 return; 1757 1758 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED) 1759 aer_ue = PF_AER_FATAL_ERR; 1760 1761 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) { 1762 aer_ue = PF_AER_NON_FATAL_ERR; 1763 status = PCI_ERR_REG(pfd_p)->pci_err_status; 1764 1765 /* Check if the device received a PTLP */ 1766 if (!(status & PCI_STAT_PERROR)) 1767 aer_ue &= ~PCIE_AER_UCE_PTLP; 1768 1769 /* Check if the device signaled a CA */ 1770 if (!(status & PCI_STAT_S_TARG_AB)) 1771 aer_ue &= ~PCIE_AER_UCE_CA; 1772 1773 /* Check if the device sent a UR */ 1774 if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status & 1775 PCIE_DEVSTS_UR_DETECTED)) 1776 aer_ue &= ~PCIE_AER_UCE_UR; 1777 1778 /* 1779 * Ignore ECRCs as it is optional and will manefest itself as 1780 * another error like PTLP and MFP 1781 */ 1782 aer_ue &= ~PCIE_AER_UCE_ECRC; 1783 1784 /* 1785 * Generally if NFE is set, SERR should also be set. Exception: 1786 * When certain non-fatal errors are masked, and some of them 1787 * happened to be the cause of the NFE, SERR will not be set and 1788 * they can not be the source of this interrupt. 1789 * 1790 * On x86, URs are masked (NFE + UR can be set), if any other 1791 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did 1792 * occur, SERR should be set since they are not masked. So if 1793 * SERR is not set, none of them occurred. 
1794 */ 1795 if (!(status & PCI_STAT_S_SYSERR)) 1796 aer_ue &= ~PCIE_AER_UCE_TO; 1797 } 1798 1799 if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) { 1800 aer_ue &= ~PCIE_AER_UCE_TRAINING; 1801 aer_ue &= ~PCIE_AER_UCE_SD; 1802 } 1803 1804 PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue; 1805 } 1806 1807 static void 1808 pf_adjust_for_no_saer(pf_data_t *pfd_p) 1809 { 1810 uint32_t s_aer_ue = 0; 1811 uint16_t status; 1812 1813 if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1814 return; 1815 1816 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED) 1817 s_aer_ue = PF_SAER_FATAL_ERR; 1818 1819 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) { 1820 s_aer_ue = PF_SAER_NON_FATAL_ERR; 1821 status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat; 1822 1823 /* Check if the device received a UC_DATA */ 1824 if (!(status & PCI_STAT_PERROR)) 1825 s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR; 1826 1827 /* Check if the device received a RCVD_MA/MA_ON_SC */ 1828 if (!(status & (PCI_STAT_R_MAST_AB))) { 1829 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA; 1830 s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC; 1831 } 1832 1833 /* Check if the device received a RCVD_TA/TA_ON_SC */ 1834 if (!(status & (PCI_STAT_R_TARG_AB))) { 1835 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA; 1836 s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC; 1837 } 1838 } 1839 1840 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue; 1841 } 1842 1843 /* Find the PCIe-PCI bridge based on secondary bus number */ 1844 static pf_data_t * 1845 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus) 1846 { 1847 pf_data_t *bdg_pfd_p; 1848 1849 /* Search down for the PCIe-PCI device. 
*/ 1850 for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p; 1851 bdg_pfd_p = bdg_pfd_p->pe_next) { 1852 if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) && 1853 PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus) 1854 return (bdg_pfd_p); 1855 } 1856 1857 return (NULL); 1858 } 1859 1860 /* Find the PCIe-PCI bridge of a PCI device */ 1861 static pf_data_t * 1862 pf_get_parent_pcie_bridge(pf_data_t *pfd_p) 1863 { 1864 dev_info_t *dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1865 1866 /* This only makes sense if the device is a PCI device */ 1867 if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p))) 1868 return (NULL); 1869 1870 /* 1871 * Search up for the PCIe-PCI device. Watchout for x86 where pci 1872 * devices hang directly off of NPE. 1873 */ 1874 for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) { 1875 if (dip == rp_dip) 1876 dip = NULL; 1877 1878 if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip))) 1879 return (PCIE_DIP2PFD(dip)); 1880 } 1881 1882 return (NULL); 1883 } 1884 1885 /* 1886 * See if a leaf error was bubbled up to the Root Complex (RC) and handled. 1887 * As of right now only RC's have enough information to have errors found in the 1888 * fabric to be matched to the RC. Note that Root Port's (RP) do not carry 1889 * enough information. Currently known RC's are SPARC Fire architecture and 1890 * it's equivalents, and x86's NPE. 1891 * SPARC Fire architectures have a plethora of error registers, while currently 1892 * NPE only have the address of a failed load. 1893 * 1894 * Check if the RC logged an error with the appropriate status type/abort type. 1895 * Ex: Parity Error, Received Master/Target Abort 1896 * Check if either the fault address found in the rc matches the device's 1897 * assigned address range (PIO's only) or the fault BDF in the rc matches the 1898 * device's BDF or Secondary Bus/Bus Range. 
1899 */ 1900 static boolean_t 1901 pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p, 1902 uint32_t abort_type) 1903 { 1904 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p); 1905 pf_data_t *rc_pfd_p; 1906 pcie_req_id_t fault_bdf; 1907 1908 for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p); 1909 rc_pfd_p = rc_pfd_p->pe_next) { 1910 /* Only root complex's have enough information to match */ 1911 if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p))) 1912 continue; 1913 1914 /* If device and rc abort type does not match continue */ 1915 if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type)) 1916 continue; 1917 1918 fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf; 1919 1920 /* The Fault BDF = Device's BDF */ 1921 if (fault_bdf == bus_p->bus_bdf) 1922 return (B_TRUE); 1923 1924 /* The Fault Addr is in device's address range */ 1925 if (pf_in_addr_range(bus_p, 1926 PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr)) 1927 return (B_TRUE); 1928 1929 /* The Fault BDF is from PCIe-PCI Bridge's secondary bus */ 1930 if (PCIE_IS_PCIE_BDG(bus_p) && 1931 pf_in_bus_range(bus_p, fault_bdf)) 1932 return (B_TRUE); 1933 } 1934 1935 return (B_FALSE); 1936 } 1937 1938 /* 1939 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then 1940 * it is a DMA, otherwise it's a PIO 1941 */ 1942 static void 1943 pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type, 1944 pcie_req_id_t *bdf) { 1945 pf_data_t *rc_pfd_p; 1946 1947 /* Could be DMA or PIO. Find out by look at error type. 
*/ 1948 switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) { 1949 case PCIE_AER_SUCE_TA_ON_SC: 1950 case PCIE_AER_SUCE_MA_ON_SC: 1951 *trans_type = PF_ADDR_DMA; 1952 return; 1953 case PCIE_AER_SUCE_RCVD_TA: 1954 case PCIE_AER_SUCE_RCVD_MA: 1955 *bdf = PCIE_INVALID_BDF; 1956 *trans_type = PF_ADDR_PIO; 1957 return; 1958 case PCIE_AER_SUCE_USC_ERR: 1959 case PCIE_AER_SUCE_UC_DATA_ERR: 1960 case PCIE_AER_SUCE_PERR_ASSERT: 1961 break; 1962 default: 1963 *addr = 0; 1964 *bdf = PCIE_INVALID_BDF; 1965 *trans_type = 0; 1966 return; 1967 } 1968 1969 *bdf = PCIE_INVALID_BDF; 1970 *trans_type = PF_ADDR_PIO; 1971 for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p; 1972 rc_pfd_p = rc_pfd_p->pe_prev) { 1973 if (PFD_IS_ROOT(rc_pfd_p) && 1974 (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & 1975 PCI_STAT_PERROR)) { 1976 *trans_type = PF_ADDR_DMA; 1977 return; 1978 } 1979 } 1980 } 1981 1982 /* 1983 * pf_pci_decode function decodes the secondary aer transaction logs in 1984 * PCIe-PCI bridges. 1985 * 1986 * The log is 128 bits long and arranged in this manner. 
1987 * [0:35] Transaction Attribute (s_aer_h0-saer_h1) 1988 * [36:39] Transaction lower command (saer_h1) 1989 * [40:43] Transaction upper command (saer_h1) 1990 * [44:63] Reserved 1991 * [64:127] Address (saer_h2-saer_h3) 1992 */ 1993 /* ARGSUSED */ 1994 static int 1995 pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) { 1996 pcix_attr_t *attr; 1997 uint64_t addr; 1998 uint32_t trans_type; 1999 pcie_req_id_t bdf = PCIE_INVALID_BDF; 2000 2001 attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0); 2002 *cmd = GET_SAER_CMD(pfd_p); 2003 2004 cmd_switch: 2005 switch (*cmd) { 2006 case PCI_PCIX_CMD_IORD: 2007 case PCI_PCIX_CMD_IOWR: 2008 /* IO Access should always be down stream */ 2009 addr = PCIE_ADV_BDG_HDR(pfd_p, 2); 2010 bdf = attr->rid; 2011 trans_type = PF_ADDR_PIO; 2012 break; 2013 case PCI_PCIX_CMD_MEMRD_DW: 2014 case PCI_PCIX_CMD_MEMRD_BL: 2015 case PCI_PCIX_CMD_MEMRDBL: 2016 case PCI_PCIX_CMD_MEMWR: 2017 case PCI_PCIX_CMD_MEMWR_BL: 2018 case PCI_PCIX_CMD_MEMWRBL: 2019 addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) << 2020 PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2); 2021 bdf = attr->rid; 2022 2023 pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf); 2024 break; 2025 case PCI_PCIX_CMD_CFRD: 2026 case PCI_PCIX_CMD_CFWR: 2027 /* 2028 * CFG Access should always be down stream. Match the BDF in 2029 * the address phase. 2030 */ 2031 addr = 0; 2032 bdf = attr->rid; 2033 trans_type = PF_ADDR_CFG; 2034 break; 2035 case PCI_PCIX_CMD_SPL: 2036 /* 2037 * Check for DMA read completions. The requesting BDF is in the 2038 * Address phase. 2039 */ 2040 addr = 0; 2041 bdf = attr->rid; 2042 trans_type = PF_ADDR_DMA; 2043 break; 2044 case PCI_PCIX_CMD_DADR: 2045 /* 2046 * For Dual Address Cycles the transaction command is in the 2nd 2047 * address phase. 
2048 */ 2049 *cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 2050 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 2051 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 2052 if (*cmd != PCI_PCIX_CMD_DADR) 2053 goto cmd_switch; 2054 /* FALLTHROUGH */ 2055 default: 2056 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0; 2057 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF; 2058 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0; 2059 return (DDI_FAILURE); 2060 } 2061 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type; 2062 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf; 2063 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr; 2064 return (DDI_SUCCESS); 2065 } 2066 2067 /* 2068 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler. 2069 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 2070 */ 2071 int 2072 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr, 2073 pcie_req_id_t bdf) 2074 { 2075 ddi_fm_error_t derr; 2076 2077 /* If we don't know the addr or rid just return with NOTFOUND */ 2078 if ((addr == NULL) && !PCIE_CHECK_VALID_BDF(bdf)) 2079 return (PF_HDL_NOTFOUND); 2080 2081 if (!(flag & (PF_ADDR_DMA | PF_ADDR_PIO | PF_ADDR_CFG))) { 2082 return (PF_HDL_NOTFOUND); 2083 } 2084 2085 bzero(&derr, sizeof (ddi_fm_error_t)); 2086 derr.fme_version = DDI_FME_VERSION; 2087 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 2088 derr.fme_ena = ena; 2089 2090 return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf)); 2091 } 2092 2093 static int 2094 pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2095 uint64_t addr, pcie_req_id_t bdf) 2096 { 2097 int status = PF_HDL_NOTFOUND; 2098 ndi_fmc_t *fcp = NULL; 2099 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 2100 pcie_req_id_t dip_bdf; 2101 boolean_t have_lock = B_FALSE; 2102 pcie_bus_t *bus_p; 2103 dev_info_t *cdip; 2104 2105 if (!(bus_p = pf_is_ready(dip))) { 2106 return (status); 2107 } 2108 2109 ASSERT(fmhdl); 2110 if (!i_ddi_fm_handler_owned(dip)) { 2111 /* 2112 * pf_handler_enter always 
returns SUCCESS if the 'impl' arg is 2113 * NULL. 2114 */ 2115 (void) pf_handler_enter(dip, NULL); 2116 have_lock = B_TRUE; 2117 } 2118 2119 dip_bdf = PCI_GET_BDF(dip); 2120 2121 /* Check if dip and BDF match, if not recurse to it's children. */ 2122 if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) || 2123 dip_bdf == bdf)) { 2124 if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap)) 2125 fcp = fmhdl->fh_dma_cache; 2126 else 2127 fcp = NULL; 2128 2129 if (fcp) 2130 status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr, 2131 bdf, fcp); 2132 2133 2134 if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) && 2135 DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap)) 2136 fcp = fmhdl->fh_acc_cache; 2137 else 2138 fcp = NULL; 2139 2140 if (fcp) 2141 status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr, 2142 bdf, fcp); 2143 } 2144 2145 /* If we found the handler or know it's this device, we're done */ 2146 if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) || 2147 (status == PF_HDL_FOUND))) 2148 goto done; 2149 2150 /* 2151 * If the current devuce us a PCIe-PCI bridge need to check for special 2152 * cases: 2153 * 2154 * If it is a PIO and we don't have an address or this is a DMA, check 2155 * to see if the BDF = secondary bus. If so stop. The BDF isn't a real 2156 * BDF and the fault device could have come from any device in the PCI 2157 * bus. 
2158 */ 2159 if (PCIE_IS_PCIE_BDG(bus_p) && 2160 ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) && 2161 ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf)) 2162 goto done; 2163 2164 2165 /* If we can't find the handler check it's children */ 2166 for (cdip = ddi_get_child(dip); cdip; 2167 cdip = ddi_get_next_sibling(cdip)) { 2168 if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL) 2169 continue; 2170 2171 if (pf_in_bus_range(bus_p, bdf) || 2172 pf_in_addr_range(bus_p, addr)) 2173 status = pf_hdl_child_lookup(cdip, derr, flag, addr, 2174 bdf); 2175 2176 if (status == PF_HDL_FOUND) 2177 goto done; 2178 } 2179 2180 done: 2181 if (have_lock == B_TRUE) 2182 pf_handler_exit(dip); 2183 2184 return (status); 2185 } 2186 2187 static int 2188 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2189 uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) { 2190 ndi_fmcentry_t *fep; 2191 int found = 0; 2192 int status; 2193 2194 mutex_enter(&fcp->fc_lock); 2195 for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) { 2196 ddi_fmcompare_t compare_func; 2197 2198 /* 2199 * Compare captured error state with handle 2200 * resources. During the comparison and 2201 * subsequent error handling, we block 2202 * attempts to free the cache entry. 2203 */ 2204 compare_func = (flag == ACC_HANDLE) ? 
2205 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t) 2206 fep->fce_resource) : 2207 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t) 2208 fep->fce_resource); 2209 2210 status = compare_func(dip, fep->fce_resource, 2211 (void *)&addr, (void *)&bdf); 2212 2213 if (status == DDI_FM_NONFATAL) { 2214 found++; 2215 2216 /* Set the error for this resource handle */ 2217 if (flag == ACC_HANDLE) { 2218 ddi_acc_handle_t ap = fep->fce_resource; 2219 2220 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status, 2221 DDI_FM_ERR_UNEXPECTED); 2222 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); 2223 derr->fme_acc_handle = ap; 2224 } else { 2225 ddi_dma_handle_t dp = fep->fce_resource; 2226 2227 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status, 2228 DDI_FM_ERR_UNEXPECTED); 2229 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); 2230 derr->fme_dma_handle = dp; 2231 } 2232 } 2233 } 2234 mutex_exit(&fcp->fc_lock); 2235 2236 /* 2237 * If a handler isn't found and we know this is the right device mark 2238 * them all failed. 2239 */ 2240 if ((addr != NULL) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) { 2241 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp); 2242 if (status == PF_HDL_FOUND) 2243 found++; 2244 } 2245 2246 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND); 2247 } 2248 2249 /* 2250 * Automatically decode AER header logs and does a handling look up based on the 2251 * AER header decoding. 2252 * 2253 * For this function only the Primary/Secondary AER Header Logs need to be valid 2254 * in the pfd (PCIe Fault Data) arg. 2255 * 2256 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 
2257 */ 2258 static int 2259 pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p, 2260 boolean_t is_primary) 2261 { 2262 int lookup = PF_HDL_NOTFOUND; 2263 2264 if (is_primary) { 2265 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p); 2266 if (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), reg_p) == DDI_SUCCESS) { 2267 lookup = pf_hdl_lookup(rpdip, derr->fme_ena, 2268 reg_p->pcie_ue_tgt_trans, 2269 reg_p->pcie_ue_tgt_addr, 2270 reg_p->pcie_ue_tgt_bdf); 2271 } 2272 } else { 2273 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p); 2274 uint16_t cmd; 2275 if (pf_pci_decode(pfd_p, &cmd) == DDI_SUCCESS) { 2276 lookup = pf_hdl_lookup(rpdip, derr->fme_ena, 2277 reg_p->pcie_sue_tgt_trans, 2278 reg_p->pcie_sue_tgt_addr, 2279 reg_p->pcie_sue_tgt_bdf); 2280 } 2281 } 2282 2283 return (lookup); 2284 } 2285 2286 /* 2287 * Decodes the TLP and returns the BDF of the handler, address and transaction 2288 * type if known. 2289 * 2290 * Types of TLP logs seen in RC, and what to extract: 2291 * 2292 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR 2293 * Memory(PIO) - address, PF_PIO_ADDR 2294 * CFG - Should not occur and result in UR 2295 * Completion(DMA) - Requester BDF, PF_DMA_ADDR 2296 * Completion(PIO) - Requester BDF, PF_PIO_ADDR 2297 * 2298 * Types of TLP logs seen in SW/Leaf, and what to extract: 2299 * 2300 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR 2301 * Memory(PIO) - address, PF_PIO_ADDR 2302 * CFG - Destined BDF, address, PF_CFG_ADDR 2303 * Completion(DMA) - Requester BDF, PF_DMA_ADDR 2304 * Completion(PIO) - Requester BDF, PF_PIO_ADDR 2305 * 2306 * The adv_reg_p must be passed in separately for use with SPARC RPs. A 2307 * SPARC RP could have multiple AER header logs which cannot be directly 2308 * accessed via the bus_p. 
 */
int
pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) {
	pcie_tlp_hdr_t	*tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
	pcie_req_id_t	my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
	uint64_t	flt_addr = 0;
	uint32_t	flt_trans_type = 0;

	/* Invalidate the decoded target fields until a decode succeeds */
	adv_reg_p->pcie_ue_tgt_addr = 0;
	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
	adv_reg_p->pcie_ue_tgt_trans = 0;

	my_bdf = bus_p->bus_bdf;
	switch (tlp_hdr->type) {
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* Grab the 32/64bit fault address */
		if (tlp_hdr->fmt & 0x1) {
			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
		} else {
			flt_addr = adv_reg_p->pcie_ue_hdr[2];
		}

		/* Requester ID lives in the upper half of header dword 1 */
		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);

		/*
		 * If the req bdf >= this.bdf, then it means the request is this
		 * device or came from a device below it. Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else if (PCIE_IS_ROOT(bus_p) &&
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else {
			flt_trans_type = PF_ADDR_PIO;
			flt_bdf = PCIE_INVALID_BDF;
		}
		break;
	case PCIE_TLP_TYPE_CFG0:
	case PCIE_TLP_TYPE_CFG1:
		/* CFG request: destination BDF is in header dword 2 */
		flt_addr = 0;
		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
		flt_trans_type = PF_ADDR_CFG;
		break;
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
	{
		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)adv_reg_p->pcie_ue_hdr;

		/* Completions carry no address; only the requester ID */
		flt_addr = NULL;
		flt_bdf = cpl_tlp->rid;

		/*
		 * If the cpl bdf < this.bdf, then it means the request is this
		 * device or came from a device below it. Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if (cpl_tlp->rid > cpl_tlp->cid) {
			flt_trans_type = PF_ADDR_DMA;
		} else {
			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
		}
		break;
	}
	default:
		return (DDI_FAILURE);
	}

	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;

	return (DDI_SUCCESS);
}

#define	PCIE_EREPORT	DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC

/*
 * Reserve an errorq element and build the skeleton fabric ereport (detector
 * FMRI + ENA) for dip.  On success the caller owns *eqep and must post it
 * via pf_ereport_post.  Returns DDI_SUCCESS or DDI_FAILURE (queue full).
 */
static int
pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
    nvlist_t **detector, errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
	char			device_path[MAXPATHLEN];
	nv_alloc_t		*nva;

	*eqep = errorq_reserve(fmhdl->fh_errorq);
	if (*eqep == NULL) {
		/* No queue space: account the drop and bail */
		atomic_add_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64, 1);
		return (DDI_FAILURE);
	}

	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);

	ASSERT(*ereport);
	ASSERT(nva);

	/*
	 * Use the dev_path/devid for this device instance.
	 */
	*detector = fm_nvlist_create(nva);
	if (dip == ddi_root_node()) {
		device_path[0] = '/';
		device_path[1] = '\0';
	} else {
		(void) ddi_pathname(dip, device_path);
	}

	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
	    device_path, NULL);

	/* Generate a fresh ENA if the caller did not supply one */
	if (ena == 0)
		ena = fm_ena_generate(0, FM_ENA_FMT1);

	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);

	return (DDI_SUCCESS);
}

/* Commit the reserved errorq element; delivery is asynchronous. */
/* ARGSUSED */
static void
pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
    errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;

	errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
}

/*
 * Walk the fault queue and post one fabric ereport per affected device,
 * attaching whatever register groups (PCI/PCIx/PCIe/AER/RP) the device's
 * bus type provides.  Finally release every device lock taken during scan.
 */
static void
pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	nvlist_t	*ereport;
	nvlist_t	*detector;
	errorq_elem_t	*eqep;
	pcie_bus_t	*bus_p;
	pf_data_t	*pfd_p;
	uint32_t	total = impl->pf_total;

	/*
	 * Ereports need to be sent in a top down fashion. The fabric translator
	 * expects the ereports from the Root first. This is needed to tell if
	 * the system contains a PCIe compliant RC/RP.
	 */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		bus_p = PCIE_PFD2BUS(pfd_p);
		pfd_p->pe_valid = B_FALSE;

		/* Skip expected errors, RCs (reported elsewhere), and
		 * devices without ereport capability. */
		if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
		    PFD_IS_RC(pfd_p) ||
		    !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
			continue;

		if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
		    &ereport, &detector, &eqep) != DDI_SUCCESS)
			continue;

		/* Generic PCI device information */
		fm_payload_set(ereport,
		    "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
		    "device_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id >> 16),
		    "vendor_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id & 0xFFFF),
		    "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
		    "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
		    "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
		    "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
		    "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
		    "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
		    NULL);

		/* PCI registers */
		fm_payload_set(ereport,
		    "pci_status", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_err_status,
		    "pci_command", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_cfg_comm,
		    NULL);

		/* PCI bridge registers */
		if (PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pci_bdg_sec_status", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
			    "pci_bdg_ctrl", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
			    NULL);
		}

		/* PCIx registers */
		if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_status", DATA_TYPE_UINT32,
			    PCIX_ERR_REG(pfd_p)->pcix_status,
			    "pcix_command", DATA_TYPE_UINT16,
			    PCIX_ERR_REG(pfd_p)->pcix_command,
			    NULL);
		}

		/* PCIx ECC Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;
			pf_pcix_ecc_regs_t *ecc_reg;

			/*
			 * ecc_bdg_reg is only read below under the same
			 * PCIE_IS_BDG condition that assigns it.
			 */
			if (PCIE_IS_BDG(bus_p))
				ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
			ecc_reg = PCIX_ECC_REG(pfd_p);
			fm_payload_set(ereport,
			    "pcix_ecc_control_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
			    (ecc_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
			    (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_fstaddr :
			    ecc_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_secaddr :
			    ecc_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_attr :
			    ecc_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx ECC Bridge Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;

			ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
			fm_payload_set(ereport,
			    "pcix_ecc_control_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx Bridge */
		if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_bdg_status", DATA_TYPE_UINT32,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
			    "pcix_bdg_sec_status", DATA_TYPE_UINT16,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
			    NULL);
		}

		/* PCIe registers */
		if (PCIE_IS_PCIE(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_status", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_status,
			    "pcie_command", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
			    "pcie_dev_cap", DATA_TYPE_UINT32,
			    PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
			    NULL);
		}

		/* PCIe AER registers */
		if (PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
			    "pcie_ue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_status,
			    "pcie_ue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
			    "pcie_ue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
			    "pcie_ue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
			    "pcie_ue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
			    "pcie_ue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
			    "pcie_ue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
			    "pcie_ce_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_status,
			    "pcie_ce_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
			    NULL);
		}

		/* PCIe AER decoded header */
		if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
			fm_payload_set(ereport,
			    "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
			    "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
			    "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
		}

		/* PCIe BDG AER registers */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
			    "pcie_sue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
			    "pcie_sue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
			    "pcie_sue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
			    "pcie_sue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
			    "pcie_sue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
			    "pcie_sue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
			    "pcie_sue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
			    NULL);
		}

		/* PCIe BDG AER decoded header */
		if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
		    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
			fm_payload_set(ereport,
			    "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
			    "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
			    "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe RP registers */
		if (PCIE_IS_RP(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_rp_status", DATA_TYPE_UINT32,
			    PCIE_RP_REG(pfd_p)->pcie_rp_status,
			    "pcie_rp_control", DATA_TYPE_UINT16,
			    PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
			    NULL);
		}

		/* PCIe RP AER registers */
		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_rp_status", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
			    "pcie_adv_rp_command", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
			    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
			    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
			    NULL);
		}

		/* Misc ereport information */
		/* "remainder" counts down from pf_total across this loop */
		fm_payload_set(ereport,
		    "remainder", DATA_TYPE_UINT32, total--,
		    "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
		    NULL);

		pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
		    &eqep);
	}

	/* Unlock all the devices in the queue */
	for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
		if (pfd_p->pe_lock) {
			pf_handler_exit(PCIE_PFD2DIP(pfd_p));
		}
	}
}

/*
 * pf_handler_enter must be called to serialize access to each device's
 * pf_data_t.  Once error handling is finished with the device call
 * pf_handler_exit to allow other threads to access it. The same thread may
 * call pf_handler_enter several times without any consequences.
 *
 * The "impl" variable is passed in during scan fabric to double check that
 * there is not a recursive algorithm and to ensure only one thread is doing a
 * fabric scan at all times.
 *
 * In some cases "impl" is not available, such as "child lookup" being called
 * from outside of scan fabric, just pass in NULL for this variable and this
 * extra check will be skipped.
 */
static int
pf_handler_enter(dev_info_t *dip, pf_impl_t *impl)
{
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);

	ASSERT(pfd_p);

	/*
	 * Check to see if the lock has already been taken by this
	 * thread. If so just return and don't take lock again.
	 */
	if (!pfd_p->pe_lock || !impl) {
		i_ddi_fm_handler_enter(dip);
		pfd_p->pe_lock = B_TRUE;
		return (PF_SCAN_SUCCESS);
	}

	/* Check to see that this dip is already in the "impl" error queue */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		if (PCIE_PFD2DIP(pfd_p) == dip) {
			return (PF_SCAN_SUCCESS);
		}
	}

	/* Locked but not in our queue: another scan owns it -> deadlock risk */
	return (PF_SCAN_DEADLOCK);
}

/*
 * Release the per-device FM handler lock taken by pf_handler_enter.
 * The caller must currently hold the lock (pe_lock is asserted).
 */
static void
pf_handler_exit(dev_info_t *dip)
{
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);

	ASSERT(pfd_p);

	ASSERT(pfd_p->pe_lock == B_TRUE);
	i_ddi_fm_handler_exit(dip);
	pfd_p->pe_lock = B_FALSE;
}

/*
 * This function calls the driver's callback function (if it's FMA hardened
 * and callback capable). This function relies on the current thread already
 * owning the driver's fmhdl lock.
 */
static int
pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
{
	int cb_sts = DDI_FM_OK;

	if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
		dev_info_t *pdip = ddi_get_parent(dip);
		struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
		struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
		struct i_ddi_errhdl *errhdl;
		/* Error-handler targets are registered on the parent's fmhdl */
		while (tgt != NULL) {
			if (dip == tgt->ft_dip) {
				errhdl = tgt->ft_errhdl;
				cb_sts = errhdl->eh_func(dip, derr,
				    errhdl->eh_impl);
				break;
			}
			tgt = tgt->ft_next;
		}
	}
	return (cb_sts);
}