/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>
#include <sys/fm/io/pci.h>
#include <sys/promif.h>
#include <sys/disp.h>
#include <sys/atomic.h>
#include <sys/pcie.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>

/* Any error reported in a bridge's PCIe device status register */
#define	PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
	PCIE_DEVSTS_CE_DETECTED)

/* Any error reported in a bridge's PCI secondary status register */
#define	PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
	PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable errors partitioned into fatal vs. non-fatal classes */
#define	PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
	PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define	PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* Secondary (PCIe-to-PCI bridge) AER errors, fatal vs. non-fatal classes */
#define	PF_SAER_FATAL_ERR	(PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
	PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
	PCIE_AER_SUCE_SERR_ASSERT)
#define	PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
	PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
	PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
	PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
	PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

#define	PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/* True if "bit" is the error recorded by the AER first-error pointer */
#define	PF_FIRST_AER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

/* Device has AER and "bit" was the first error it logged */
#define	HAS_AER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/* Same as PF_FIRST_AER_ERR, for the secondary (bridge) AER registers */
#define	PF_FIRST_SAER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

/* Device has AER and "bit" was the first secondary error it logged */
#define	HAS_SAER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the PCI command from the secondary AER header log */
#define	GET_SAER_CMD(pfd_p) \
	((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
	PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* Correctable "advisory non-fatal" error reported? */
#define	CE_ADVISORY(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)

/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
	uint32_t	bit;		/* Error bit */
	int		(*handler)();	/* Error handling function */
	uint16_t	affected_flags; /* Primary affected flag */
	/*
	 * Secondary affected flag, effective when the information
	 * indicated by the primary flag is not available, eg.
	 * PF_AFFECTED_AER/SAER/ADDR
	 */
	uint16_t	sec_affected_flags;
} pf_fab_err_tbl_t;

static pcie_bus_t *pf_is_ready(dev_info_t *);
/* Functions for scanning errors */
static int pf_default_hdl(dev_info_t *, pf_impl_t *);
static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);

/* Functions for gathering errors */
static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg);
static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);

/* Functions for analysing errors */
static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
    uint32_t);
static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
    pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int
pf_analyse_to(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_no_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);

/* PCIe Fabric Handle Lookup Support Functions. */
static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
    uint64_t, pcie_req_id_t);
static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
    pcie_req_id_t, ndi_fmc_t *);
static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
    boolean_t);

static int pf_handler_enter(dev_info_t *, pf_impl_t *);
static void pf_handler_exit(dev_info_t *);
static void pf_reset_pfd(pf_data_t *);

boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
int pcie_disable_scan = 0;		/* Disable fabric scan */

/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
pf_eh_enter(pcie_bus_t *bus_p)
{
}

/*
 * Inform interested parties that error handling has ended.
 *
 * Notifies the virtual error subsystem for the root port's fault queue,
 * then resets the per-pf_data_t affected-device bookkeeping and (for root
 * devices) the recorded interrupt source so the queue is clean for the
 * next fabric scan.
 */
void
pf_eh_exit(pcie_bus_t *bus_p)
{
	pcie_bus_t *rbus_p = PCIE_DIP2BUS(bus_p->bus_rp_dip);
	pf_data_t *root_pfd_p = PCIE_BUS2PFD(rbus_p);
	pf_data_t *pfd_p;
	uint_t intr_type = PCIE_ROOT_EH_SRC(root_pfd_p)->intr_type;

	pciev_eh_exit(root_pfd_p, intr_type);

	/* Clear affected device info and INTR SRC */
	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
		if (PCIE_IS_ROOT(PCIE_PFD2BUS(pfd_p))) {
			PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
			PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
		}
	}
}

/*
 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The
 * caller may create a local pf_data_t with the "root fault"
 * information populated to either do a precise or full scan. More
 * than one pf_data_t maybe linked together if there are multiple
 * errors. Only a PCIe compliant Root Port device may pass in NULL
 * for the root_pfd_p.
 *
 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
 * the rdip. PCIe Root ports should call pf_scan_fabric using its parent as
 * the rdip.
 *
 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
 * any RC detected errors that propagated to/from the fabric.
 *
 * This code assumes that by the time pf_scan_fabric is
 * called, pf_handler_enter has NOT been called on the rdip.
 */
int
pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
{
	pf_impl_t	impl;
	pf_data_t	*pfd_p, *pfd_head_p, *pfd_tail_p;
	int		scan_flag = PF_SCAN_SUCCESS;
	int		analyse_flag = PF_ERR_NO_ERROR;
	boolean_t	full_scan = pcie_full_scan;

	if (pcie_disable_scan)
		return (analyse_flag);

	/* Find the head and tail of this link list */
	pfd_head_p = root_pfd_p;
	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
	    pfd_tail_p = pfd_tail_p->pe_next)
		;

	/* Save head/tail */
	impl.pf_total = 0;
	impl.pf_derr = derr;
	impl.pf_dq_head_p = pfd_head_p;
	impl.pf_dq_tail_p = pfd_tail_p;

	/* If scan is initiated from RP then RP itself must be scanned. */
	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
	    !root_pfd_p) {
		scan_flag = pf_handler_enter(rdip, &impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			goto done;

		scan_flag = pf_default_hdl(rdip, &impl);
		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
			goto done;
	}

	/*
	 * Scan the fabric using the scan_bdf and scan_addr in error q.
	 * scan_bdf will be valid in the following cases:
	 *	- Fabric message
	 *	- Poisoned TLP
	 *	- Signaled UR/CA
	 *	- Received UR/CA
	 *	- PIO load failures
	 */
	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
	    pfd_p = pfd_p->pe_next) {
		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);

		if (PFD_IS_RC(pfd_p))
			impl.pf_total++;

		if (impl.pf_fault->full_scan)
			full_scan = B_TRUE;

		/* Dispatch only when there is something to look for. */
		if (full_scan ||
		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
		    impl.pf_fault->scan_addr)
			scan_flag |= pf_dispatch(rdip, &impl, full_scan);

		/* A full scan covers everything; no need to keep looping. */
		if (full_scan)
			break;
	}

done:
	/*
	 * If this is due to safe access, don't analyze the errors and return
	 * success regardless of how scan fabric went.
	 */
	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
		analyse_flag = PF_ERR_NO_PANIC;
	} else {
		analyse_flag = pf_analyse_error(derr, &impl);
	}

	pf_send_ereport(derr, &impl);

	/*
	 * Check if any hardened driver's callback reported a panic.
	 * If so panic.
	 */
	if (scan_flag & PF_SCAN_CB_FAILURE)
		analyse_flag |= PF_ERR_PANIC;

	/*
	 * If a deadlock was detected, panic the system as error analysis has
	 * been compromised.
	 */
	if (scan_flag & PF_SCAN_DEADLOCK)
		analyse_flag |= PF_ERR_PANIC_DEADLOCK;

	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);

	return (analyse_flag);
}

/* Force every subsequent fabric scan to walk the entire fabric. */
void
pcie_force_fullscan(void)
{
	pcie_full_scan = B_TRUE;
}

/*
 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
 * falls in the error path.
 *
 * Returns PF_SCAN_* flags
 */
static int
pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
{
	dev_info_t	*dip;
	pcie_req_id_t	rid = impl->pf_fault->scan_bdf;
	pcie_bus_t	*bus_p;
	int		scan_flag = PF_SCAN_SUCCESS;

	for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
		/* Make sure dip is attached and ready */
		if (!(bus_p = pf_is_ready(dip)))
			continue;

		scan_flag |= pf_handler_enter(dip, impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			break;

		/*
		 * Handle this device if it is a:
		 * o Full Scan
		 * o PCI/PCI-X Device
		 * o Fault BDF = Device BDF
		 * o BDF/ADDR is in range of the Bridge/Switch
		 */
		if (full_scan ||
		    (bus_p->bus_bdf == rid) ||
		    pf_in_bus_range(bus_p, rid) ||
		    pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
			int hdl_flag = pf_default_hdl(dip, impl);
			scan_flag |= hdl_flag;

			/*
			 * A bridge may have detected no errors in which case
			 * there is no need to scan further down.
354 */ 355 if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD) 356 continue; 357 } else { 358 pf_handler_exit(dip); 359 continue; 360 } 361 362 /* match or in bridge bus-range */ 363 switch (bus_p->bus_dev_type) { 364 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: 365 case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE: 366 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 367 break; 368 case PCIE_PCIECAP_DEV_TYPE_UP: 369 case PCIE_PCIECAP_DEV_TYPE_DOWN: 370 case PCIE_PCIECAP_DEV_TYPE_ROOT: 371 { 372 pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p); 373 pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p); 374 pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p); 375 /* 376 * Continue if the fault BDF != the switch or there is a 377 * parity error 378 */ 379 if ((bus_p->bus_bdf != rid) || 380 (err_p->pci_err_status & PF_PCI_PARITY_ERR) || 381 (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR)) 382 scan_flag |= pf_dispatch(dip, impl, full_scan); 383 break; 384 } 385 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: 386 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: 387 /* 388 * Reached a PCIe end point so stop. Note dev_type 389 * PCI_DEV is just a PCIe device that requires IO Space 390 */ 391 break; 392 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO: 393 if (PCIE_IS_BDG(bus_p)) 394 scan_flag |= pf_dispatch(dip, impl, B_TRUE); 395 break; 396 default: 397 ASSERT(B_FALSE); 398 } 399 } 400 return (scan_flag); 401 } 402 403 /* Returns whether the "bdf" is in the bus range of a switch/bridge */ 404 boolean_t 405 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf) 406 { 407 pci_bus_range_t *br_p = &bus_p->bus_bus_range; 408 uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >> 409 PCIE_REQ_ID_BUS_SHIFT; 410 411 /* check if given bdf falls within bridge's bus range */ 412 if (PCIE_IS_BDG(bus_p) && 413 ((bus_no >= br_p->lo) && (bus_no <= br_p->hi))) 414 return (B_TRUE); 415 else 416 return (B_FALSE); 417 } 418 419 /* 420 * Return whether the "addr" is in the assigned addr of a device. 
421 */ 422 boolean_t 423 pf_in_assigned_addr(pcie_bus_t *bus_p, uint64_t addr) 424 { 425 uint_t i; 426 uint64_t low, hi; 427 pci_regspec_t *assign_p = bus_p->bus_assigned_addr; 428 429 for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) { 430 low = assign_p->pci_phys_low; 431 hi = low + assign_p->pci_size_low; 432 if ((addr < hi) && (addr >= low)) 433 return (B_TRUE); 434 } 435 return (B_FALSE); 436 } 437 438 /* 439 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the 440 * "addr" is in the assigned addr of a device. 441 */ 442 static boolean_t 443 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr) 444 { 445 uint_t i; 446 uint64_t low, hi; 447 ppb_ranges_t *ranges_p = bus_p->bus_addr_ranges; 448 449 if (!addr) 450 return (B_FALSE); 451 452 /* check if given address belongs to this device */ 453 if (pf_in_assigned_addr(bus_p, addr)) 454 return (B_TRUE); 455 456 /* check if given address belongs to a child below this device */ 457 if (!PCIE_IS_BDG(bus_p)) 458 return (B_FALSE); 459 460 for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) { 461 switch (ranges_p->child_high & PCI_ADDR_MASK) { 462 case PCI_ADDR_IO: 463 case PCI_ADDR_MEM32: 464 low = ranges_p->child_low; 465 hi = ranges_p->size_low + low; 466 if ((addr < hi) && (addr >= low)) 467 return (B_TRUE); 468 break; 469 case PCI_ADDR_MEM64: 470 low = ((uint64_t)ranges_p->child_mid << 32) | 471 (uint64_t)ranges_p->child_low; 472 hi = (((uint64_t)ranges_p->size_high << 32) | 473 (uint64_t)ranges_p->size_low) + low; 474 if ((addr < hi) && (addr >= low)) 475 return (B_TRUE); 476 break; 477 } 478 } 479 return (B_FALSE); 480 } 481 482 static pcie_bus_t * 483 pf_is_ready(dev_info_t *dip) 484 { 485 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 486 if (!bus_p) 487 return (NULL); 488 489 if (!(bus_p->bus_fm_flags & PF_FM_READY)) 490 return (NULL); 491 return (bus_p); 492 } 493 494 static void 495 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs, 496 pcie_bus_t *bus_p, boolean_t 
bdg) 497 { 498 if (bdg) { 499 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 500 PCI_PCIX_BDG_ECC_STATUS); 501 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 502 PCI_PCIX_BDG_ECC_FST_AD); 503 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 504 PCI_PCIX_BDG_ECC_SEC_AD); 505 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 506 PCI_PCIX_BDG_ECC_ATTR); 507 } else { 508 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p, 509 PCI_PCIX_ECC_STATUS); 510 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p, 511 PCI_PCIX_ECC_FST_AD); 512 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p, 513 PCI_PCIX_ECC_SEC_AD); 514 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p, 515 PCI_PCIX_ECC_ATTR); 516 } 517 } 518 519 520 static void 521 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 522 { 523 /* 524 * For PCI-X device PCI-X Capability only exists for Type 0 Headers. 525 * PCI-X Bridge Capability only exists for Type 1 Headers. 526 * Both capabilities do not exist at the same time. 527 */ 528 if (PCIE_IS_BDG(bus_p)) { 529 pf_pcix_bdg_err_regs_t *pcix_bdg_regs; 530 531 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p); 532 533 pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p, 534 PCI_PCIX_SEC_STATUS); 535 pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p, 536 PCI_PCIX_BDG_STATUS); 537 538 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 539 /* 540 * PCI Express to PCI-X bridges only implement the 541 * secondary side of the PCI-X ECC registers, bit one is 542 * read-only so we make sure we do not write to it. 
543 */ 544 if (!PCIE_IS_PCIE_BDG(bus_p)) { 545 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 546 0); 547 pf_pcix_ecc_regs_gather( 548 PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE); 549 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 550 1); 551 } 552 pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0), 553 bus_p, B_TRUE); 554 } 555 } else { 556 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 557 558 pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p, 559 PCI_PCIX_COMMAND); 560 pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p, 561 PCI_PCIX_STATUS); 562 if (PCIX_ECC_VERSION_CHECK(bus_p)) 563 pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p, 564 B_TRUE); 565 } 566 } 567 568 static void 569 pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 570 { 571 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 572 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 573 574 pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS); 575 pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL); 576 pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP); 577 578 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 579 pf_pcix_regs_gather(pfd_p, bus_p); 580 581 if (PCIE_IS_ROOT(bus_p)) { 582 pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p); 583 584 pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p, 585 PCIE_ROOTSTS); 586 pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p, 587 PCIE_ROOTCTL); 588 } 589 590 /* 591 * For eligible components, we gather Slot Register state. 592 * 593 * Eligible components are: 594 * - a Downstream Port or a Root Port with the Slot Implemented 595 * capability bit set 596 * - hotplug capable 597 * 598 * Slot register state is useful, for instance, to determine whether the 599 * Slot's child device is physically present (via the Slot Status 600 * register). 
	 */
	if ((PCIE_IS_SWD(bus_p) || PCIE_IS_ROOT(bus_p)) &&
	    PCIE_IS_HOTPLUG_ENABLED(PCIE_BUS2DIP(bus_p))) {
		pf_pcie_slot_regs_t *pcie_slot_regs = PCIE_SLOT_REG(pfd_p);
		pcie_slot_regs->pcie_slot_cap = PCIE_CAP_GET(32, bus_p,
		    PCIE_SLOTCAP);
		pcie_slot_regs->pcie_slot_control = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTCTL);
		pcie_slot_regs->pcie_slot_status = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTSTS);

		/* All-1s reads (PCI_EINVAL*) indicate the reads failed. */
		if (pcie_slot_regs->pcie_slot_cap != PCI_EINVAL32 &&
		    pcie_slot_regs->pcie_slot_control != PCI_EINVAL16 &&
		    pcie_slot_regs->pcie_slot_status != PCI_EINVAL16) {
			pcie_slot_regs->pcie_slot_regs_valid = B_TRUE;
		}
	}

	/* Everything below requires the AER extended capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	/* Gather UE AERs */
	pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CTL);
	pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_STS);
	pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_MASK);
	pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_SERV);
	PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG);
	PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x4);
	PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x8);
	PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0xc);

	/* Gather CE AERs */
	pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_STS);
	pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_MASK);

	/*
	 * If pci express to pci bridge then grab the bridge
	 * error registers.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SCTL);
		pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_STS);
		pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_MASK);
		pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_SERV);
		PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG);
		PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x4);
		PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x8);
		PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0xc);
	}

	/*
	 * If PCI Express root port then grab the root port
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs =
		    PCIE_ADV_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_CMD);
		pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_STS);
		pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_CE_SRC_ID);
		pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_ERR_SRC_ID);
	}
}

/*
 * Snapshot all error registers relevant to a device (legacy PCI always;
 * plus PCIe or PCI-X state when the corresponding capability exists).
 */
static void
pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p);

	/*
	 * Start by reading all the error registers that are available for
	 * pci and pci express and for leaf devices and bridges/switches
	 */
	pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT);
	pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM);

	/*
	 * If pci-pci bridge grab PCI bridge specific error registers.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		pci_bdg_regs->pci_bdg_sec_stat =
		    PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
		pci_bdg_regs->pci_bdg_ctrl =
		    PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
	}

	/*
	 * If pci express device grab pci express error registers and
	 * check for advanced error reporting features and grab them if
	 * available.
	 */
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_gather(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

}

/*
 * Write the PCI-X status values captured by pf_pcix_regs_gather() back to
 * the hardware, clearing the error bits that were recorded at gather time.
 */
static void
pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
		    pcix_bdg_regs->pcix_bdg_sec_stat);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
		    pcix_bdg_regs->pcix_bdg_stat);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers. For
			 * clearing, there is no need to "select" the ECC
			 * register, just write what was originally read.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);

			}
			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
		    pcix_regs->pcix_status);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);

			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
			    pcix_ecc_regs->pcix_ecc_ctlstat);
		}
	}
}

/*
 * Write the PCIe and AER status values captured by pf_pcie_regs_gather()
 * back to the hardware, clearing the error bits recorded at gather time.
 */
static void
pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);

	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	if (!PCIE_HAS_AER(bus_p))
		return;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
	    pcie_adv_regs->pcie_ue_status);

	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
	    pcie_adv_regs->pcie_ce_status);

	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
		    pcie_bdg_regs->pcie_sue_status);
	}

	/*
	 * If PCI Express root complex then clear the root complex
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;

		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
		    pcie_rp_regs->pcie_rp_err_status);
	}
}

/*
 * Write back the captured PCI status registers, clearing the recorded
 * error bits; dispatches to the PCIe/PCI-X clear routines as appropriate.
 */
static void
pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_clear(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);

	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
		    pci_bdg_regs->pci_bdg_sec_stat);
	}
}

/*
 * Snapshot and then clear all error registers for "dip".  The gathered
 * state remains in the device's pf_data_t.
 */
/* ARGSUSED */
void
pcie_clear_errors(dev_info_t *dip)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);

	ASSERT(bus_p);

	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);
}

/* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
static void
pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
	pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
	uint32_t root_err = rp_regs->pcie_rp_err_status;
	uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
	int num_faults = 0;

	/* Since this data structure is reused, make sure to reset it */
	root_fault->full_scan = B_FALSE;
	root_fault->scan_bdf = PCIE_INVALID_BDF;
	root_fault->scan_addr = 0;

	/* Without AER there is nothing to pinpoint the source, so full scan. */
	if (!PCIE_HAS_AER(bus_p) &&
	    (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/*
	 * Check to see if an error has been received that
	 * requires a scan of the fabric.  Count the number of
	 * faults seen.  If MUL CE/FE_NFE that counts for
	 * at least 2 faults, so just return with full_scan.
	 */
	if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
	    (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	if (root_err & PCIE_AER_RE_STS_CE_RCVD)
		num_faults++;

	if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_CA)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_UR)
		num_faults++;

	/* If no faults just return */
	if (num_faults == 0)
		return;

	/* If faults > 1 do full scan */
	if (num_faults > 1) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/* By this point, there is only 1 fault detected */
	if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
		num_faults--;
	} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
		num_faults--;
	} else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
	    HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
	    (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
	    DDI_SUCCESS)) {
		/* Fall back to the target address decoded from the TLP log. */
		PCIE_ROOT_FAULT(pfd_p)->scan_addr =
		    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
		num_faults--;
	}

	/*
	 * This means an error did occur, but we couldn't extract the fault BDF
	 */
	if (num_faults > 0)
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;

}


/*
 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
 *
 * Returns a scan flag.
 * o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
 *   Fault Q
 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
 *   unnecessary scanning
 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
 *   time.
 */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
	int cb_sts, scan_flag = PF_SCAN_SUCCESS;

	/* Make sure this device hasn't already been snapshotted and cleared */
	if (pfd_p->pe_valid == B_TRUE) {
		scan_flag |= PF_SCAN_IN_DQ;
		goto done;
	}

	/*
	 * If this is a device used for PCI passthrough into a virtual machine,
	 * don't let any error it caused panic the system.
	 */
	if (bus_p->bus_fm_flags & PF_FM_IS_PASSTHRU)
		pfd_p->pe_severity_mask |= PF_ERR_PANIC;

	/*
	 * Read vendor/device ID and check with cached data; if it doesn't
	 * match, it could very well mean that the device is no longer
	 * responding.  In this case, we return PF_SCAN_BAD_RESPONSE; should
	 * the caller choose to panic in this case, we will have the basic
	 * info in the error queue for the purposes of postmortem debugging.
	 */
	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
		char buf[FM_MAX_CLASS];

		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
		    PCI_ERROR_SUBCLASS, PCI_NR);
		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

		/*
		 * For IOV/Hotplug purposes skip gathering info for this device,
		 * but populate affected info and severity.  Clear out any data
		 * that maybe been saved in the last fabric scan.
		 */
		pf_reset_pfd(pfd_p);
		pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

		/* Add the snapshot to the error q */
		pf_en_dq(pfd_p, impl);
		pfd_p->pe_valid = B_TRUE;

		return (PF_SCAN_BAD_RESPONSE);
	}

	/* Snapshot the error state, then clear it in hardware. */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);

	/* Root ports additionally determine the fault BDF/addr to scan for. */
	if (PCIE_IS_RP(bus_p))
		pf_pci_find_rp_fault(pfd_p, bus_p);

	cb_sts = pf_fm_callback(dip, impl->pf_derr);

	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
		scan_flag |= PF_SCAN_CB_FAILURE;

	/* Add the snapshot to the error q */
	pf_en_dq(pfd_p, impl);

done:
	/*
	 * If a bridge does not have any error no need to scan any further down.
	 * For PCIe devices, check the PCIe device status and PCI secondary
	 * status.
	 * - Some non-compliant PCIe devices do not utilize PCIe
	 *   error registers.  If so rely on legacy PCI error registers.
	 * For PCI devices, check the PCI secondary status.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	if (PCIE_IS_PCI_BDG(bus_p) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	pfd_p->pe_valid = B_TRUE;
	return (scan_flag);
}

/*
 * Set the passthru flag on a device bus_p.  Called by passthru drivers to
 * indicate when a device is or is no longer under passthru control.
 */
void
pf_set_passthru(dev_info_t *dip, boolean_t is_passthru)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	/* Atomic updates: bus_fm_flags is read concurrently by fault paths. */
	if (is_passthru) {
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_PASSTHRU);
	} else {
		atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_PASSTHRU);
	}
}

/*
 * Called during postattach to initialize a device's error handling
 * capabilities. If the devices has already been hardened, then there isn't
 * much needed. Otherwise initialize the device's default FMA capabilities.
 *
 * In a future project where PCIe support is removed from pcifm, several
 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
 * created here so that the PCI/PCIe eversholt rules will work properly.
 */
void
pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	boolean_t need_cb_register = B_FALSE;

	if (!bus_p) {
		cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return;
	}

	if (fmhdl) {
		/*
		 * If device is only ereport capable and not callback capable
		 * make it callback capable. The only downside is that the
		 * "fm-errcb-capable" property is not created for this device
		 * which should be ok since it's not used anywhere.
		 */
		if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
			need_cb_register = B_TRUE;
	} else {
		int cap;
		/*
		 * fm-capable in driver.conf can be used to set fm_capabilities.
		 * If fm-capable is not defined, set the default
		 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
		 */
		cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
		    DDI_PROP_DONTPASS, "fm-capable",
		    DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
		cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);

		/* Mark the device as a non-hardened driver (no FM handle). */
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_NH);

		if (cmd == DDI_ATTACH) {
			ddi_fm_init(dip, &cap, &ibc);
			pci_ereport_setup(dip);
		}

		if (cap & DDI_FM_ERRCB_CAPABLE)
			need_cb_register = B_TRUE;

		/* Re-read the handle; ddi_fm_init may have created it. */
		fmhdl = DEVI(dip)->devi_fmhdl;
	}

	/* If ddi_fm_init fails for any reason RETURN */
	if (!fmhdl) {
		(void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
		return;
	}

	fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE;
	if (cmd == DDI_ATTACH) {
		if (need_cb_register)
			ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
	}

	/* Only now may the fault-scan code error-handle this device. */
	atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
}

/* undo FMA lock, called at predetach */
void
pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	if (!bus_p)
		return;

	/* Don't fini anything if device isn't FM Ready */
	if (!(bus_p->bus_fm_flags & PF_FM_READY))
		return;

	/* no other code should set the flag to false */
	atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_READY);

	/*
	 * Grab the mutex to make sure device isn't in the middle of
	 * error handling. Setting the bus_fm_flag to ~PF_FM_READY
	 * should prevent this device from being error handled after
	 * the mutex has been released.
	 */
	(void) pf_handler_enter(dip, NULL);
	pf_handler_exit(dip);

	/* undo non-hardened drivers */
	if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
		if (cmd == DDI_DETACH) {
			atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_NH);
			pci_ereport_teardown(dip);
			/*
			 * ddi_fini itself calls ddi_handler_unregister,
			 * so no need to explicitly call unregister.
			 */
			ddi_fm_fini(dip);
		}
	}
}

/*
 * Placeholder error callback registered on behalf of devices that were made
 * callback capable in pf_init(); it performs no work and reports DDI_FM_OK.
 */
/*ARGSUSED*/
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
	return (DDI_FM_OK);
}

/*
 * Add PFD to queue. If it is an RC add it to the beginning,
 * otherwise add it to the end.
 */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
	pf_data_t *head_p = impl->pf_dq_head_p;
	pf_data_t *tail_p = impl->pf_dq_tail_p;

	impl->pf_total++;

	/* Empty queue: the first entry enqueued must be a root pfd. */
	if (!head_p) {
		ASSERT(PFD_IS_ROOT(pfd_p));
		impl->pf_dq_head_p = pfd_p;
		impl->pf_dq_tail_p = pfd_p;
		pfd_p->pe_prev = NULL;
		pfd_p->pe_next = NULL;
		return;
	}

	/* Check if this is a Root Port eprt */
	if (PFD_IS_ROOT(pfd_p)) {
		pf_data_t *root_p, *last_p = NULL;

		/* The first item must be a RP */
		root_p = head_p;
		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
		    last_p = last_p->pe_next)
			root_p = last_p;

		/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
		root_p->pe_next = pfd_p;
		pfd_p->pe_prev = root_p;
		pfd_p->pe_next = last_p;

		if (last_p)
			last_p->pe_prev = pfd_p;
		else
			tail_p = pfd_p;
	} else {
		/* Non-root pfds are simply appended at the tail. */
		tail_p->pe_next = pfd_p;
		pfd_p->pe_prev = tail_p;
		pfd_p->pe_next = NULL;
		tail_p = pfd_p;
	}

	impl->pf_dq_head_p = head_p;
	impl->pf_dq_tail_p = tail_p;
}

/*
 * Analysis table for PCIe endpoints (and PCIe/PCI devices); selected by
 * pf_analyse_error() for DEV_TYPE_PCIE_DEV/PCI_DEV.
 *
 * Ignore:
 * - TRAINING: as leaves do not have children
 * - SD: as leaves do not have children
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_CA, pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UR, pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* Analysis table for root ports (DEV_TYPE_ROOT and pseudo RC). */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
	{PCIE_AER_UCE_TRAINING, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_ADDR, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_CA, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};

/* Analysis table for switch upstream/downstream ports. */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
	{PCIE_AER_UCE_TRAINING, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_CA, pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR, pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};

/* Secondary (PCI side) analysis table for PCIe-to-PCI bridges. */
const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
	{PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_USC_ERR, pf_panic,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert,
	    0, 0},

	{PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic,
	    0, 0},

	{PCIE_AER_SUCE_INTERNAL_ERR, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{0, NULL, 0, 0}
};

/* Legacy PCI secondary-status analysis table for PCI bridges. */
const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
	{PCI_STAT_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR, pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* Legacy PCI primary-status analysis table for plain PCI devices. */
const pf_fab_err_tbl_t pcie_pci_tbl[] = {
	{PCI_STAT_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR, pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* UE status with masked bits removed (mask bit set == error masked). */
#define	PF_MASKED_AER_ERR(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ue_status & \
	((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF))
/* Secondary UE status with masked bits removed (bridge secondary AER). */
#define	PF_MASKED_SAER_ERR(pfd_p) \
	(PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \
	((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF))
/*
 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
 * Queue.
 */
static int
pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	int sts_flags, error_flags = 0;
	pf_data_t *pfd_p;

	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		sts_flags = 0;

		/* skip analysing error when no error info is gathered */
		if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
			goto done;

		/* Pick the analysis table(s) based on the device type. */
		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
			/* no adjust_for_aer for pseudo RC */
			/* keep the severity passed on from RC if any */
			sts_flags |= pfd_p->pe_severity_flags;
			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			/* Bridges use both the primary and secondary AER. */
			pf_adjust_for_no_aer(pfd_p);
			pf_adjust_for_no_saer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_tbl,
			    PF_MASKED_AER_ERR(pfd_p));
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_bdg_tbl,
			    PF_MASKED_SAER_ERR(pfd_p));
			/*
			 * Some non-compliant PCIe devices do not utilize PCIe
			 * error registers. So fallthrough and rely on legacy
			 * PCI error registers.
			 */
			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
				break;
			/* FALLTHROUGH */
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pci_tbl,
			    PCI_ERR_REG(pfd_p)->pci_err_status);

			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
				break;

			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pci_bdg_tbl,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
		}

		pfd_p->pe_severity_flags = sts_flags;

done:
		pfd_p->pe_orig_severity_flags = pfd_p->pe_severity_flags;
		/* Have pciev_eh adjust the severity */
		pfd_p->pe_severity_flags = pciev_eh(pfd_p, impl);

		/* Drop severities suppressed for this device (passthru). */
		pfd_p->pe_severity_flags &= ~pfd_p->pe_severity_mask;

		error_flags |= pfd_p->pe_severity_flags;
	}

	return (error_flags);
}

/*
 * Walk one analysis table against err_reg: invoke the handler for every set
 * error bit, accumulate the severity it returns, and record which devices are
 * affected by the fault.
 */
static int
pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
    pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg)
{
	const pf_fab_err_tbl_t *row;
	int err = 0;
	uint16_t flags;
	uint32_t bit;

	for (row = tbl; err_reg && (row->bit != 0); row++) {
		bit = row->bit;
		if (!(err_reg & bit))
			continue;
		err |= row->handler(derr, bit, impl->pf_dq_head_p, pfd_p);

		flags = row->affected_flags;
		/*
		 * check if the primary flag is valid;
		 * if not, use the secondary flag
		 */
		if (flags & PF_AFFECTED_AER) {
			if (!HAS_AER_LOGS(pfd_p, bit)) {
				flags = row->sec_affected_flags;
			}
		} else if (flags & PF_AFFECTED_SAER) {
			if (!HAS_SAER_LOGS(pfd_p, bit)) {
				flags = row->sec_affected_flags;
			}
		} else if (flags & PF_AFFECTED_ADDR) {
			/* only Root has this flag */
			if (PCIE_ROOT_FAULT(pfd_p)->scan_addr == 0) {
				flags = row->sec_affected_flags;
			}
		}

		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags |= flags;
	}

	/* No handler ran (or none reported anything): explicitly "no error". */
	if (!err)
		err = PF_ERR_NO_ERROR;

	return (err);
}

/*
 * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device
 * issues a CA/UR a corresponding Received CA/UR should have been seen in the
 * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then
 * this error may be safely ignored. If not check the logs and see if an
 * associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	uint32_t abort_type;
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCIE_AER_UCE_UR))
		return (PF_ERR_NO_PANIC);

	/*
	 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
	 * such as a config read or a bad DMA address.
	 */
	if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
		goto handle_lookup;

	/* UR surfaces in the RC as a master abort, CA as a target abort. */
	if (bit == PCIE_AER_UCE_UR)
		abort_type = PCI_STAT_R_MAST_AB;
	else
		abort_type = PCI_STAT_R_TARG_AB;

	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
		return (PF_ERR_MATCHED_RC);

handle_lookup:
	if (HAS_AER_LOGS(pfd_p, bit) &&
	    pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe
 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so
 * then this error may be safely ignored. If not check the logs and see if an
 * associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint32_t abort_type;

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCIE_AER_SUCE_RCVD_MA))
		return (PF_ERR_NO_PANIC);

	if (bit == PCIE_AER_SUCE_RCVD_MA)
		abort_type = PCI_STAT_R_MAST_AB;
	else
		abort_type = PCI_STAT_R_TARG_AB;

	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
		return (PF_ERR_MATCHED_RC);

	if (!HAS_SAER_LOGS(pfd_p, bit))
		return (PF_ERR_PANIC);

	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * Generic PCI error analyser. This function is used for Parity Errors,
 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
 * In general PCI devices do not have error logs, it is very difficult to figure
 * out what transaction caused the error. Instead find the nearest PCIe-PCI
 * Bridge and check to see if it has logs and if it has an error associated with
 * this PCI Device.
 */
/* ARGSUSED */
static int
pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	pf_data_t *parent_pfd_p;
	uint16_t cmd;
	uint32_t aer_ue_status;
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
	pf_pcie_adv_bdg_err_regs_t *parent_saer_p;

	/* A signaled system error on a legacy PCI device is unrecoverable. */
	if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
		return (PF_ERR_PANIC);

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCI_STAT_R_MAST_AB))
		return (PF_ERR_NO_PANIC);


	/*
	 * Map the legacy PCI status bit to the secondary AER bits the parent
	 * PCIe-PCI bridge would have latched for the same transaction.
	 */
	if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
		aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
	} else {
		aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
		    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
		    PCIE_AER_SUCE_RCVD_MA);
	}

	parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
	if (parent_pfd_p == NULL)
		return (PF_ERR_PANIC);

	/* Check if parent bridge has seen this error */
	parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
	if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
	    !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
		return (PF_ERR_PANIC);

	/*
	 * If the addr or bdf from the parent PCIe bridge logs belong to this
	 * PCI device, assume the PCIe bridge's error handling has already taken
	 * care of this PCI device's error.
	 */
	if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
	    pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
		return (PF_ERR_MATCHED_PARENT);

	/*
	 * If this device is a PCI-PCI bridge, check if the bdf in the parent
	 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges.
	 * If they are, then assume the PCIe bridge's error handling has already
	 * taken care of this PCI-PCI bridge device's error.
	 */
	if (PCIE_IS_BDG(bus_p) &&
	    pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
		return (PF_ERR_MATCHED_PARENT);

	return (PF_ERR_PANIC);
}

/*
 * PCIe Bridge transactions associated with PERR.
 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
 * o Bridge received a poisoned Completion on a Split Transction from PCIe
 * o Bridge received a poisoned Completion on a Delayed Transction from PCIe
 *
 * Check for non-poisoned PCIe transactions that got forwarded to the secondary
 * side and detects a PERR#. Except for delayed read completions, a poisoned
 * TLP will be forwarded to the secondary bus and PERR# will be asserted.
1728 */ 1729 /* ARGSUSED */ 1730 static int 1731 pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1732 pf_data_t *pfd_p) 1733 { 1734 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1735 uint16_t cmd; 1736 int hdl_sts = PF_HDL_NOTFOUND; 1737 int err = PF_ERR_NO_ERROR; 1738 pf_pcie_adv_bdg_err_regs_t *saer_p; 1739 1740 1741 if (HAS_SAER_LOGS(pfd_p, bit)) { 1742 saer_p = PCIE_ADV_BDG_REG(pfd_p); 1743 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1744 return (PF_ERR_PANIC); 1745 1746 cmd_switch: 1747 switch (cmd) { 1748 case PCI_PCIX_CMD_IOWR: 1749 case PCI_PCIX_CMD_MEMWR: 1750 case PCI_PCIX_CMD_MEMWR_BL: 1751 case PCI_PCIX_CMD_MEMWRBL: 1752 /* Posted Writes Transactions */ 1753 if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO) 1754 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1755 B_FALSE); 1756 break; 1757 case PCI_PCIX_CMD_CFWR: 1758 /* 1759 * Check to see if it is a non-posted write. If so, a 1760 * UR Completion would have been sent. 1761 */ 1762 if (pf_matched_in_rc(dq_head_p, pfd_p, 1763 PCI_STAT_R_MAST_AB)) { 1764 hdl_sts = PF_HDL_FOUND; 1765 err = PF_ERR_MATCHED_RC; 1766 goto done; 1767 } 1768 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1769 B_FALSE); 1770 break; 1771 case PCI_PCIX_CMD_SPL: 1772 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1773 B_FALSE); 1774 break; 1775 case PCI_PCIX_CMD_DADR: 1776 cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 1777 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 1778 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 1779 if (cmd != PCI_PCIX_CMD_DADR) 1780 goto cmd_switch; 1781 /* FALLTHROUGH */ 1782 default: 1783 /* Unexpected situation, panic */ 1784 hdl_sts = PF_HDL_NOTFOUND; 1785 } 1786 1787 if (hdl_sts == PF_HDL_FOUND) 1788 err = PF_ERR_MATCHED_DEVICE; 1789 else 1790 err = PF_ERR_PANIC; 1791 } else { 1792 /* 1793 * Check to see if it is a non-posted write. If so, a UR 1794 * Completion would have been sent. 
1795 */ 1796 if ((PCIE_ERR_REG(pfd_p)->pcie_err_status & 1797 PCIE_DEVSTS_UR_DETECTED) && 1798 pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB)) 1799 err = PF_ERR_MATCHED_RC; 1800 1801 /* Check for posted writes. Transaction is lost. */ 1802 if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & 1803 PCI_STAT_S_PERROR) 1804 err = PF_ERR_PANIC; 1805 1806 /* 1807 * All other scenarios are due to read completions. Check for 1808 * PERR on the primary side. If found the primary side error 1809 * handling will take care of this error. 1810 */ 1811 if (err == PF_ERR_NO_ERROR) { 1812 if (PCI_ERR_REG(pfd_p)->pci_err_status & 1813 PCI_STAT_PERROR) 1814 err = PF_ERR_MATCHED_PARENT; 1815 else 1816 err = PF_ERR_PANIC; 1817 } 1818 } 1819 1820 done: 1821 return (err); 1822 } 1823 1824 /* 1825 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, 1826 * check the logs and see if an associated handler for this transaction can be 1827 * found. 1828 */ 1829 /* ARGSUSED */ 1830 static int 1831 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1832 pf_data_t *pfd_p) 1833 { 1834 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1835 1836 /* 1837 * If AERs are supported find the logs in this device, otherwise look in 1838 * it's parent's logs. 1839 */ 1840 if (HAS_AER_LOGS(pfd_p, bit)) { 1841 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0); 1842 1843 /* 1844 * Double check that the log contains a poisoned TLP. 1845 * Some devices like PLX switch do not log poison TLP headers. 1846 */ 1847 if (hdr->ep) { 1848 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == 1849 PF_HDL_FOUND) 1850 return (PF_ERR_MATCHED_DEVICE); 1851 } 1852 1853 /* 1854 * If an address is found and hdl lookup failed panic. 1855 * Otherwise check parents to see if there was enough 1856 * information recover. 
1857 */ 1858 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr) 1859 return (PF_ERR_PANIC); 1860 } 1861 1862 /* 1863 * Check to see if the rc has already handled this error or a parent has 1864 * already handled this error. 1865 * 1866 * If the error info in the RC wasn't enough to find the fault device, 1867 * such as if the faulting device lies behind a PCIe-PCI bridge from a 1868 * poisoned completion, check to see if the PCIe-PCI bridge has enough 1869 * info to recover. For completion TLP's, the AER header logs only 1870 * contain the faulting BDF in the Root Port. For PCIe device the fault 1871 * BDF is the fault device. But if the fault device is behind a 1872 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI 1873 * bridge's secondary bus number. 1874 */ 1875 if (!PFD_IS_ROOT(pfd_p)) { 1876 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p)); 1877 pf_data_t *parent_pfd_p; 1878 1879 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) { 1880 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1881 return (PF_ERR_MATCHED_RC); 1882 } 1883 1884 parent_pfd_p = PCIE_DIP2PFD(pdip); 1885 1886 if (HAS_AER_LOGS(parent_pfd_p, bit)) 1887 return (PF_ERR_MATCHED_PARENT); 1888 } else { 1889 pf_data_t *bdg_pfd_p; 1890 pcie_req_id_t secbus; 1891 1892 /* 1893 * Looking for a pcie bridge only makes sense if the BDF 1894 * Dev/Func = 0/0 1895 */ 1896 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1897 goto done; 1898 1899 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf; 1900 1901 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF)) 1902 goto done; 1903 1904 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus); 1905 1906 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p, 1907 PCIE_AER_SUCE_PERR_ASSERT)) { 1908 return pf_analyse_perr_assert(derr, 1909 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p); 1910 } 1911 } 1912 done: 1913 return (PF_ERR_PANIC); 1914 } 1915 1916 /* 1917 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split 1918 * Completions. 
 * If a PCIe Bridge receives a MA/TA check logs and see if an
 * associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint16_t cmd;
	int sts = PF_HDL_NOTFOUND;

	/* Without secondary AER logs there is nothing to decode; panic. */
	if (!HAS_SAER_LOGS(pfd_p, bit))
		return (PF_ERR_PANIC);

	if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	/* Only split-completion transactions can be matched to a handler. */
	if (cmd == PCI_PCIX_CMD_SPL)
		sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);

	if (sts == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * PCIe Timeout error analyser. This error can be forgiven if it is marked as
 * CE Advisory. If it is marked as advisory, this means the HW can recover
 * and/or retry the transaction automatically. Additionally, if a device's
 * parent slot reports that it is no longer physically present, we do not panic,
 * as one would not expect a missing device to respond to a command.
 */
/* ARGSUSED */
static int
pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	pf_data_t *rppfd = PCIE_DIP2PFD(rpdip);
	pf_pcie_slot_regs_t *p_pcie_slot_regs;

	if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
		return (PF_ERR_NO_PANIC);

	/* Consult the root port's slot registers, if a valid copy exists. */
	p_pcie_slot_regs = PCIE_SLOT_REG(rppfd);
	if (p_pcie_slot_regs->pcie_slot_regs_valid) {
		/*
		 * If the device is reported gone from its parent slot, then it
		 * is expected that any outstanding commands would time out. In
		 * this case, do not panic.
		 */
		if ((p_pcie_slot_regs->pcie_slot_status &
		    PCIE_SLOTSTS_PRESENCE_DETECTED) == 0x0) {
			return (PF_ERR_NO_PANIC);
		}
	}

	return (PF_ERR_PANIC);
}

/*
 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by
 * matching the device BDF with the TLP Log. If misrouting panic, otherwise
 * don't panic.
 */
/* ARGSUSED */
static int
pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	/* The completer BDF lives in the upper 16 bits of header word 2. */
	if (HAS_AER_LOGS(pfd_p, bit) &&
	    (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
		return (PF_ERR_NO_PANIC);

	/*
	 * This is a case of mis-routing. Any of the switches above this
	 * device could be at fault.
	 */
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ROOT;

	return (PF_ERR_PANIC);
}

/*
 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data
 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a
 * Poisoned TLP.
 *
 * Non-Posted Writes will also generate a UR in the completion status, which the
 * RC should also see.
2011 */ 2012 /* ARGSUSED */ 2013 static int 2014 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 2015 pf_data_t *pfd_p) 2016 { 2017 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 2018 2019 if (!HAS_SAER_LOGS(pfd_p, bit)) 2020 return (PF_ERR_PANIC); 2021 2022 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 2023 return (PF_ERR_MATCHED_RC); 2024 2025 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND) 2026 return (PF_ERR_MATCHED_DEVICE); 2027 2028 return (PF_ERR_PANIC); 2029 } 2030 2031 /* ARGSUSED */ 2032 static int 2033 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 2034 pf_data_t *pfd_p) 2035 { 2036 return (PF_ERR_NO_PANIC); 2037 } 2038 2039 /* ARGSUSED */ 2040 static int 2041 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 2042 pf_data_t *pfd_p) 2043 { 2044 return (PF_ERR_PANIC); 2045 } 2046 2047 /* 2048 * If a PCIe device does not support AER, assume all AER statuses have been set, 2049 * unless other registers do not indicate a certain error occuring. 
 */
static void
pf_adjust_for_no_aer(pf_data_t *pfd_p)
{
	uint32_t	aer_ue = 0;
	uint16_t	status;

	/* Nothing to synthesize if the device has real AER registers. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		aer_ue = PF_AER_FATAL_ERR;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		aer_ue = PF_AER_NON_FATAL_ERR;
		status = PCI_ERR_REG(pfd_p)->pci_err_status;

		/* Check if the device received a PTLP */
		if (!(status & PCI_STAT_PERROR))
			aer_ue &= ~PCIE_AER_UCE_PTLP;

		/* Check if the device signaled a CA */
		if (!(status & PCI_STAT_S_TARG_AB))
			aer_ue &= ~PCIE_AER_UCE_CA;

		/* Check if the device sent a UR */
		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED))
			aer_ue &= ~PCIE_AER_UCE_UR;

		/*
		 * Ignore ECRCs as it is optional and will manifest itself as
		 * another error like PTLP and MFP
		 */
		aer_ue &= ~PCIE_AER_UCE_ECRC;

		/*
		 * Generally if NFE is set, SERR should also be set. Exception:
		 * When certain non-fatal errors are masked, and some of them
		 * happened to be the cause of the NFE, SERR will not be set and
		 * they can not be the source of this interrupt.
		 *
		 * On x86, URs are masked (NFE + UR can be set), if any other
		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
		 * occur, SERR should be set since they are not masked. So if
		 * SERR is not set, none of them occurred.
		 */
		if (!(status & PCI_STAT_S_SYSERR))
			aer_ue &= ~PCIE_AER_UCE_TO;
	}

	/* Link training/surprise-down bits only make sense on bridges. */
	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
		aer_ue &= ~PCIE_AER_UCE_TRAINING;
		aer_ue &= ~PCIE_AER_UCE_SD;
	}

	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
}

/*
 * Like pf_adjust_for_no_aer(), but synthesizes the *secondary* (PCIe-PCI
 * bridge) AER UE status from the secondary PCI status register when the
 * bridge has no AER capability.
 */
static void
pf_adjust_for_no_saer(pf_data_t *pfd_p)
{
	uint32_t	s_aer_ue = 0;
	uint16_t	status;

	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		s_aer_ue = PF_SAER_FATAL_ERR;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		s_aer_ue = PF_SAER_NON_FATAL_ERR;
		status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;

		/* Check if the device received a UC_DATA */
		if (!(status & PCI_STAT_PERROR))
			s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;

		/* Check if the device received a RCVD_MA/MA_ON_SC */
		if (!(status & (PCI_STAT_R_MAST_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
			s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
		}

		/* Check if the device received a RCVD_TA/TA_ON_SC */
		if (!(status & (PCI_STAT_R_TARG_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
			s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
		}
	}

	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
}

/*
 * Find the PCIe-PCI bridge based on secondary bus number.
 * Searches forward (downstream) in the fault queue from pfd_p; returns the
 * matching pf_data_t or NULL if no such bridge is queued.
 */
static pf_data_t *
pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
{
	pf_data_t *bdg_pfd_p;

	/* Search down for the PCIe-PCI device. */
	for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
	    bdg_pfd_p = bdg_pfd_p->pe_next) {
		if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
		    PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
			return (bdg_pfd_p);
	}

	return (NULL);
}

/*
 * Find the PCIe-PCI bridge of a PCI device by walking up the devinfo tree.
 * Returns NULL if pfd_p is not a PCI device or no such bridge is found before
 * reaching the root port.
 */
static pf_data_t *
pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
{
	dev_info_t	*dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* This only makes sense if the device is a PCI device */
	if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
		return (NULL);

	/*
	 * Search up for the PCIe-PCI device.  Watch out for x86 where pci
	 * devices hang directly off of NPE.
	 *
	 * NOTE(review): when the walk reaches rp_dip, dip is NULLed to
	 * terminate the loop; this relies on PCIE_DIP2BUS/PCIE_IS_PCIE_BDG
	 * and ddi_get_parent tolerating a NULL dip — confirm against the
	 * macro definitions in pcie_impl.h.
	 */
	for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
		if (dip == rp_dip)
			dip = NULL;

		if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
			return (PCIE_DIP2PFD(dip));
	}

	return (NULL);
}

/*
 * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
 * As of right now only RC's have enough information to have errors found in the
 * fabric to be matched to the RC.  Note that Root Port's (RP) do not carry
 * enough information.  Currently known RC's are SPARC Fire architecture and
 * it's equivalents, and x86's NPE.
 * SPARC Fire architectures have a plethora of error registers, while currently
 * NPE only have the address of a failed load.
 *
 * Check if the RC logged an error with the appropriate status type/abort type.
 * Ex: Parity Error, Received Master/Target Abort
 * Check if either the fault address found in the rc matches the device's
 * assigned address range (PIO's only) or the fault BDF in the rc matches the
 * device's BDF or Secondary Bus/Bus Range.
 */
static boolean_t
pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
    uint32_t abort_type)
{
	pcie_bus_t	*bus_p = PCIE_PFD2BUS(pfd_p);
	pf_data_t	*rc_pfd_p;
	pcie_req_id_t	fault_bdf;

	/*
	 * NOTE(review): the loop condition stops at the first non-root entry,
	 * which assumes root PFDs sit contiguously at the head of the fault
	 * queue — confirm with how the queue is built during scan.
	 */
	for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
	    rc_pfd_p = rc_pfd_p->pe_next) {
		/* Only root complex's have enough information to match */
		if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
			continue;

		/* If device and rc abort type does not match continue */
		if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
			continue;

		fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;

		/* The Fault BDF = Device's BDF */
		if (fault_bdf == bus_p->bus_bdf)
			return (B_TRUE);

		/* The Fault Addr is in device's address range */
		if (pf_in_addr_range(bus_p,
		    PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
			return (B_TRUE);

		/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
		if (PCIE_IS_PCIE_BDG(bus_p) &&
		    pf_in_bus_range(bus_p, fault_bdf))
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then
 * it is a DMA, otherwise it's a PIO
 */
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
    pcie_req_id_t *bdf)
{
	pf_data_t *rc_pfd_p;

	/*
	 * Could be DMA or PIO.  Find out by look at error type.
	 *
	 * NOTE(review): this switches on the raw secondary UE status word
	 * against single-bit cases, so a status with multiple bits set falls
	 * into the default arm (all outputs cleared) — confirm intended.
	 */
	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
	case PCIE_AER_SUCE_TA_ON_SC:
	case PCIE_AER_SUCE_MA_ON_SC:
		/* Aborts seen on the secondary side imply a DMA upstream. */
		*trans_type = PF_ADDR_DMA;
		return;
	case PCIE_AER_SUCE_RCVD_TA:
	case PCIE_AER_SUCE_RCVD_MA:
		/* Aborts received by the bridge imply a downstream PIO. */
		*bdf = PCIE_INVALID_BDF;
		*trans_type = PF_ADDR_PIO;
		return;
	case PCIE_AER_SUCE_USC_ERR:
	case PCIE_AER_SUCE_UC_DATA_ERR:
	case PCIE_AER_SUCE_PERR_ASSERT:
		/* Ambiguous: decide below by checking the RP for a PERR. */
		break;
	default:
		*addr = 0;
		*bdf = PCIE_INVALID_BDF;
		*trans_type = 0;
		return;
	}

	*bdf = PCIE_INVALID_BDF;
	*trans_type = PF_ADDR_PIO;
	/* Walk upstream; a root PFD with PERR set means this was a DMA. */
	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
	    rc_pfd_p = rc_pfd_p->pe_prev) {
		if (PFD_IS_ROOT(rc_pfd_p) &&
		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_PERROR)) {
			*trans_type = PF_ADDR_DMA;
			return;
		}
	}
}

/*
 * pf_pci_decode function decodes the secondary aer transaction logs in
 * PCIe-PCI bridges.
 *
 * The log is 128 bits long and arranged in this manner.
2290 * [0:35] Transaction Attribute (s_aer_h0-saer_h1) 2291 * [36:39] Transaction lower command (saer_h1) 2292 * [40:43] Transaction upper command (saer_h1) 2293 * [44:63] Reserved 2294 * [64:127] Address (saer_h2-saer_h3) 2295 */ 2296 /* ARGSUSED */ 2297 int 2298 pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) 2299 { 2300 pcix_attr_t *attr; 2301 uint64_t addr; 2302 uint32_t trans_type; 2303 pcie_req_id_t bdf = PCIE_INVALID_BDF; 2304 2305 attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0); 2306 *cmd = GET_SAER_CMD(pfd_p); 2307 2308 cmd_switch: 2309 switch (*cmd) { 2310 case PCI_PCIX_CMD_IORD: 2311 case PCI_PCIX_CMD_IOWR: 2312 /* IO Access should always be down stream */ 2313 addr = PCIE_ADV_BDG_HDR(pfd_p, 2); 2314 bdf = attr->rid; 2315 trans_type = PF_ADDR_PIO; 2316 break; 2317 case PCI_PCIX_CMD_MEMRD_DW: 2318 case PCI_PCIX_CMD_MEMRD_BL: 2319 case PCI_PCIX_CMD_MEMRDBL: 2320 case PCI_PCIX_CMD_MEMWR: 2321 case PCI_PCIX_CMD_MEMWR_BL: 2322 case PCI_PCIX_CMD_MEMWRBL: 2323 addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) << 2324 PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2); 2325 bdf = attr->rid; 2326 2327 pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf); 2328 break; 2329 case PCI_PCIX_CMD_CFRD: 2330 case PCI_PCIX_CMD_CFWR: 2331 /* 2332 * CFG Access should always be down stream. Match the BDF in 2333 * the address phase. 2334 */ 2335 addr = 0; 2336 bdf = attr->rid; 2337 trans_type = PF_ADDR_CFG; 2338 break; 2339 case PCI_PCIX_CMD_SPL: 2340 /* 2341 * Check for DMA read completions. The requesting BDF is in the 2342 * Address phase. 2343 */ 2344 addr = 0; 2345 bdf = attr->rid; 2346 trans_type = PF_ADDR_DMA; 2347 break; 2348 case PCI_PCIX_CMD_DADR: 2349 /* 2350 * For Dual Address Cycles the transaction command is in the 2nd 2351 * address phase. 
2352 */ 2353 *cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 2354 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 2355 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 2356 if (*cmd != PCI_PCIX_CMD_DADR) 2357 goto cmd_switch; 2358 /* FALLTHROUGH */ 2359 default: 2360 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0; 2361 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF; 2362 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0; 2363 return (DDI_FAILURE); 2364 } 2365 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type; 2366 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf; 2367 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr; 2368 return (DDI_SUCCESS); 2369 } 2370 2371 /* 2372 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler. 2373 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 2374 */ 2375 int 2376 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr, 2377 pcie_req_id_t bdf) 2378 { 2379 ddi_fm_error_t derr; 2380 2381 /* If we don't know the addr or rid just return with NOTFOUND */ 2382 if ((addr == 0) && !PCIE_CHECK_VALID_BDF(bdf)) 2383 return (PF_HDL_NOTFOUND); 2384 2385 /* 2386 * Disable DMA handle lookup until DMA errors can be handled and 2387 * reported synchronously. 
When enabled again, check for the 2388 * PF_ADDR_DMA flag 2389 */ 2390 if (!(flag & (PF_ADDR_PIO | PF_ADDR_CFG))) { 2391 return (PF_HDL_NOTFOUND); 2392 } 2393 2394 bzero(&derr, sizeof (ddi_fm_error_t)); 2395 derr.fme_version = DDI_FME_VERSION; 2396 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 2397 derr.fme_ena = ena; 2398 2399 return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf)); 2400 } 2401 2402 static int 2403 pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2404 uint64_t addr, pcie_req_id_t bdf) 2405 { 2406 int status = PF_HDL_NOTFOUND; 2407 ndi_fmc_t *fcp = NULL; 2408 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; 2409 pcie_req_id_t dip_bdf; 2410 boolean_t have_lock = B_FALSE; 2411 pcie_bus_t *bus_p; 2412 dev_info_t *cdip; 2413 2414 if (!(bus_p = pf_is_ready(dip))) { 2415 return (status); 2416 } 2417 2418 ASSERT(fmhdl); 2419 if (!i_ddi_fm_handler_owned(dip)) { 2420 /* 2421 * pf_handler_enter always returns SUCCESS if the 'impl' arg is 2422 * NULL. 2423 */ 2424 (void) pf_handler_enter(dip, NULL); 2425 have_lock = B_TRUE; 2426 } 2427 2428 dip_bdf = PCI_GET_BDF(dip); 2429 2430 /* Check if dip and BDF match, if not recurse to it's children. 
*/ 2431 if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) || 2432 dip_bdf == bdf)) { 2433 if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap)) 2434 fcp = fmhdl->fh_dma_cache; 2435 else 2436 fcp = NULL; 2437 2438 if (fcp) 2439 status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr, 2440 bdf, fcp); 2441 2442 2443 if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) && 2444 DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap)) 2445 fcp = fmhdl->fh_acc_cache; 2446 else 2447 fcp = NULL; 2448 2449 if (fcp) 2450 status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr, 2451 bdf, fcp); 2452 } 2453 2454 /* If we found the handler or know it's this device, we're done */ 2455 if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) || 2456 (status == PF_HDL_FOUND))) 2457 goto done; 2458 2459 /* 2460 * If the current devuce us a PCIe-PCI bridge need to check for special 2461 * cases: 2462 * 2463 * If it is a PIO and we don't have an address or this is a DMA, check 2464 * to see if the BDF = secondary bus. If so stop. The BDF isn't a real 2465 * BDF and the fault device could have come from any device in the PCI 2466 * bus. 
2467 */ 2468 if (PCIE_IS_PCIE_BDG(bus_p) && 2469 ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) && 2470 ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf)) 2471 goto done; 2472 2473 2474 /* If we can't find the handler check it's children */ 2475 for (cdip = ddi_get_child(dip); cdip; 2476 cdip = ddi_get_next_sibling(cdip)) { 2477 if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL) 2478 continue; 2479 2480 if (pf_in_bus_range(bus_p, bdf) || 2481 pf_in_addr_range(bus_p, addr)) 2482 status = pf_hdl_child_lookup(cdip, derr, flag, addr, 2483 bdf); 2484 2485 if (status == PF_HDL_FOUND) 2486 goto done; 2487 } 2488 2489 done: 2490 if (have_lock == B_TRUE) 2491 pf_handler_exit(dip); 2492 2493 return (status); 2494 } 2495 2496 static int 2497 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, 2498 uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) 2499 { 2500 ndi_fmcentry_t *fep; 2501 int found = 0; 2502 int status; 2503 2504 mutex_enter(&fcp->fc_lock); 2505 for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) { 2506 ddi_fmcompare_t compare_func; 2507 2508 /* 2509 * Compare captured error state with handle 2510 * resources. During the comparison and 2511 * subsequent error handling, we block 2512 * attempts to free the cache entry. 
2513 */ 2514 if (flag == ACC_HANDLE) { 2515 compare_func = 2516 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t) 2517 fep->fce_resource); 2518 } else { 2519 compare_func = 2520 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t) 2521 fep->fce_resource); 2522 } 2523 2524 if (compare_func == NULL) /* unbound or not FLAGERR */ 2525 continue; 2526 2527 status = compare_func(dip, fep->fce_resource, 2528 (void *)&addr, (void *)&bdf); 2529 2530 if (status == DDI_FM_NONFATAL) { 2531 found++; 2532 2533 /* Set the error for this resource handle */ 2534 if (flag == ACC_HANDLE) { 2535 ddi_acc_handle_t ap = fep->fce_resource; 2536 2537 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status, 2538 DDI_FM_ERR_UNEXPECTED); 2539 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); 2540 derr->fme_acc_handle = ap; 2541 } else { 2542 ddi_dma_handle_t dp = fep->fce_resource; 2543 2544 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status, 2545 DDI_FM_ERR_UNEXPECTED); 2546 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); 2547 derr->fme_dma_handle = dp; 2548 } 2549 } 2550 } 2551 mutex_exit(&fcp->fc_lock); 2552 2553 /* 2554 * If a handler isn't found and we know this is the right device mark 2555 * them all failed. 2556 */ 2557 if ((addr != 0) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) { 2558 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp); 2559 if (status == PF_HDL_FOUND) 2560 found++; 2561 } 2562 2563 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND); 2564 } 2565 2566 /* 2567 * Automatically decode AER header logs and does a handling look up based on the 2568 * AER header decoding. 2569 * 2570 * For this function only the Primary/Secondary AER Header Logs need to be valid 2571 * in the pfd (PCIe Fault Data) arg. 2572 * 2573 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 
 */
/* ARGSUSED */
static int
pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
    boolean_t is_primary)
{
	/*
	 * Disabling this function temporarily until errors can be handled
	 * synchronously.
	 *
	 * This function is currently only called during the middle of a fabric
	 * scan.  If the fabric scan is called synchronously with an error seen
	 * in the RP/RC, then the related errors in the fabric will have a
	 * PF_ERR_MATCHED_RC error severity.  pf_log_hdl_lookup code will be
	 * bypassed when the severity is PF_ERR_MATCHED_RC.  Handle lookup
	 * would have already happened in RP/RC error handling in a synchronous
	 * manner.  Errors unrelated should panic, because they are being
	 * handled asynchronously.
	 *
	 * If fabric scan is called asynchronously from any RP/RC error, then
	 * DMA/PIO UE errors seen in the fabric should panic.  pf_log_hdl_lookup
	 * will return PF_HDL_NOTFOUND to ensure that the system panics.
	 */
	return (PF_HDL_NOTFOUND);
}

/*
 * Decodes the TLP and returns the BDF of the handler, address and transaction
 * type if known.
 *
 * Types of TLP logs seen in RC, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Should not occur and result in UR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * Types of TLP logs seen in SW/Leaf, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Destined BDF, address, PF_CFG_ADDR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * The adv_reg_p must be passed in separately for use with SPARC RPs.
 * A SPARC RP could have multiple AER header logs which cannot be directly
 * accessed via the bus_p.
 */
int
pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p)
{
	pcie_tlp_hdr_t	*tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
	pcie_req_id_t	my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
	uint64_t	flt_addr = 0;
	uint32_t	flt_trans_type = 0;

	/* Start from a clean slate; filled in only on successful decode. */
	adv_reg_p->pcie_ue_tgt_addr = 0;
	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
	adv_reg_p->pcie_ue_tgt_trans = 0;

	my_bdf = bus_p->bus_bdf;
	switch (tlp_hdr->type) {
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* Grab the 32/64bit fault address */
		if (tlp_hdr->fmt & 0x1) {
			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
		} else {
			flt_addr = adv_reg_p->pcie_ue_hdr[2];
		}

		/* Requester ID lives in the upper half of header dword 1. */
		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);

		/*
		 * If the req bdf >= this.bdf, then it means the request is this
		 * device or came from a device below it.  Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else if (PCIE_IS_ROOT(bus_p) &&
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else {
			flt_trans_type = PF_ADDR_PIO;
			flt_bdf = PCIE_INVALID_BDF;
		}
		break;
	case PCIE_TLP_TYPE_CFG0:
	case PCIE_TLP_TYPE_CFG1:
		/* CFG requests: the destination BDF is in header dword 2. */
		flt_addr = 0;
		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
		flt_trans_type = PF_ADDR_CFG;
		break;
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
	{
		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&adv_reg_p->pcie_ue_hdr[1];

		flt_addr = 0;
		flt_bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
		    cpl_tlp->cid;

		/*
		 * If the requester ID is greater than the completer ID, the
		 * request originated from a device below, so this is a DMA
		 * completion; otherwise treat it as a PIO/CFG completion.
		 */
		if (cpl_tlp->rid > cpl_tlp->cid) {
			flt_trans_type = PF_ADDR_DMA;
		} else {
			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
		}
		break;
	}
	default:
		return (DDI_FAILURE);
	}

	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;

	return (DDI_SUCCESS);
}

#define	PCIE_EREPORT	DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "."
PCIEX_FABRIC
/*
 * Reserve an errorq element and build the common ereport skeleton (detector
 * FMRI from the device path, class, ENA).  Returns DDI_SUCCESS with *ereport,
 * *detector and *eqep filled in, or DDI_FAILURE if no errorq space.
 */
static int
pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
    nvlist_t **detector, errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl	*fmhdl = DEVI(dip)->devi_fmhdl;
	char			device_path[MAXPATHLEN];
	nv_alloc_t		*nva;

	*eqep = errorq_reserve(fmhdl->fh_errorq);
	if (*eqep == NULL) {
		atomic_inc_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64);
		return (DDI_FAILURE);
	}

	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);

	ASSERT(*ereport);
	ASSERT(nva);

	/*
	 * Use the dev_path/devid for this device instance.
	 */
	*detector = fm_nvlist_create(nva);
	if (dip == ddi_root_node()) {
		device_path[0] = '/';
		device_path[1] = '\0';
	} else {
		(void) ddi_pathname(dip, device_path);
	}

	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
	    device_path, NULL, NULL);

	/* A zero ENA means the caller has none; generate one now. */
	if (ena == 0)
		ena = fm_ena_generate(0, FM_ENA_FMT1);

	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);

	return (DDI_SUCCESS);
}

/* Commit a reserved ereport element to the errorq for async dispatch. */
/* ARGSUSED */
static void
pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
    errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;

	errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
}

/*
 * Walk the fault queue and post one ereport per device, with payload members
 * selected by what register state was captured (PCI/PCI-X/PCIe/AER/etc.).
 * Also clears pe_valid on every entry and drops any handler locks still held.
 */
static void
pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	nvlist_t	*ereport;
	nvlist_t	*detector;
	errorq_elem_t	*eqep;
	pcie_bus_t	*bus_p;
	pf_data_t	*pfd_p;
	uint32_t	total = impl->pf_total;

	/*
	 * Ereports need to be sent in a top down fashion.  The fabric
	 * translator expects the ereports from the Root first.  This is
	 * needed to tell if the system contains a PCIe compliant RC/RP.
	 */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		bus_p = PCIE_PFD2BUS(pfd_p);
		pfd_p->pe_valid = B_FALSE;

		/* Only post for unexpected errors on ereport-capable dips. */
		if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
		    !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
			continue;

		if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
		    &ereport, &detector, &eqep) != DDI_SUCCESS)
			continue;

		if (PFD_IS_RC(pfd_p)) {
			fm_payload_set(ereport,
			    "scan_bdf", DATA_TYPE_UINT16,
			    PCIE_ROOT_FAULT(pfd_p)->scan_bdf,
			    "scan_addr", DATA_TYPE_UINT64,
			    PCIE_ROOT_FAULT(pfd_p)->scan_addr,
			    "intr_src", DATA_TYPE_UINT16,
			    PCIE_ROOT_EH_SRC(pfd_p)->intr_type,
			    NULL);
			goto generic;
		}

		/* Generic PCI device information */
		fm_payload_set(ereport,
		    "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
		    "device_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id >> 16),
		    "vendor_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id & 0xFFFF),
		    "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
		    "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
		    "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
		    "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
		    "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
		    "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
		    NULL);

		/* PCI registers */
		fm_payload_set(ereport,
		    "pci_status", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_err_status,
		    "pci_command", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_cfg_comm,
		    NULL);

		/* PCI bridge registers */
		if (PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pci_bdg_sec_status", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
			    "pci_bdg_ctrl", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
			    NULL);
		}

		/* PCIx registers */
		if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_status", DATA_TYPE_UINT32,
			    PCIX_ERR_REG(pfd_p)->pcix_status,
			    "pcix_command", DATA_TYPE_UINT16,
			    PCIX_ERR_REG(pfd_p)->pcix_command,
			    NULL);
		}

		/* PCIx ECC Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;
			pf_pcix_ecc_regs_t *ecc_reg;

			/* ecc_bdg_reg is only read under PCIE_IS_BDG below. */
			if (PCIE_IS_BDG(bus_p))
				ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
			ecc_reg = PCIX_ECC_REG(pfd_p);
			fm_payload_set(ereport,
			    "pcix_ecc_control_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
			    (ecc_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
			    (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_fstaddr :
			    ecc_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_secaddr :
			    ecc_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_attr :
			    ecc_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx ECC Bridge Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;

			ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
			fm_payload_set(ereport,
			    "pcix_ecc_control_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx Bridge */
		if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_bdg_status", DATA_TYPE_UINT32,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
			    "pcix_bdg_sec_status", DATA_TYPE_UINT16,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
			    NULL);
		}

		/* PCIe registers */
		if (PCIE_IS_PCIE(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_status", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_status,
			    "pcie_command", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
			    "pcie_dev_cap", DATA_TYPE_UINT32,
			    PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
			    NULL);
		}

		/* PCIe AER registers */
		if (PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
			    "pcie_ue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_status,
			    "pcie_ue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
			    "pcie_ue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
			    "pcie_ue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
			    "pcie_ue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
			    "pcie_ue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
			    "pcie_ue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
			    "pcie_ce_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_status,
			    "pcie_ce_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
			    NULL);
		}

		/* PCIe AER decoded header */
		if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
			fm_payload_set(ereport,
			    "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
			    "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
			    "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
		}

		/* PCIe BDG AER registers */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
			    "pcie_sue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
			    "pcie_sue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
			    "pcie_sue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
			    "pcie_sue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
			    "pcie_sue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
			    "pcie_sue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
			    "pcie_sue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
			    NULL);
		}

		/* PCIe BDG AER decoded header */
		if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
		    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
			fm_payload_set(ereport,
			    "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
			    "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
			    "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe RP registers */
		if (PCIE_IS_RP(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_rp_status", DATA_TYPE_UINT32,
			    PCIE_RP_REG(pfd_p)->pcie_rp_status,
			    "pcie_rp_control", DATA_TYPE_UINT16,
			    PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
			    NULL);
		}

		/* PCIe RP AER registers */
		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_rp_status", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
			    "pcie_adv_rp_command", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
			    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
			    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
			    NULL);
		}

		/*
		 * Slot Status registers
		 *
		 * Since we only gather these for certain types of components,
		 * only put these registers into the ereport if we have valid
		 * data.
		 */
		if (PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid) {
			fm_payload_set(ereport,
			    "pcie_slot_cap", DATA_TYPE_UINT32,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_cap,
			    "pcie_slot_control", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_control,
			    "pcie_slot_status", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_status,
			    NULL);
		}

generic:
		/* IOV related information */
		if (!PCIE_BDG_IS_UNASSIGNED(PCIE_PFD2BUS(
		    impl->pf_dq_head_p))) {
			fm_payload_set(ereport,
			    "pcie_aff_flags", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags,
			    "pcie_aff_bdf", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf,
			    "orig_sev", DATA_TYPE_UINT32,
			    pfd_p->pe_orig_severity_flags,
			    NULL);
		}

		/* Misc ereport information */
		fm_payload_set(ereport,
		    "remainder", DATA_TYPE_UINT32, --total,
		    "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
		    NULL);

		pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
		    &eqep);
	}

	/* Unlock all the devices in the queue */
	for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
		if (pfd_p->pe_lock) {
			pf_handler_exit(PCIE_PFD2DIP(pfd_p));
		}
	}
}

/*
 * pf_handler_enter must be called to serialize access to each device's
 * pf_data_t.  Once error handling is finished with the device call
 * pf_handler_exit to allow other threads to access it.  The same thread may
 * call pf_handler_enter several times without any consequences.
 *
 * The "impl" variable is passed in during scan fabric to double check that
 * there is not a recursive algorithm and to ensure only one thread is doing a
 * fabric scan at all times.
 *
 * In some cases "impl" is not available, such as "child lookup" being called
 * from outside of scan fabric, just pass in NULL for this variable and this
 * extra check will be skipped.
3083 */ 3084 static int 3085 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl) 3086 { 3087 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 3088 3089 ASSERT(pfd_p); 3090 3091 /* 3092 * Check to see if the lock has already been taken by this 3093 * thread. If so just return and don't take lock again. 3094 */ 3095 if (!pfd_p->pe_lock || !impl) { 3096 i_ddi_fm_handler_enter(dip); 3097 pfd_p->pe_lock = B_TRUE; 3098 return (PF_SCAN_SUCCESS); 3099 } 3100 3101 /* Check to see that this dip is already in the "impl" error queue */ 3102 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 3103 if (PCIE_PFD2DIP(pfd_p) == dip) { 3104 return (PF_SCAN_SUCCESS); 3105 } 3106 } 3107 3108 return (PF_SCAN_DEADLOCK); 3109 } 3110 3111 static void 3112 pf_handler_exit(dev_info_t *dip) 3113 { 3114 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 3115 3116 ASSERT(pfd_p); 3117 3118 ASSERT(pfd_p->pe_lock == B_TRUE); 3119 i_ddi_fm_handler_exit(dip); 3120 pfd_p->pe_lock = B_FALSE; 3121 } 3122 3123 /* 3124 * This function calls the driver's callback function (if it's FMA hardened 3125 * and callback capable). This function relies on the current thread already 3126 * owning the driver's fmhdl lock. 
3127 */ 3128 static int 3129 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr) 3130 { 3131 int cb_sts = DDI_FM_OK; 3132 3133 if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) { 3134 dev_info_t *pdip = ddi_get_parent(dip); 3135 struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl; 3136 struct i_ddi_fmtgt *tgt = hdl->fh_tgts; 3137 struct i_ddi_errhdl *errhdl; 3138 while (tgt != NULL) { 3139 if (dip == tgt->ft_dip) { 3140 errhdl = tgt->ft_errhdl; 3141 cb_sts = errhdl->eh_func(dip, derr, 3142 errhdl->eh_impl); 3143 break; 3144 } 3145 tgt = tgt->ft_next; 3146 } 3147 } 3148 return (cb_sts); 3149 } 3150 3151 static void 3152 pf_reset_pfd(pf_data_t *pfd_p) 3153 { 3154 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p); 3155 3156 pfd_p->pe_severity_flags = 0; 3157 pfd_p->pe_severity_mask = 0; 3158 pfd_p->pe_orig_severity_flags = 0; 3159 /* pe_lock and pe_valid were reset in pf_send_ereport */ 3160 3161 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0; 3162 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF; 3163 3164 if (PCIE_IS_ROOT(bus_p)) { 3165 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF; 3166 PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0; 3167 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_FALSE; 3168 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE; 3169 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL; 3170 } 3171 3172 if (PCIE_IS_BDG(bus_p)) { 3173 bzero(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t)); 3174 } 3175 3176 PCI_ERR_REG(pfd_p)->pci_err_status = 0; 3177 PCI_ERR_REG(pfd_p)->pci_cfg_comm = 0; 3178 3179 if (PCIE_IS_PCIE(bus_p)) { 3180 if (PCIE_IS_ROOT(bus_p)) { 3181 bzero(PCIE_RP_REG(pfd_p), 3182 sizeof (pf_pcie_rp_err_regs_t)); 3183 bzero(PCIE_ADV_RP_REG(pfd_p), 3184 sizeof (pf_pcie_adv_rp_err_regs_t)); 3185 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id = 3186 PCIE_INVALID_BDF; 3187 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id = 3188 PCIE_INVALID_BDF; 3189 } else if (PCIE_IS_PCIE_BDG(bus_p)) { 3190 bzero(PCIE_ADV_BDG_REG(pfd_p), 3191 sizeof (pf_pcie_adv_bdg_err_regs_t)); 3192 
PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = 3193 PCIE_INVALID_BDF; 3194 } 3195 3196 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) { 3197 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 3198 bzero(PCIX_BDG_ECC_REG(pfd_p, 0), 3199 sizeof (pf_pcix_ecc_regs_t)); 3200 bzero(PCIX_BDG_ECC_REG(pfd_p, 1), 3201 sizeof (pf_pcix_ecc_regs_t)); 3202 } 3203 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0; 3204 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0; 3205 } 3206 3207 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0; 3208 PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0; 3209 PCIE_ADV_REG(pfd_p)->pcie_ue_mask = 0; 3210 PCIE_ADV_REG(pfd_p)->pcie_ue_sev = 0; 3211 PCIE_ADV_HDR(pfd_p, 0) = 0; 3212 PCIE_ADV_HDR(pfd_p, 1) = 0; 3213 PCIE_ADV_HDR(pfd_p, 2) = 0; 3214 PCIE_ADV_HDR(pfd_p, 3) = 0; 3215 PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0; 3216 PCIE_ADV_REG(pfd_p)->pcie_ce_mask = 0; 3217 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0; 3218 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0; 3219 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF; 3220 3221 PCIE_ERR_REG(pfd_p)->pcie_err_status = 0; 3222 PCIE_ERR_REG(pfd_p)->pcie_err_ctl = 0; 3223 PCIE_ERR_REG(pfd_p)->pcie_dev_cap = 0; 3224 3225 } else if (PCIE_IS_PCIX(bus_p)) { 3226 if (PCIE_IS_BDG(bus_p)) { 3227 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 3228 bzero(PCIX_BDG_ECC_REG(pfd_p, 0), 3229 sizeof (pf_pcix_ecc_regs_t)); 3230 bzero(PCIX_BDG_ECC_REG(pfd_p, 1), 3231 sizeof (pf_pcix_ecc_regs_t)); 3232 } 3233 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0; 3234 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0; 3235 } else { 3236 if (PCIX_ECC_VERSION_CHECK(bus_p)) { 3237 bzero(PCIX_ECC_REG(pfd_p), 3238 sizeof (pf_pcix_ecc_regs_t)); 3239 } 3240 PCIX_ERR_REG(pfd_p)->pcix_command = 0; 3241 PCIX_ERR_REG(pfd_p)->pcix_status = 0; 3242 } 3243 } 3244 3245 pfd_p->pe_prev = NULL; 3246 pfd_p->pe_next = NULL; 3247 pfd_p->pe_rber_fatal = B_FALSE; 3248 } 3249 3250 pcie_bus_t * 3251 pf_find_busp_by_bdf(pf_impl_t *impl, pcie_req_id_t bdf) 3252 { 3253 pcie_bus_t *temp_bus_p; 3254 pf_data_t 
*temp_pfd_p; 3255 3256 for (temp_pfd_p = impl->pf_dq_head_p; 3257 temp_pfd_p; 3258 temp_pfd_p = temp_pfd_p->pe_next) { 3259 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p); 3260 3261 if (bdf == temp_bus_p->bus_bdf) { 3262 return (temp_bus_p); 3263 } 3264 } 3265 3266 return (NULL); 3267 } 3268 3269 pcie_bus_t * 3270 pf_find_busp_by_addr(pf_impl_t *impl, uint64_t addr) 3271 { 3272 pcie_bus_t *temp_bus_p; 3273 pf_data_t *temp_pfd_p; 3274 3275 for (temp_pfd_p = impl->pf_dq_head_p; 3276 temp_pfd_p; 3277 temp_pfd_p = temp_pfd_p->pe_next) { 3278 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p); 3279 3280 if (pf_in_assigned_addr(temp_bus_p, addr)) { 3281 return (temp_bus_p); 3282 } 3283 } 3284 3285 return (NULL); 3286 } 3287 3288 pcie_bus_t * 3289 pf_find_busp_by_aer(pf_impl_t *impl, pf_data_t *pfd_p) 3290 { 3291 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p); 3292 pcie_bus_t *temp_bus_p = NULL; 3293 pcie_req_id_t bdf; 3294 uint64_t addr; 3295 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)reg_p->pcie_ue_hdr; 3296 uint32_t trans_type = reg_p->pcie_ue_tgt_trans; 3297 3298 if ((tlp_hdr->type == PCIE_TLP_TYPE_CPL) || 3299 (tlp_hdr->type == PCIE_TLP_TYPE_CPLLK)) { 3300 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)®_p->pcie_ue_hdr[1]; 3301 3302 bdf = (cpl_tlp->rid > cpl_tlp->cid) ? 
cpl_tlp->rid : 3303 cpl_tlp->cid; 3304 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3305 } else if (trans_type == PF_ADDR_PIO) { 3306 addr = reg_p->pcie_ue_tgt_addr; 3307 temp_bus_p = pf_find_busp_by_addr(impl, addr); 3308 } else { 3309 /* PF_ADDR_DMA type */ 3310 bdf = reg_p->pcie_ue_tgt_bdf; 3311 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3312 } 3313 3314 return (temp_bus_p); 3315 } 3316 3317 pcie_bus_t * 3318 pf_find_busp_by_saer(pf_impl_t *impl, pf_data_t *pfd_p) 3319 { 3320 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p); 3321 pcie_bus_t *temp_bus_p = NULL; 3322 pcie_req_id_t bdf; 3323 uint64_t addr; 3324 3325 addr = reg_p->pcie_sue_tgt_addr; 3326 bdf = reg_p->pcie_sue_tgt_bdf; 3327 3328 if (addr != 0) { 3329 temp_bus_p = pf_find_busp_by_addr(impl, addr); 3330 } else if (PCIE_CHECK_VALID_BDF(bdf)) { 3331 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3332 } 3333 3334 return (temp_bus_p); 3335 } 3336