/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>
#include <sys/fm/io/pci.h>
#include <sys/promif.h>
#include <sys/disp.h>
#include <sys/atomic.h>
#include <sys/pcie.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>

/* PCIe Device Status bits indicating a bridge detected an error */
#define	PF_PCIE_BDG_ERR	(PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
	PCIE_DEVSTS_CE_DETECTED)

/* PCI (secondary) status bits indicating a bridge detected an error */
#define	PF_PCI_BDG_ERR	(PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
	PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable errors considered fatal vs. non-fatal */
#define	PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
	PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define	PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* Secondary (PCIe-to-PCI/PCI-X bridge) AER errors, fatal vs. non-fatal */
#define	PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
	PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
	PCIE_AER_SUCE_SERR_ASSERT)
#define	PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
	PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
	PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
	PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
	PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

#define	PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/*
 * Does "bit" match the First Error Pointer recorded in the AER control
 * register, i.e. was it the first uncorrectable error logged?
 */
#define	PF_FIRST_AER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

/* The AER header logs are only valid for the first error recorded */
#define	HAS_AER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/* Same as PF_FIRST_AER_ERR, for the bridge secondary (SUE) registers */
#define	PF_FIRST_SAER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

#define	HAS_SAER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the PCI command from the secondary AER header log */
#define	GET_SAER_CMD(pfd_p) \
	((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
	PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* Was the correctable error an Advisory Non-Fatal? */
#define	CE_ADVISORY(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)

/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
	uint32_t	bit;		/* Error bit */
	int		(*handler)();	/* Error handling function */
	uint16_t	affected_flags; /* Primary affected flag */
	/*
	 * Secondary affected flag, effective when the information
	 * indicated by the primary flag is not available, eg.
	 * PF_AFFECTED_AER/SAER/ADDR
	 */
	uint16_t	sec_affected_flags;
} pf_fab_err_tbl_t;

static pcie_bus_t *pf_is_ready(dev_info_t *);
/* Functions for scanning errors */
static int pf_default_hdl(dev_info_t *, pf_impl_t *);
static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);

/* Functions for gathering errors */
static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg);
static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);

/* Functions for analysing errors */
static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
    uint32_t);
static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
    pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_no_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);

/* PCIe Fabric Handle Lookup Support Functions. */
static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
    uint64_t, pcie_req_id_t);
static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
    pcie_req_id_t, ndi_fmc_t *);
static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
    boolean_t);

static int pf_handler_enter(dev_info_t *, pf_impl_t *);
static void pf_handler_exit(dev_info_t *);
static void pf_reset_pfd(pf_data_t *);

boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
int pcie_disable_scan = 0;		/* Disable fabric scan */

/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
pf_eh_enter(pcie_bus_t *bus_p)
{
}

/* Inform interested parties that error handling has ended.
 */
void
pf_eh_exit(pcie_bus_t *bus_p)
{
	pcie_bus_t *rbus_p = PCIE_DIP2BUS(bus_p->bus_rp_dip);
	pf_data_t *root_pfd_p = PCIE_BUS2PFD(rbus_p);
	pf_data_t *pfd_p;
	uint_t intr_type = PCIE_ROOT_EH_SRC(root_pfd_p)->intr_type;

	pciev_eh_exit(root_pfd_p, intr_type);

	/* Clear affected device info and INTR SRC */
	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
		if (PCIE_IS_ROOT(PCIE_PFD2BUS(pfd_p))) {
			PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
			PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
		}
	}
}

/*
 * After sending an ereport, or in lieu of doing so, unlock all the devices in
 * the data queue. We also must clear pe_valid here; this function is called in
 * the path where we decide not to send an ereport because there is no error
 * (spurious AER interrupt), as well as from pf_send_ereport() which has already
 * cleared it. Failing to do this will result in a different path through
 * pf_dispatch() and the potential for deadlocks. It is safe to do as we are
 * still holding the handler lock here, just as in pf_send_ereport().
 */
static void
pf_dq_unlock_chain(pf_impl_t *impl)
{
	pf_data_t *pfd_p;

	/* Walk tail-to-head, mirroring the order handlers were entered */
	for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
		pfd_p->pe_valid = B_FALSE;
		if (pfd_p->pe_lock) {
			pf_handler_exit(PCIE_PFD2DIP(pfd_p));
		}
	}
}

/*
 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The
 * caller may create a local pf_data_t with the "root fault"
 * information populated to either do a precise or full scan. More
 * than one pf_data_t maybe linked together if there are multiple
 * errors. Only a PCIe compliant Root Port device may pass in NULL
 * for the root_pfd_p.
 *
 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
 * the rdip. PCIe Root ports should call pf_scan_fabric using its parent as
 * the rdip.
 *
 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
 * any RC detected errors that propagated to/from the fabric.
 *
 * This code assumes that by the time pf_scan_fabric is
 * called, pf_handler_enter has NOT been called on the rdip.
 */
int
pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
{
	pf_impl_t	impl;
	pf_data_t	*pfd_p, *pfd_head_p, *pfd_tail_p;
	int		scan_flag = PF_SCAN_SUCCESS;
	int		analyse_flag = PF_ERR_NO_ERROR;
	boolean_t	full_scan = pcie_full_scan;

	if (pcie_disable_scan)
		return (analyse_flag);

	/* Find the head and tail of this link list */
	pfd_head_p = root_pfd_p;
	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
	    pfd_tail_p = pfd_tail_p->pe_next)
		;

	/* Save head/tail */
	impl.pf_total = 0;
	impl.pf_derr = derr;
	impl.pf_dq_head_p = pfd_head_p;
	impl.pf_dq_tail_p = pfd_tail_p;

	/* If scan is initiated from RP then RP itself must be scanned. */
	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
	    !root_pfd_p) {
		scan_flag = pf_handler_enter(rdip, &impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			goto done;

		scan_flag = pf_default_hdl(rdip, &impl);
		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
			goto done;
	}

	/*
	 * Scan the fabric using the scan_bdf and scan_addr in error q.
	 * scan_bdf will be valid in the following cases:
	 *	- Fabric message
	 *	- Poisoned TLP
	 *	- Signaled UR/CA
	 *	- Received UR/CA
	 *	- PIO load failures
	 */
	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
	    pfd_p = pfd_p->pe_next) {
		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);

		if (PFD_IS_RC(pfd_p))
			impl.pf_total++;

		if (impl.pf_fault->full_scan)
			full_scan = B_TRUE;

		if (full_scan ||
		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
		    impl.pf_fault->scan_addr)
			scan_flag |= pf_dispatch(rdip, &impl, full_scan);

		if (full_scan)
			break;
	}

done:
	/*
	 * If this is due to safe access, don't analyze the errors and return
	 * success regardless of how scan fabric went.
	 */
	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
		analyse_flag = PF_ERR_NO_PANIC;
	} else {
		analyse_flag = pf_analyse_error(derr, &impl);
	}

	/*
	 * If analyse_flag is 0 or PF_ERR_NO_ERROR, there's nothing here. Skip
	 * ereport generation unless something went wrong with the scan.
	 */
	if ((analyse_flag & ~PF_ERR_NO_ERROR) != 0 ||
	    (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_DEADLOCK)) != 0) {
		pf_send_ereport(derr, &impl);
	} else {
		pf_dq_unlock_chain(&impl);
	}

	/*
	 * Check if any hardened driver's callback reported a panic.
	 * If so panic.
	 */
	if (scan_flag & PF_SCAN_CB_FAILURE)
		analyse_flag |= PF_ERR_PANIC;

	/*
	 * If a deadlock was detected, panic the system as error analysis has
	 * been compromised.
	 */
	if (scan_flag & PF_SCAN_DEADLOCK)
		analyse_flag |= PF_ERR_PANIC_DEADLOCK;

	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);

	return (analyse_flag);
}

/* Force every subsequent fabric scan to walk the entire fabric. */
void
pcie_force_fullscan(void)
{
	pcie_full_scan = B_TRUE;
}

/*
 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
 * falls in the error path.
 *
 * Returns PF_SCAN_* flags
 */
static int
pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
{
	dev_info_t	*dip;
	pcie_req_id_t	rid = impl->pf_fault->scan_bdf;
	pcie_bus_t	*bus_p;
	int		scan_flag = PF_SCAN_SUCCESS;

	for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
		/* Make sure dip is attached and ready */
		if (!(bus_p = pf_is_ready(dip)))
			continue;

		scan_flag |= pf_handler_enter(dip, impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			break;

		/*
		 * Handle this device if it is a:
		 * o Full Scan
		 * o PCI/PCI-X Device
		 * o Fault BDF = Device BDF
		 * o BDF/ADDR is in range of the Bridge/Switch
		 */
		if (full_scan ||
		    (bus_p->bus_bdf == rid) ||
		    pf_in_bus_range(bus_p, rid) ||
		    pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
			int hdl_flag = pf_default_hdl(dip, impl);
			scan_flag |= hdl_flag;

			/*
			 * A bridge may have detected no errors in which case
			 * there is no need to scan further down.
			 */
			if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
				continue;
		} else {
			/* Not in the fault path; drop the handler lock */
			pf_handler_exit(dip);
			continue;
		}

		/* match or in bridge bus-range */
		switch (bus_p->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
		case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
			scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
		{
			pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p);
			pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p);
			pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p);
			/*
			 * Continue if the fault BDF != the switch or there is a
			 * parity error
			 */
			if ((bus_p->bus_bdf != rid) ||
			    (err_p->pci_err_status & PF_PCI_PARITY_ERR) ||
			    (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR))
				scan_flag |= pf_dispatch(dip, impl, full_scan);
			break;
		}
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			/*
			 * Reached a PCIe end point so stop. Note dev_type
			 * PCI_DEV is just a PCIe device that requires IO Space
			 */
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			if (PCIE_IS_BDG(bus_p))
				scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		default:
			ASSERT(B_FALSE);
		}
	}
	return (scan_flag);
}

/* Returns whether the "bdf" is in the bus range of a switch/bridge */
boolean_t
pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf)
{
	pci_bus_range_t *br_p = &bus_p->bus_bus_range;
	uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >>
	    PCIE_REQ_ID_BUS_SHIFT;

	/* check if given bdf falls within bridge's bus range */
	if (PCIE_IS_BDG(bus_p) &&
	    ((bus_no >= br_p->lo) && (bus_no <= br_p->hi)))
		return (B_TRUE);
	else
		return (B_FALSE);
}

/*
 * Return whether the "addr" is in the assigned addr of a device.
 */
boolean_t
pf_in_assigned_addr(pcie_bus_t *bus_p, uint64_t addr)
{
	uint_t		i;
	uint64_t	low, hi;
	pci_regspec_t	*assign_p = bus_p->bus_assigned_addr;

	for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) {
		low = assign_p->pci_phys_low;
		hi = low + assign_p->pci_size_low;
		if ((addr < hi) && (addr >= low))
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the
 * "addr" is in the assigned addr of a device.
 */
static boolean_t
pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr)
{
	uint_t		i;
	uint64_t	low, hi;
	ppb_ranges_t	*ranges_p = bus_p->bus_addr_ranges;

	if (!addr)
		return (B_FALSE);

	/* check if given address belongs to this device */
	if (pf_in_assigned_addr(bus_p, addr))
		return (B_TRUE);

	/* check if given address belongs to a child below this device */
	if (!PCIE_IS_BDG(bus_p))
		return (B_FALSE);

	for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) {
		switch (ranges_p->child_high & PCI_ADDR_MASK) {
		case PCI_ADDR_IO:
		case PCI_ADDR_MEM32:
			low = ranges_p->child_low;
			hi = ranges_p->size_low + low;
			if ((addr < hi) && (addr >= low))
				return (B_TRUE);
			break;
		case PCI_ADDR_MEM64:
			low = ((uint64_t)ranges_p->child_mid << 32) |
			    (uint64_t)ranges_p->child_low;
			hi = (((uint64_t)ranges_p->size_high << 32) |
			    (uint64_t)ranges_p->size_low) + low;
			if ((addr < hi) && (addr >= low))
				return (B_TRUE);
			break;
		}
	}
	return (B_FALSE);
}

/*
 * Return the device's pcie_bus_t if the device is attached and ready for
 * fabric scanning, otherwise NULL.
 */
static pcie_bus_t *
pf_is_ready(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	if (!bus_p)
		return (NULL);

	if (!(bus_p->bus_fm_flags & PF_FM_READY))
		return (NULL);
	return (bus_p);
}

/*
 * Read the PCI-X ECC registers into the supplied snapshot. "bdg" selects
 * the bridge-format (Type 1) register offsets vs. the device-format (Type 0)
 * offsets; the two capabilities never coexist on one function.
 */
static void
pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg)
{
	if (bdg) {
		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_ECC_STATUS);
		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_ECC_FST_AD);
		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_ECC_SEC_AD);
		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_ECC_ATTR);
	} else {
		pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_ECC_STATUS);
		pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_ECC_FST_AD);
		pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_ECC_SEC_AD);
		pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_ECC_ATTR);
	}
}

/* Gather the PCI-X error registers for a device into its pf_data_t */
static void
pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	/*
	 * For PCI-X device PCI-X Capability only exists for Type 0 Headers.
	 * PCI-X Bridge Capability only exists for Type 1 Headers.
	 * Both capabilities do not exist at the same time.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p,
		    PCI_PCIX_SEC_STATUS);
		pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_STATUS);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers, bit one is
			 * read-only so we make sure we do not write to it.
575 */ 576 if (!PCIE_IS_PCIE_BDG(bus_p)) { 577 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 578 0); 579 pf_pcix_ecc_regs_gather( 580 PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE); 581 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS, 582 1); 583 } 584 pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0), 585 bus_p, B_TRUE); 586 } 587 } else { 588 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p); 589 590 pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p, 591 PCI_PCIX_COMMAND); 592 pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p, 593 PCI_PCIX_STATUS); 594 if (PCIX_ECC_VERSION_CHECK(bus_p)) 595 pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p, 596 B_TRUE); 597 } 598 } 599 600 static void 601 pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 602 { 603 pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p); 604 pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p); 605 606 pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS); 607 pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL); 608 pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP); 609 610 if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) 611 pf_pcix_regs_gather(pfd_p, bus_p); 612 613 if (PCIE_IS_ROOT(bus_p)) { 614 pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p); 615 616 pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p, 617 PCIE_ROOTSTS); 618 pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p, 619 PCIE_ROOTCTL); 620 } 621 622 /* 623 * For eligible components, we gather Slot Register state. 624 * 625 * Eligible components are: 626 * - a Downstream Port or a Root Port with the Slot Implemented 627 * capability bit set 628 * - hotplug capable 629 * 630 * Slot register state is useful, for instance, to determine whether the 631 * Slot's child device is physically present (via the Slot Status 632 * register). 
633 */ 634 if ((PCIE_IS_SWD(bus_p) || PCIE_IS_ROOT(bus_p)) && 635 PCIE_IS_HOTPLUG_ENABLED(PCIE_BUS2DIP(bus_p))) { 636 pf_pcie_slot_regs_t *pcie_slot_regs = PCIE_SLOT_REG(pfd_p); 637 pcie_slot_regs->pcie_slot_cap = PCIE_CAP_GET(32, bus_p, 638 PCIE_SLOTCAP); 639 pcie_slot_regs->pcie_slot_control = PCIE_CAP_GET(16, bus_p, 640 PCIE_SLOTCTL); 641 pcie_slot_regs->pcie_slot_status = PCIE_CAP_GET(16, bus_p, 642 PCIE_SLOTSTS); 643 644 if (pcie_slot_regs->pcie_slot_cap != PCI_EINVAL32 && 645 pcie_slot_regs->pcie_slot_control != PCI_EINVAL16 && 646 pcie_slot_regs->pcie_slot_status != PCI_EINVAL16) { 647 pcie_slot_regs->pcie_slot_regs_valid = B_TRUE; 648 } 649 } 650 651 if (!PCIE_HAS_AER(bus_p)) 652 return; 653 654 /* Gather UE AERs */ 655 pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p, 656 PCIE_AER_CTL); 657 pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p, 658 PCIE_AER_UCE_STS); 659 pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p, 660 PCIE_AER_UCE_MASK); 661 pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p, 662 PCIE_AER_UCE_SERV); 663 PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 664 PCIE_AER_HDR_LOG); 665 PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 666 PCIE_AER_HDR_LOG + 0x4); 667 PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 668 PCIE_AER_HDR_LOG + 0x8); 669 PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 670 PCIE_AER_HDR_LOG + 0xc); 671 672 /* Gather CE AERs */ 673 pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p, 674 PCIE_AER_CE_STS); 675 pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p, 676 PCIE_AER_CE_MASK); 677 678 /* 679 * If pci express to pci bridge then grab the bridge 680 * error registers. 
681 */ 682 if (PCIE_IS_PCIE_BDG(bus_p)) { 683 pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs = 684 PCIE_ADV_BDG_REG(pfd_p); 685 686 pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p, 687 PCIE_AER_SCTL); 688 pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p, 689 PCIE_AER_SUCE_STS); 690 pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p, 691 PCIE_AER_SUCE_MASK); 692 pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p, 693 PCIE_AER_SUCE_SERV); 694 PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p, 695 PCIE_AER_SHDR_LOG); 696 PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p, 697 PCIE_AER_SHDR_LOG + 0x4); 698 PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p, 699 PCIE_AER_SHDR_LOG + 0x8); 700 PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p, 701 PCIE_AER_SHDR_LOG + 0xc); 702 } 703 704 /* 705 * If PCI Express root port then grab the root port 706 * error registers. 707 */ 708 if (PCIE_IS_ROOT(bus_p)) { 709 pf_pcie_adv_rp_err_regs_t *pcie_rp_regs = 710 PCIE_ADV_RP_REG(pfd_p); 711 712 pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p, 713 PCIE_AER_RE_CMD); 714 pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p, 715 PCIE_AER_RE_STS); 716 pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p, 717 PCIE_AER_CE_SRC_ID); 718 pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p, 719 PCIE_AER_ERR_SRC_ID); 720 } 721 } 722 723 static void 724 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p) 725 { 726 pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p); 727 728 /* 729 * Start by reading all the error registers that are available for 730 * pci and pci express and for leaf devices and bridges/switches 731 */ 732 pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT); 733 pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM); 734 735 /* 736 * If pci-pci bridge grab PCI bridge specific error registers. 
	 */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		pci_bdg_regs->pci_bdg_sec_stat =
		    PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
		pci_bdg_regs->pci_bdg_ctrl =
		    PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
	}

	/*
	 * If pci express device grab pci express error registers and
	 * check for advanced error reporting features and grab them if
	 * available.
	 */
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_gather(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

}

/*
 * Clear (write back) the PCI-X error status previously gathered into the
 * pf_data_t; status bits are RW1C so writing the snapshot clears them.
 */
static void
pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
		    pcix_bdg_regs->pcix_bdg_sec_stat);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
		    pcix_bdg_regs->pcix_bdg_stat);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers. For
			 * clearing, there is no need to "select" the ECC
			 * register, just write what was originally read.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);

			}
			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
		    pcix_regs->pcix_status);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);

			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
			    pcix_ecc_regs->pcix_ecc_ctlstat);
		}
	}
}

/*
 * Clear (write back) the PCIe and AER status previously gathered into the
 * pf_data_t snapshot.
 */
static void
pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);

	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	if (!PCIE_HAS_AER(bus_p))
		return;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
	    pcie_adv_regs->pcie_ue_status);

	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
	    pcie_adv_regs->pcie_ce_status);

	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
		    pcie_bdg_regs->pcie_sue_status);
	}

	/*
	 * If PCI Express root complex then clear the root complex
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;

		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
		    pcie_rp_regs->pcie_rp_err_status);
	}
}

/* Clear the gathered error state for any flavor of PCI/PCI-X/PCIe device */
static void
pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_clear(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);

	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
		    pci_bdg_regs->pci_bdg_sec_stat);
	}
}

/* ARGSUSED */
void
pcie_clear_errors(dev_info_t *dip)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);

	ASSERT(bus_p);

	/* Snapshot first so the cleared state is preserved for analysis */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);
}

/* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
static void
pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
	pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
	uint32_t root_err = rp_regs->pcie_rp_err_status;
	uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
	int num_faults = 0;

	/* Since this data structure is reused, make sure to reset it */
	root_fault->full_scan = B_FALSE;
	root_fault->scan_bdf = PCIE_INVALID_BDF;
	root_fault->scan_addr = 0;

	if (!PCIE_HAS_AER(bus_p) &&
	    (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/*
	 * Check to see if an error has been received that
	 * requires a scan of the fabric. Count the number of
	 * faults seen. If MUL CE/FE_NFE that counts for
	 * at least 2 faults, so just return with full_scan.
	 */
	if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
	    (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	if (root_err & PCIE_AER_RE_STS_CE_RCVD)
		num_faults++;

	if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_CA)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_UR)
		num_faults++;

	/* If no faults just return */
	if (num_faults == 0)
		return;

	/* If faults > 1 do full scan */
	if (num_faults > 1) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/* By this point, there is only 1 fault detected */
	if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
		num_faults--;
	} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
		num_faults--;
	} else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
	    HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
	    (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
	    DDI_SUCCESS)) {
		PCIE_ROOT_FAULT(pfd_p)->scan_addr =
		    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
		num_faults--;
	}

	/*
	 * This means an error did occur, but we couldn't extract the fault BDF
	 */
	if (num_faults > 0)
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;

}


/*
 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
 *
 * Returns a scan flag.
 * o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
 *	Fault Q
 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
 *	unnecessary scanning
 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
 *	time.
 */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
	int cb_sts, scan_flag = PF_SCAN_SUCCESS;

	/* Make sure this device hasn't already been snapshotted and cleared */
	if (pfd_p->pe_valid == B_TRUE) {
		scan_flag |= PF_SCAN_IN_DQ;
		goto done;
	}

	/*
	 * If this is a device used for PCI passthrough into a virtual machine,
	 * don't let any error it caused panic the system.
	 */
	if (bus_p->bus_fm_flags & PF_FM_IS_PASSTHRU)
		pfd_p->pe_severity_mask |= PF_ERR_PANIC;

	/*
	 * Read vendor/device ID and check with cached data; if it doesn't
	 * match, it could very well mean that the device is no longer
	 * responding. In this case, we return PF_SCAN_BAD_RESPONSE; should
	 * the caller choose to panic in this case, we will have the basic
	 * info in the error queue for the purposes of postmortem debugging.
	 */
	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
		char buf[FM_MAX_CLASS];

		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
		    PCI_ERROR_SUBCLASS, PCI_NR);
		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

		/*
		 * For IOV/Hotplug purposes skip gathering info for this device,
		 * but populate affected info and severity. Clear out any data
		 * that maybe been saved in the last fabric scan.
		 */
		pf_reset_pfd(pfd_p);
		pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

		/* Add the snapshot to the error q */
		pf_en_dq(pfd_p, impl);
		pfd_p->pe_valid = B_TRUE;

		return (PF_SCAN_BAD_RESPONSE);
	}

	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);

	if (PCIE_IS_RP(bus_p))
		pf_pci_find_rp_fault(pfd_p, bus_p);

	cb_sts = pf_fm_callback(dip, impl->pf_derr);

	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
		scan_flag |= PF_SCAN_CB_FAILURE;

	/* Add the snapshot to the error q */
	pf_en_dq(pfd_p, impl);

done:
	/*
	 * If a bridge does not have any error no need to scan any further down.
	 * For PCIe devices, check the PCIe device status and PCI secondary
	 * status.
	 * - Some non-compliant PCIe devices do not utilize PCIe
	 *   error registers. If so rely on legacy PCI error registers.
	 * For PCI devices, check the PCI secondary status.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	if (PCIE_IS_PCI_BDG(bus_p) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	pfd_p->pe_valid = B_TRUE;
	return (scan_flag);
}

/*
 * Set the passthru flag on a device bus_p. Called by passthru drivers to
 * indicate when a device is or is no longer under passthru control.
 */
void
pf_set_passthru(dev_info_t *dip, boolean_t is_passthru)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	if (is_passthru) {
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_PASSTHRU);
	} else {
		atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_PASSTHRU);
	}
}

/*
 * Called during postattach to initialize a device's error handling
 * capabilities. If the device has already been hardened, then there isn't
 * much needed. Otherwise initialize the device's default FMA capabilities.
 *
 * In a future project where PCIe support is removed from pcifm, several
 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
 * created here so that the PCI/PCIe eversholt rules will work properly.
 */
void
pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	boolean_t need_cb_register = B_FALSE;

	if (!bus_p) {
		cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return;
	}

	if (fmhdl) {
		/*
		 * If device is only ereport capable and not callback capable
		 * make it callback capable. The only downside is that the
		 * "fm-errcb-capable" property is not created for this device
		 * which should be ok since it's not used anywhere.
		 */
		if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
			need_cb_register = B_TRUE;
	} else {
		int cap;
		/*
		 * fm-capable in driver.conf can be used to set fm_capabilities.
		 * If fm-capable is not defined, set the default
		 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
		 */
		cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
		    DDI_PROP_DONTPASS, "fm-capable",
		    DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
		cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);

		/* Mark this device as non-hardened (driver not FMA aware) */
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_NH);

		if (cmd == DDI_ATTACH) {
			ddi_fm_init(dip, &cap, &ibc);
			pci_ereport_setup(dip);
		}

		if (cap & DDI_FM_ERRCB_CAPABLE)
			need_cb_register = B_TRUE;

		fmhdl = DEVI(dip)->devi_fmhdl;
	}

	/* If ddi_fm_init fails for any reason RETURN */
	if (!fmhdl) {
		(void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
		return;
	}

	fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE;
	if (cmd == DDI_ATTACH) {
		if (need_cb_register)
			ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
	}

	atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
}

/* undo FMA lock, called at predetach */
void
pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	if (!bus_p)
		return;

	/* Don't fini anything if device isn't FM Ready */
	if (!(bus_p->bus_fm_flags & PF_FM_READY))
		return;

	/* no other code should set the flag to false */
	atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_READY);

	/*
	 * Grab the mutex to make sure device isn't in the middle of
	 * error handling. Setting the bus_fm_flag to ~PF_FM_READY
	 * should prevent this device from being error handled after
	 * the mutex has been released.
	 */
	(void) pf_handler_enter(dip, NULL);
	pf_handler_exit(dip);

	/* undo non-hardened drivers */
	if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
		if (cmd == DDI_DETACH) {
			atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_NH);
			pci_ereport_teardown(dip);
			/*
			 * ddi_fm_fini itself unregisters the error handler,
			 * so no need to explicitly call
			 * ddi_fm_handler_unregister here.
			 */
			ddi_fm_fini(dip);
		}
	}
}

/*
 * Dummy error callback registered for non-hardened drivers; always reports
 * DDI_FM_OK so the framework treats the device as callback capable.
 */
/*ARGSUSED*/
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
	return (DDI_FM_OK);
}

/*
 * Add PFD to queue. If it is an RC add it to the beginning,
 * otherwise add it to the end.
 */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
	pf_data_t *head_p = impl->pf_dq_head_p;
	pf_data_t *tail_p = impl->pf_dq_tail_p;

	impl->pf_total++;

	/* Empty queue: this PFD becomes both head and tail */
	if (!head_p) {
		ASSERT(PFD_IS_ROOT(pfd_p));
		impl->pf_dq_head_p = pfd_p;
		impl->pf_dq_tail_p = pfd_p;
		pfd_p->pe_prev = NULL;
		pfd_p->pe_next = NULL;
		return;
	}

	/* Check if this is a Root Port pfd */
	if (PFD_IS_ROOT(pfd_p)) {
		pf_data_t *root_p, *last_p = NULL;

		/* The first item must be a RP */
		root_p = head_p;
		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
		    last_p = last_p->pe_next)
			root_p = last_p;

		/*
		 * root_p is the last RP pfd. last_p is the first non-RP pfd.
		 * Splice the new RP pfd in between them.
		 */
		root_p->pe_next = pfd_p;
		pfd_p->pe_prev = root_p;
		pfd_p->pe_next = last_p;

		if (last_p)
			last_p->pe_prev = pfd_p;
		else
			tail_p = pfd_p;
	} else {
		/* Non-RP pfds simply go at the tail */
		tail_p->pe_next = pfd_p;
		pfd_p->pe_prev = tail_p;
		pfd_p->pe_next = NULL;
		tail_p = pfd_p;
	}

	impl->pf_dq_head_p = head_p;
	impl->pf_dq_tail_p = tail_p;
}

/*
 * PCIe leaf/endpoint analysis table.
 *
 * Ignore:
 * - TRAINING: as leaves do not have children
 * - SD: as leaves do not have children
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* Root Port analysis table */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_ADDR, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_CA,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};

/* Switch (upstream/downstream port) analysis table */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};

/* PCIe-PCI bridge secondary (SAER) analysis table */
const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
	{PCIE_AER_SUCE_TA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_MA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_TA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_MA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_USC_ERR,		pf_panic,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_USC_MSG_DATA_ERR,	pf_analyse_ma_ta,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_DATA_ERR,	pf_analyse_uc_data,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_ATTR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_UC_ADDR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_TIMER_EXPIRED,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_PERR_ASSERT,	pf_analyse_perr_assert,
	    0, 0},

	{PCIE_AER_SUCE_SERR_ASSERT,	pf_no_panic,
	    0, 0},

	{PCIE_AER_SUCE_INTERNAL_ERR,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{0, NULL, 0, 0}
};

/* Legacy PCI bridge secondary status analysis table */
const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* Legacy PCI device status analysis table */
const pf_fab_err_tbl_t pcie_pci_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};

/* UE status bits that are not masked in the AER UE mask register */
#define	PF_MASKED_AER_ERR(pfd_p)	\
	(PCIE_ADV_REG(pfd_p)->pcie_ue_status & \
	((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF))
/* Secondary UE status bits that are not masked in the SAER mask register */
#define	PF_MASKED_SAER_ERR(pfd_p)	\
	(PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \
	((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF))

/*
 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
 * Queue.
 */
static int
pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	int sts_flags, error_flags = 0;
	pf_data_t *pfd_p;

	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		sts_flags = 0;

		/* skip analysing error when no error info is gathered */
		if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
			goto done;

		/* Pick the analysis table(s) based on the device type */
		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
			/* no adjust_for_aer for pseudo RC */
			/* keep the severity passed on from RC if any */
			sts_flags |= pfd_p->pe_severity_flags;
			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			pf_adjust_for_no_saer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_tbl,
			    PF_MASKED_AER_ERR(pfd_p));
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_bdg_tbl,
			    PF_MASKED_SAER_ERR(pfd_p));
			/*
			 * Some non-compliant PCIe devices do not utilize PCIe
			 * error registers. So fallthrough and rely on legacy
			 * PCI error registers.
			 */
			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
				break;
			/* FALLTHROUGH */
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pci_tbl,
			    PCI_ERR_REG(pfd_p)->pci_err_status);

			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
				break;

			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pci_bdg_tbl,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
		}

		pfd_p->pe_severity_flags = sts_flags;

done:
		pfd_p->pe_orig_severity_flags = pfd_p->pe_severity_flags;
		/* Have pciev_eh adjust the severity */
		pfd_p->pe_severity_flags = pciev_eh(pfd_p, impl);

		/* Drop any severities the device is masked against */
		pfd_p->pe_severity_flags &= ~pfd_p->pe_severity_mask;

		error_flags |= pfd_p->pe_severity_flags;
	}

	return (error_flags);
}

/*
 * Walk an analysis table and call the handler for every error bit set in
 * err_reg. Accumulate the affected-device flags for each hit, falling back
 * to the secondary flags when the primary source of information (AER/SAER
 * logs or a scan address) is not available.
 */
static int
pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
    pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg)
{
	const pf_fab_err_tbl_t *row;
	int err = 0;
	uint16_t flags;
	uint32_t bit;

	for (row = tbl; err_reg && (row->bit != 0); row++) {
		bit = row->bit;
		if (!(err_reg & bit))
			continue;
		err |= row->handler(derr, bit, impl->pf_dq_head_p, pfd_p);

		flags = row->affected_flags;
		/*
		 * check if the primary flag is valid;
		 * if not, use the secondary flag
		 */
		if (flags & PF_AFFECTED_AER) {
			if (!HAS_AER_LOGS(pfd_p, bit)) {
				flags = row->sec_affected_flags;
			}
		} else if (flags & PF_AFFECTED_SAER) {
			if (!HAS_SAER_LOGS(pfd_p, bit)) {
				flags = row->sec_affected_flags;
			}
		} else if (flags & PF_AFFECTED_ADDR) {
			/* only Root has this flag */
			if (PCIE_ROOT_FAULT(pfd_p)->scan_addr == 0) {
				flags = row->sec_affected_flags;
			}
		}

		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags |= flags;
	}

	if (!err)
		err = PF_ERR_NO_ERROR;

	return (err);
}

/*
 * PCIe Completer Abort and Unsupported Request error analyser. If a PCIe
 * device issues a CA/UR a corresponding Received CA/UR should have been seen
 * in the PCIe root complex. Check to see if RC did indeed receive a CA/UR,
 * if so then this error may be safely ignored. If not check the logs and see
 * if an associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	uint32_t abort_type;
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCIE_AER_UCE_UR))
		return (PF_ERR_NO_PANIC);

	/*
	 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
	 * such as a config read or a bad DMA address.
	 */
	if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
		goto handle_lookup;

	if (bit == PCIE_AER_UCE_UR)
		abort_type = PCI_STAT_R_MAST_AB;
	else
		abort_type = PCI_STAT_R_TARG_AB;

	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
		return (PF_ERR_MATCHED_RC);

handle_lookup:
	if (HAS_AER_LOGS(pfd_p, bit) &&
	    pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe
 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so
 * then this error may be safely ignored. If not check the logs and see if an
 * associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint32_t abort_type;

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCIE_AER_SUCE_RCVD_MA))
		return (PF_ERR_NO_PANIC);

	if (bit == PCIE_AER_SUCE_RCVD_MA)
		abort_type = PCI_STAT_R_MAST_AB;
	else
		abort_type = PCI_STAT_R_TARG_AB;

	if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
		return (PF_ERR_MATCHED_RC);

	if (!HAS_SAER_LOGS(pfd_p, bit))
		return (PF_ERR_PANIC);

	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * Generic PCI error analyser. This function is used for Parity Errors,
 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
 * In general PCI devices do not have error logs, it is very difficult to
 * figure out what transaction caused the error. Instead find the nearest
 * PCIe-PCI Bridge and check to see if it has logs and if it has an error
 * associated with this PCI Device.
 */
/* ARGSUSED */
static int
pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	pf_data_t *parent_pfd_p;
	uint16_t cmd;
	uint32_t aer_ue_status;
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
	pf_pcie_adv_bdg_err_regs_t *parent_saer_p;

	if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
		return (PF_ERR_PANIC);

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCI_STAT_R_MAST_AB))
		return (PF_ERR_NO_PANIC);

	/* Map the PCI status bit to the matching secondary AER statuses */
	if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
		aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
	} else {
		aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
		    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
		    PCIE_AER_SUCE_RCVD_MA);
	}

	parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
	if (parent_pfd_p == NULL)
		return (PF_ERR_PANIC);

	/* Check if parent bridge has seen this error */
	parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
	if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
	    !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
		return (PF_ERR_PANIC);

	/*
	 * If the addr or bdf from the parent PCIe bridge logs belong to this
	 * PCI device, assume the PCIe bridge's error handling has already
	 * taken care of this PCI device's error.
	 */
	if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
	    pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
		return (PF_ERR_MATCHED_PARENT);

	/*
	 * If this device is a PCI-PCI bridge, check if the bdf in the parent
	 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus
	 * ranges. If they are, then assume the PCIe bridge's error handling
	 * has already taken care of this PCI-PCI bridge device's error.
	 */
	if (PCIE_IS_BDG(bus_p) &&
	    pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
		return (PF_ERR_MATCHED_PARENT);

	return (PF_ERR_PANIC);
}

/*
 * PCIe Bridge transactions associated with PERR.
 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
 * o Bridge received a poisoned Completion on a Split Transaction from PCIe
 * o Bridge received a poisoned Completion on a Delayed Transaction from PCIe
 *
 * Check for non-poisoned PCIe transactions that got forwarded to the secondary
 * side and detects a PERR#. Except for delayed read completions, a poisoned
 * TLP will be forwarded to the secondary bus and PERR# will be asserted.
1760 */ 1761 /* ARGSUSED */ 1762 static int 1763 pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1764 pf_data_t *pfd_p) 1765 { 1766 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1767 uint16_t cmd; 1768 int hdl_sts = PF_HDL_NOTFOUND; 1769 int err = PF_ERR_NO_ERROR; 1770 pf_pcie_adv_bdg_err_regs_t *saer_p; 1771 1772 1773 if (HAS_SAER_LOGS(pfd_p, bit)) { 1774 saer_p = PCIE_ADV_BDG_REG(pfd_p); 1775 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS) 1776 return (PF_ERR_PANIC); 1777 1778 cmd_switch: 1779 switch (cmd) { 1780 case PCI_PCIX_CMD_IOWR: 1781 case PCI_PCIX_CMD_MEMWR: 1782 case PCI_PCIX_CMD_MEMWR_BL: 1783 case PCI_PCIX_CMD_MEMWRBL: 1784 /* Posted Writes Transactions */ 1785 if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO) 1786 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1787 B_FALSE); 1788 break; 1789 case PCI_PCIX_CMD_CFWR: 1790 /* 1791 * Check to see if it is a non-posted write. If so, a 1792 * UR Completion would have been sent. 1793 */ 1794 if (pf_matched_in_rc(dq_head_p, pfd_p, 1795 PCI_STAT_R_MAST_AB)) { 1796 hdl_sts = PF_HDL_FOUND; 1797 err = PF_ERR_MATCHED_RC; 1798 goto done; 1799 } 1800 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1801 B_FALSE); 1802 break; 1803 case PCI_PCIX_CMD_SPL: 1804 hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, 1805 B_FALSE); 1806 break; 1807 case PCI_PCIX_CMD_DADR: 1808 cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >> 1809 PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) & 1810 PCIE_AER_SUCE_HDR_CMD_UP_MASK; 1811 if (cmd != PCI_PCIX_CMD_DADR) 1812 goto cmd_switch; 1813 /* FALLTHROUGH */ 1814 default: 1815 /* Unexpected situation, panic */ 1816 hdl_sts = PF_HDL_NOTFOUND; 1817 } 1818 1819 if (hdl_sts == PF_HDL_FOUND) 1820 err = PF_ERR_MATCHED_DEVICE; 1821 else 1822 err = PF_ERR_PANIC; 1823 } else { 1824 /* 1825 * Check to see if it is a non-posted write. If so, a UR 1826 * Completion would have been sent. 
1827 */ 1828 if ((PCIE_ERR_REG(pfd_p)->pcie_err_status & 1829 PCIE_DEVSTS_UR_DETECTED) && 1830 pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB)) 1831 err = PF_ERR_MATCHED_RC; 1832 1833 /* Check for posted writes. Transaction is lost. */ 1834 if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & 1835 PCI_STAT_S_PERROR) 1836 err = PF_ERR_PANIC; 1837 1838 /* 1839 * All other scenarios are due to read completions. Check for 1840 * PERR on the primary side. If found the primary side error 1841 * handling will take care of this error. 1842 */ 1843 if (err == PF_ERR_NO_ERROR) { 1844 if (PCI_ERR_REG(pfd_p)->pci_err_status & 1845 PCI_STAT_PERROR) 1846 err = PF_ERR_MATCHED_PARENT; 1847 else 1848 err = PF_ERR_PANIC; 1849 } 1850 } 1851 1852 done: 1853 return (err); 1854 } 1855 1856 /* 1857 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, 1858 * check the logs and see if an associated handler for this transaction can be 1859 * found. 1860 */ 1861 /* ARGSUSED */ 1862 static int 1863 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p, 1864 pf_data_t *pfd_p) 1865 { 1866 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip; 1867 1868 /* 1869 * If AERs are supported find the logs in this device, otherwise look in 1870 * it's parent's logs. 1871 */ 1872 if (HAS_AER_LOGS(pfd_p, bit)) { 1873 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0); 1874 1875 /* 1876 * Double check that the log contains a poisoned TLP. 1877 * Some devices like PLX switch do not log poison TLP headers. 1878 */ 1879 if (hdr->ep) { 1880 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == 1881 PF_HDL_FOUND) 1882 return (PF_ERR_MATCHED_DEVICE); 1883 } 1884 1885 /* 1886 * If an address is found and hdl lookup failed panic. 1887 * Otherwise check parents to see if there was enough 1888 * information recover. 
1889 */ 1890 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr) 1891 return (PF_ERR_PANIC); 1892 } 1893 1894 /* 1895 * Check to see if the rc has already handled this error or a parent has 1896 * already handled this error. 1897 * 1898 * If the error info in the RC wasn't enough to find the fault device, 1899 * such as if the faulting device lies behind a PCIe-PCI bridge from a 1900 * poisoned completion, check to see if the PCIe-PCI bridge has enough 1901 * info to recover. For completion TLP's, the AER header logs only 1902 * contain the faulting BDF in the Root Port. For PCIe device the fault 1903 * BDF is the fault device. But if the fault device is behind a 1904 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI 1905 * bridge's secondary bus number. 1906 */ 1907 if (!PFD_IS_ROOT(pfd_p)) { 1908 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p)); 1909 pf_data_t *parent_pfd_p; 1910 1911 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) { 1912 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR)) 1913 return (PF_ERR_MATCHED_RC); 1914 } 1915 1916 parent_pfd_p = PCIE_DIP2PFD(pdip); 1917 1918 if (HAS_AER_LOGS(parent_pfd_p, bit)) 1919 return (PF_ERR_MATCHED_PARENT); 1920 } else { 1921 pf_data_t *bdg_pfd_p; 1922 pcie_req_id_t secbus; 1923 1924 /* 1925 * Looking for a pcie bridge only makes sense if the BDF 1926 * Dev/Func = 0/0 1927 */ 1928 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p))) 1929 goto done; 1930 1931 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf; 1932 1933 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF)) 1934 goto done; 1935 1936 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus); 1937 1938 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p, 1939 PCIE_AER_SUCE_PERR_ASSERT)) { 1940 return pf_analyse_perr_assert(derr, 1941 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p); 1942 } 1943 } 1944 done: 1945 return (PF_ERR_PANIC); 1946 } 1947 1948 /* 1949 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split 1950 * Completions. 
If a PCIe Bridge receives a MA/TA check logs and see if an
 * associated handler for this transaction can be found.
 */
/* ARGSUSED */
static int
pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint16_t cmd;
	int sts = PF_HDL_NOTFOUND;

	/* Without secondary AER logs the transaction cannot be identified */
	if (!HAS_SAER_LOGS(pfd_p, bit))
		return (PF_ERR_PANIC);

	if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	/* Only Split Completion commands can be matched to a handler */
	if (cmd == PCI_PCIX_CMD_SPL)
		sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);

	if (sts == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/*
 * PCIe Timeout error analyser. This error can be forgiven if it is marked as
 * CE Advisory. If it is marked as advisory, this means the HW can recover
 * and/or retry the transaction automatically. Additionally, if a device's
 * parent slot reports that it is no longer physically present, we do not
 * panic, as one would not expect a missing device to respond to a command.
 */
/* ARGSUSED */
static int
pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	pf_data_t *rppfd = PCIE_DIP2PFD(rpdip);
	pf_pcie_slot_regs_t *p_pcie_slot_regs;

	if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
		return (PF_ERR_NO_PANIC);

	p_pcie_slot_regs = PCIE_SLOT_REG(rppfd);
	if (p_pcie_slot_regs->pcie_slot_regs_valid) {
		/*
		 * If the device is reported gone from its parent slot, then it
		 * is expected that any outstanding commands would time out. In
		 * this case, do not panic.
		 */
		if ((p_pcie_slot_regs->pcie_slot_status &
		    PCIE_SLOTSTS_PRESENCE_DETECTED) == 0x0) {
			return (PF_ERR_NO_PANIC);
		}
	}

	return (PF_ERR_PANIC);
}

/*
 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by
 * matching the device BDF with the TLP Log. If misrouting panic, otherwise
 * don't panic.
 */
/* ARGSUSED */
static int
pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	/* The completer BDF is in the upper 16 bits of header log word 2 */
	if (HAS_AER_LOGS(pfd_p, bit) &&
	    (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
		return (PF_ERR_NO_PANIC);

	/*
	 * This is a case of mis-routing. Any of the switches above this
	 * device could be at fault.
	 */
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ROOT;

	return (PF_ERR_PANIC);
}

/*
 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data
 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a
 * Poisoned TLP.
 *
 * Non-Posted Writes will also generate a UR in the completion status, which
 * the RC should also see.
 */
/* ARGSUSED */
static int
pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* Without secondary AER logs the transaction cannot be identified */
	if (!HAS_SAER_LOGS(pfd_p, bit))
		return (PF_ERR_PANIC);

	if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
		return (PF_ERR_MATCHED_RC);

	if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
		return (PF_ERR_MATCHED_DEVICE);

	return (PF_ERR_PANIC);
}

/* Trivial analysis handler: this error is always recoverable */
/* ARGSUSED */
static int
pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	return (PF_ERR_NO_PANIC);
}

/* Trivial analysis handler: this error always requires a panic */
/* ARGSUSED */
static int
pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	return (PF_ERR_PANIC);
}

/*
 * If a PCIe device does not support AER, assume all AER statuses have been
 * set, unless other registers do not indicate a certain error occurring.
 */
static void
pf_adjust_for_no_aer(pf_data_t *pfd_p)
{
	uint32_t aer_ue = 0;
	uint16_t status;

	/* Nothing to synthesize if real AER registers were captured. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		aer_ue = PF_AER_FATAL_ERR;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		aer_ue = PF_AER_NON_FATAL_ERR;
		status = PCI_ERR_REG(pfd_p)->pci_err_status;

		/* Check if the device received a PTLP */
		if (!(status & PCI_STAT_PERROR))
			aer_ue &= ~PCIE_AER_UCE_PTLP;

		/* Check if the device signaled a CA */
		if (!(status & PCI_STAT_S_TARG_AB))
			aer_ue &= ~PCIE_AER_UCE_CA;

		/* Check if the device sent a UR */
		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED))
			aer_ue &= ~PCIE_AER_UCE_UR;

		/*
		 * Ignore ECRCs as it is optional and will manifest itself as
		 * another error like PTLP and MFP
		 */
		aer_ue &= ~PCIE_AER_UCE_ECRC;

		/*
		 * Generally if NFE is set, SERR should also be set. Exception:
		 * When certain non-fatal errors are masked, and some of them
		 * happened to be the cause of the NFE, SERR will not be set and
		 * they can not be the source of this interrupt.
		 *
		 * On x86, URs are masked (NFE + UR can be set), if any other
		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
		 * occur, SERR should be set since they are not masked. So if
		 * SERR is not set, none of them occurred.
		 */
		if (!(status & PCI_STAT_S_SYSERR))
			aer_ue &= ~PCIE_AER_UCE_TO;
	}

	/* Link-training and surprise-down are only meaningful on bridges. */
	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
		aer_ue &= ~PCIE_AER_UCE_TRAINING;
		aer_ue &= ~PCIE_AER_UCE_SD;
	}

	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
}

/*
 * If a PCIe-PCI bridge lacks secondary AER, synthesize the secondary UE
 * status from the PCI secondary status register in the same spirit as
 * pf_adjust_for_no_aer() above.
 */
static void
pf_adjust_for_no_saer(pf_data_t *pfd_p)
{
	uint32_t s_aer_ue = 0;
	uint16_t status;

	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		s_aer_ue = PF_SAER_FATAL_ERR;

	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		s_aer_ue = PF_SAER_NON_FATAL_ERR;
		status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;

		/* Check if the device received a UC_DATA */
		if (!(status & PCI_STAT_PERROR))
			s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;

		/* Check if the device received a RCVD_MA/MA_ON_SC */
		if (!(status & (PCI_STAT_R_MAST_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
			s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
		}

		/* Check if the device received a RCVD_TA/TA_ON_SC */
		if (!(status & (PCI_STAT_R_TARG_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
			s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
		}
	}

	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
}

/* Find the PCIe-PCI bridge based on secondary bus number */
static pf_data_t *
pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
{
	pf_data_t *bdg_pfd_p;

	/* Search down for the PCIe-PCI device. */
	for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
	    bdg_pfd_p = bdg_pfd_p->pe_next) {
		if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
		    PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
			return (bdg_pfd_p);
	}

	return (NULL);
}

/* Find the PCIe-PCI bridge of a PCI device */
static pf_data_t *
pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
{
	dev_info_t *dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* This only makes sense if the device is a PCI device */
	if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
		return (NULL);

	/*
	 * Search up for the PCIe-PCI device. Watchout for x86 where pci
	 * devices hang directly off of NPE.
	 *
	 * NOTE(review): when dip reaches rp_dip it is set to NULL and then
	 * passed to PCIE_DIP2BUS()/ddi_get_parent() below; this relies on
	 * those tolerating a NULL dip -- confirm against the macros.
	 */
	for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
		if (dip == rp_dip)
			dip = NULL;

		if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
			return (PCIE_DIP2PFD(dip));
	}

	return (NULL);
}

/*
 * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
 * As of right now only RC's have enough information to have errors found in the
 * fabric to be matched to the RC. Note that Root Port's (RP) do not carry
 * enough information. Currently known RC's are SPARC Fire architecture and
 * it's equivalents, and x86's NPE.
 * SPARC Fire architectures have a plethora of error registers, while currently
 * NPE only have the address of a failed load.
 *
 * Check if the RC logged an error with the appropriate status type/abort type.
 * Ex: Parity Error, Received Master/Target Abort
 * Check if either the fault address found in the rc matches the device's
 * assigned address range (PIO's only) or the fault BDF in the rc matches the
 * device's BDF or Secondary Bus/Bus Range.
 */
static boolean_t
pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
    uint32_t abort_type)
{
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
	pf_data_t *rc_pfd_p;
	pcie_req_id_t fault_bdf;

	/*
	 * Root entries sit at the head of the fault queue; stop at the
	 * first non-root entry.
	 */
	for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
	    rc_pfd_p = rc_pfd_p->pe_next) {
		/* Only root complex's have enough information to match */
		if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
			continue;

		/* If device and rc abort type does not match continue */
		if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
			continue;

		fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;

		/* The Fault BDF = Device's BDF */
		if (fault_bdf == bus_p->bus_bdf)
			return (B_TRUE);

		/* The Fault Addr is in device's address range */
		if (pf_in_addr_range(bus_p,
		    PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
			return (B_TRUE);

		/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
		if (PCIE_IS_PCIE_BDG(bus_p) &&
		    pf_in_bus_range(bus_p, fault_bdf))
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then
 * it is a DMA, otherwise it's a PIO
 */
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
    pcie_req_id_t *bdf)
{
	pf_data_t *rc_pfd_p;

	/*
	 * Could be DMA or PIO. Find out by look at error type.
	 * NOTE(review): switching on the raw pcie_sue_status presumably
	 * relies on the first-error-pointer semantics leaving exactly one
	 * SUE bit set here (see HAS_SAER_LOGS) -- confirm with callers.
	 */
	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
	case PCIE_AER_SUCE_TA_ON_SC:
	case PCIE_AER_SUCE_MA_ON_SC:
		*trans_type = PF_ADDR_DMA;
		return;
	case PCIE_AER_SUCE_RCVD_TA:
	case PCIE_AER_SUCE_RCVD_MA:
		*bdf = PCIE_INVALID_BDF;
		*trans_type = PF_ADDR_PIO;
		return;
	case PCIE_AER_SUCE_USC_ERR:
	case PCIE_AER_SUCE_UC_DATA_ERR:
	case PCIE_AER_SUCE_PERR_ASSERT:
		break;
	default:
		*addr = 0;
		*bdf = PCIE_INVALID_BDF;
		*trans_type = 0;
		return;
	}

	/* Default to PIO; upgrade to DMA if any root entry saw a PERR. */
	*bdf = PCIE_INVALID_BDF;
	*trans_type = PF_ADDR_PIO;
	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
	    rc_pfd_p = rc_pfd_p->pe_prev) {
		if (PFD_IS_ROOT(rc_pfd_p) &&
		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_PERROR)) {
			*trans_type = PF_ADDR_DMA;
			return;
		}
	}
}

/*
 * pf_pci_decode function decodes the secondary aer transaction logs in
 * PCIe-PCI bridges.
 *
 * The log is 128 bits long and arranged in this manner.
 * [0:35]   Transaction Attribute   (s_aer_h0-saer_h1)
 * [36:39]  Transaction lower command   (saer_h1)
 * [40:43]  Transaction upper command   (saer_h1)
 * [44:63]  Reserved
 * [64:127] Address   (saer_h2-saer_h3)
 */
/* ARGSUSED */
int
pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd)
{
	pcix_attr_t *attr;
	uint64_t addr;
	uint32_t trans_type;
	pcie_req_id_t bdf = PCIE_INVALID_BDF;

	attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0);
	*cmd = GET_SAER_CMD(pfd_p);

cmd_switch:
	switch (*cmd) {
	case PCI_PCIX_CMD_IORD:
	case PCI_PCIX_CMD_IOWR:
		/* IO Access should always be down stream */
		addr = PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;
		trans_type = PF_ADDR_PIO;
		break;
	case PCI_PCIX_CMD_MEMRD_DW:
	case PCI_PCIX_CMD_MEMRD_BL:
	case PCI_PCIX_CMD_MEMRDBL:
	case PCI_PCIX_CMD_MEMWR:
	case PCI_PCIX_CMD_MEMWR_BL:
	case PCI_PCIX_CMD_MEMWRBL:
		/* 64-bit address is split across header words 2 and 3. */
		addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) <<
		    PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;

		pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf);
		break;
	case PCI_PCIX_CMD_CFRD:
	case PCI_PCIX_CMD_CFWR:
		/*
		 * CFG Access should always be down stream. Match the BDF in
		 * the address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_CFG;
		break;
	case PCI_PCIX_CMD_SPL:
		/*
		 * Check for DMA read completions. The requesting BDF is in the
		 * Address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_DMA;
		break;
	case PCI_PCIX_CMD_DADR:
		/*
		 * For Dual Address Cycles the transaction command is in the 2nd
		 * address phase.
		 */
		*cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
		    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
		    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
		if (*cmd != PCI_PCIX_CMD_DADR)
			goto cmd_switch;
		/* FALLTHROUGH */
	default:
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
		return (DDI_FAILURE);
	}
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr;
	return (DDI_SUCCESS);
}

/*
 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler.
 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
 */
int
pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr,
    pcie_req_id_t bdf)
{
	ddi_fm_error_t derr;

	/* If we don't know the addr or rid just return with NOTFOUND */
	if ((addr == 0) && !PCIE_CHECK_VALID_BDF(bdf))
		return (PF_HDL_NOTFOUND);

	/*
	 * Disable DMA handle lookup until DMA errors can be handled and
	 * reported synchronously.
When enabled again, check for the
	 * PF_ADDR_DMA flag
	 */
	if (!(flag & (PF_ADDR_PIO | PF_ADDR_CFG))) {
		return (PF_HDL_NOTFOUND);
	}

	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
	derr.fme_ena = ena;

	return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf));
}

/*
 * Recursively walk dip and its children looking for an access/DMA handle
 * matching the fault addr/bdf; the matching handle's FM error state is set
 * by pf_hdl_compare().  Returns PF_HDL_FOUND or PF_HDL_NOTFOUND.
 */
static int
pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
    uint64_t addr, pcie_req_id_t bdf)
{
	int status = PF_HDL_NOTFOUND;
	ndi_fmc_t *fcp = NULL;
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	pcie_req_id_t dip_bdf;
	boolean_t have_lock = B_FALSE;
	pcie_bus_t *bus_p;
	dev_info_t *cdip;

	if (!(bus_p = pf_is_ready(dip))) {
		return (status);
	}

	ASSERT(fmhdl);
	if (!i_ddi_fm_handler_owned(dip)) {
		/*
		 * pf_handler_enter always returns SUCCESS if the 'impl' arg is
		 * NULL.
		 */
		(void) pf_handler_enter(dip, NULL);
		have_lock = B_TRUE;
	}

	dip_bdf = PCI_GET_BDF(dip);

	/* Check if dip and BDF match, if not recurse to its children. */
	if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) ||
	    dip_bdf == bdf)) {
		/* Search the DMA handle cache when this is a DMA fault. */
		if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_dma_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr,
			    bdf, fcp);


		/* Search the access handle cache for PIO/CFG faults. */
		if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) &&
		    DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_acc_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr,
			    bdf, fcp);
	}

	/* If we found the handler or know it's this device, we're done */
	if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) ||
	    (status == PF_HDL_FOUND)))
		goto done;

	/*
	 * If the current device is a PCIe-PCI bridge need to check for special
	 * cases:
	 *
	 * If it is a PIO and we don't have an address or this is a DMA, check
	 * to see if the BDF = secondary bus. If so stop. The BDF isn't a real
	 * BDF and the fault device could have come from any device in the PCI
	 * bus.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) &&
	    ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf))
		goto done;


	/* If we can't find the handler check its children */
	for (cdip = ddi_get_child(dip); cdip;
	    cdip = ddi_get_next_sibling(cdip)) {
		if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL)
			continue;

		/* Only descend into children that could own the fault. */
		if (pf_in_bus_range(bus_p, bdf) ||
		    pf_in_addr_range(bus_p, addr))
			status = pf_hdl_child_lookup(cdip, derr, flag, addr,
			    bdf);

		if (status == PF_HDL_FOUND)
			goto done;
	}

done:
	if (have_lock == B_TRUE)
		pf_handler_exit(dip);

	return (status);
}

/*
 * Scan one FM handle cache (ACC or DMA) for handles matching addr/bdf and
 * record the error state on each match.
 */
static int
pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
    uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp)
{
	ndi_fmcentry_t *fep;
	int found = 0;
	int status;

	mutex_enter(&fcp->fc_lock);
	for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) {
		ddi_fmcompare_t compare_func;

		/*
		 * Compare captured error state with handle
		 * resources. During the comparison and
		 * subsequent error handling, we block
		 * attempts to free the cache entry.
2545 */ 2546 if (flag == ACC_HANDLE) { 2547 compare_func = 2548 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t) 2549 fep->fce_resource); 2550 } else { 2551 compare_func = 2552 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t) 2553 fep->fce_resource); 2554 } 2555 2556 if (compare_func == NULL) /* unbound or not FLAGERR */ 2557 continue; 2558 2559 status = compare_func(dip, fep->fce_resource, 2560 (void *)&addr, (void *)&bdf); 2561 2562 if (status == DDI_FM_NONFATAL) { 2563 found++; 2564 2565 /* Set the error for this resource handle */ 2566 if (flag == ACC_HANDLE) { 2567 ddi_acc_handle_t ap = fep->fce_resource; 2568 2569 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status, 2570 DDI_FM_ERR_UNEXPECTED); 2571 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); 2572 derr->fme_acc_handle = ap; 2573 } else { 2574 ddi_dma_handle_t dp = fep->fce_resource; 2575 2576 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status, 2577 DDI_FM_ERR_UNEXPECTED); 2578 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); 2579 derr->fme_dma_handle = dp; 2580 } 2581 } 2582 } 2583 mutex_exit(&fcp->fc_lock); 2584 2585 /* 2586 * If a handler isn't found and we know this is the right device mark 2587 * them all failed. 2588 */ 2589 if ((addr != 0) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) { 2590 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp); 2591 if (status == PF_HDL_FOUND) 2592 found++; 2593 } 2594 2595 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND); 2596 } 2597 2598 /* 2599 * Automatically decode AER header logs and does a handling look up based on the 2600 * AER header decoding. 2601 * 2602 * For this function only the Primary/Secondary AER Header Logs need to be valid 2603 * in the pfd (PCIe Fault Data) arg. 2604 * 2605 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND. 
 */
/* ARGSUSED */
static int
pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
    boolean_t is_primary)
{
	/*
	 * Disabling this function temporarily until errors can be handled
	 * synchronously.
	 *
	 * This function is currently only called during the middle of a fabric
	 * scan. If the fabric scan is called synchronously with an error seen
	 * in the RP/RC, then the related errors in the fabric will have a
	 * PF_ERR_MATCHED_RC error severity. pf_log_hdl_lookup code will be by
	 * passed when the severity is PF_ERR_MATCHED_RC. Handle lookup would
	 * have already happened in RP/RC error handling in a synchronous
	 * manner. Errors unrelated should panic, because they are being
	 * handled asynchronously.
	 *
	 * If fabric scan is called asynchronously from any RP/RC error, then
	 * DMA/PIO UE errors seen in the fabric should panic. pf_log_hdl_lookup
	 * will return PF_HDL_NOTFOUND to ensure that the system panics.
	 */
	return (PF_HDL_NOTFOUND);
}

/*
 * Decodes the TLP and returns the BDF of the handler, address and transaction
 * type if known.
 *
 * Types of TLP logs seen in RC, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Should not occur and result in UR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * Types of TLP logs seen in SW/Leaf, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Destined BDF, address, PF_CFG_ADDR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * The adv_reg_p must be passed in separately for use with SPARC RPs.
A 2653 * SPARC RP could have multiple AER header logs which cannot be directly 2654 * accessed via the bus_p. 2655 */ 2656 int 2657 pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) 2658 { 2659 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr; 2660 pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF; 2661 uint64_t flt_addr = 0; 2662 uint32_t flt_trans_type = 0; 2663 2664 adv_reg_p->pcie_ue_tgt_addr = 0; 2665 adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF; 2666 adv_reg_p->pcie_ue_tgt_trans = 0; 2667 2668 my_bdf = bus_p->bus_bdf; 2669 switch (tlp_hdr->type) { 2670 case PCIE_TLP_TYPE_IO: 2671 case PCIE_TLP_TYPE_MEM: 2672 case PCIE_TLP_TYPE_MEMLK: 2673 /* Grab the 32/64bit fault address */ 2674 if (tlp_hdr->fmt & 0x1) { 2675 flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32); 2676 flt_addr |= adv_reg_p->pcie_ue_hdr[3]; 2677 } else { 2678 flt_addr = adv_reg_p->pcie_ue_hdr[2]; 2679 } 2680 2681 tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16); 2682 2683 /* 2684 * If the req bdf >= this.bdf, then it means the request is this 2685 * device or came from a device below it. Unless this device is 2686 * a PCIe root port then it means is a DMA, otherwise PIO. 
2687 */ 2688 if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) { 2689 flt_trans_type = PF_ADDR_DMA; 2690 flt_bdf = tlp_bdf; 2691 } else if (PCIE_IS_ROOT(bus_p) && 2692 (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) || 2693 (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) { 2694 flt_trans_type = PF_ADDR_DMA; 2695 flt_bdf = tlp_bdf; 2696 } else { 2697 flt_trans_type = PF_ADDR_PIO; 2698 flt_bdf = PCIE_INVALID_BDF; 2699 } 2700 break; 2701 case PCIE_TLP_TYPE_CFG0: 2702 case PCIE_TLP_TYPE_CFG1: 2703 flt_addr = 0; 2704 flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16); 2705 flt_trans_type = PF_ADDR_CFG; 2706 break; 2707 case PCIE_TLP_TYPE_CPL: 2708 case PCIE_TLP_TYPE_CPLLK: 2709 { 2710 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&adv_reg_p->pcie_ue_hdr[1]; 2711 2712 flt_addr = 0; 2713 flt_bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid : 2714 cpl_tlp->cid; 2715 2716 /* 2717 * If the cpl bdf < this.bdf, then it means the request is this 2718 * device or came from a device below it. Unless this device is 2719 * a PCIe root port then it means is a DMA, otherwise PIO. 2720 */ 2721 if (cpl_tlp->rid > cpl_tlp->cid) { 2722 flt_trans_type = PF_ADDR_DMA; 2723 } else { 2724 flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG; 2725 } 2726 break; 2727 } 2728 default: 2729 return (DDI_FAILURE); 2730 } 2731 2732 adv_reg_p->pcie_ue_tgt_addr = flt_addr; 2733 adv_reg_p->pcie_ue_tgt_bdf = flt_bdf; 2734 adv_reg_p->pcie_ue_tgt_trans = flt_trans_type; 2735 2736 return (DDI_SUCCESS); 2737 } 2738 2739 #define PCIE_EREPORT DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." 
PCIEX_FABRIC

/*
 * Reserve an errorq element and construct the skeleton ereport (detector
 * FMRI based on the device path, plus the ENA) for dip.  Fills in ereport/
 * detector/eqep on DDI_SUCCESS; returns DDI_FAILURE if the errorq
 * reservation failed.
 */
static int
pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
    nvlist_t **detector, errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	char device_path[MAXPATHLEN];
	nv_alloc_t *nva;

	*eqep = errorq_reserve(fmhdl->fh_errorq);
	if (*eqep == NULL) {
		atomic_inc_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64);
		return (DDI_FAILURE);
	}

	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);

	ASSERT(*ereport);
	ASSERT(nva);

	/*
	 * Use the dev_path/devid for this device instance.
	 */
	*detector = fm_nvlist_create(nva);
	if (dip == ddi_root_node()) {
		device_path[0] = '/';
		device_path[1] = '\0';
	} else {
		(void) ddi_pathname(dip, device_path);
	}

	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
	    device_path, NULL, NULL);

	/* Generate a fresh ENA if the caller did not supply one. */
	if (ena == 0)
		ena = fm_ena_generate(0, FM_ENA_FMT1);

	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);

	return (DDI_SUCCESS);
}

/* Commit the reserved errorq element; the ereport posts asynchronously. */
/* ARGSUSED */
static void
pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
    errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;

	errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
}

/*
 * Walk the fault queue and post one ereport per captured device, including
 * whichever register sets were gathered for that device type.
 */
static void
pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	nvlist_t *ereport;
	nvlist_t *detector;
	errorq_elem_t *eqep;
	pcie_bus_t *bus_p;
	pf_data_t *pfd_p;
	uint32_t total = impl->pf_total;

	/*
	 * Ereports need to be sent in a top down fashion. The fabric translator
	 * expects the ereports from the Root first. This is needed to tell if
	 * the system contains a PCIe compliant RC/RP.
	 */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		bus_p = PCIE_PFD2BUS(pfd_p);
		pfd_p->pe_valid = B_FALSE;

		/* Only post for unexpected errors on ereport-capable dips. */
		if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
		    !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
			continue;

		if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
		    &ereport, &detector, &eqep) != DDI_SUCCESS)
			continue;

		/* RC entries carry only the scan fault info. */
		if (PFD_IS_RC(pfd_p)) {
			fm_payload_set(ereport,
			    "scan_bdf", DATA_TYPE_UINT16,
			    PCIE_ROOT_FAULT(pfd_p)->scan_bdf,
			    "scan_addr", DATA_TYPE_UINT64,
			    PCIE_ROOT_FAULT(pfd_p)->scan_addr,
			    "intr_src", DATA_TYPE_UINT16,
			    PCIE_ROOT_EH_SRC(pfd_p)->intr_type,
			    NULL);
			goto generic;
		}

		/* Generic PCI device information */
		fm_payload_set(ereport,
		    "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
		    "device_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id >> 16),
		    "vendor_id", DATA_TYPE_UINT16,
		    (bus_p->bus_dev_ven_id & 0xFFFF),
		    "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
		    "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
		    "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
		    "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
		    "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
		    "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
		    NULL);

		/* PCI registers */
		fm_payload_set(ereport,
		    "pci_status", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_err_status,
		    "pci_command", DATA_TYPE_UINT16,
		    PCI_ERR_REG(pfd_p)->pci_cfg_comm,
		    NULL);

		/* PCI bridge registers */
		if (PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pci_bdg_sec_status", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
			    "pci_bdg_ctrl", DATA_TYPE_UINT16,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
			    NULL);
		}

		/* PCIx registers */
		if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_status", DATA_TYPE_UINT32,
			    PCIX_ERR_REG(pfd_p)->pcix_status,
			    "pcix_command", DATA_TYPE_UINT16,
			    PCIX_ERR_REG(pfd_p)->pcix_command,
			    NULL);
		}

		/* PCIx ECC Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;
			pf_pcix_ecc_regs_t *ecc_reg;

			/* ecc_bdg_reg is only read under PCIE_IS_BDG below. */
			if (PCIE_IS_BDG(bus_p))
				ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
			ecc_reg = PCIX_ECC_REG(pfd_p);
			fm_payload_set(ereport,
			    "pcix_ecc_control_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
			    (ecc_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_0", DATA_TYPE_UINT16,
			    PCIE_IS_BDG(bus_p) ?
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
			    (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_fstaddr :
			    ecc_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_secaddr :
			    ecc_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_0", DATA_TYPE_UINT32,
			    PCIE_IS_BDG(bus_p) ?
			    ecc_bdg_reg->pcix_ecc_attr :
			    ecc_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx ECC Bridge Registers */
		if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
			pf_pcix_ecc_regs_t *ecc_bdg_reg;

			ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
			fm_payload_set(ereport,
			    "pcix_ecc_control_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
			    "pcix_ecc_status_1", DATA_TYPE_UINT16,
			    (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
			    "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_fstaddr,
			    "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_secaddr,
			    "pcix_ecc_attr_1", DATA_TYPE_UINT32,
			    ecc_bdg_reg->pcix_ecc_attr,
			    NULL);
		}

		/* PCIx Bridge */
		if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
			fm_payload_set(ereport,
			    "pcix_bdg_status", DATA_TYPE_UINT32,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
			    "pcix_bdg_sec_status", DATA_TYPE_UINT16,
			    PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
			    NULL);
		}

		/* PCIe registers */
		if (PCIE_IS_PCIE(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_status", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_status,
			    "pcie_command", DATA_TYPE_UINT16,
			    PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
			    "pcie_dev_cap", DATA_TYPE_UINT32,
			    PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
			    NULL);
		}

		/* PCIe AER registers */
		if (PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
			    "pcie_ue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_status,
			    "pcie_ue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
			    "pcie_ue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
			    "pcie_ue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
			    "pcie_ue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
			    "pcie_ue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
			    "pcie_ue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
			    "pcie_ce_status", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_status,
			    "pcie_ce_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
			    NULL);
		}

		/* PCIe AER decoded header */
		if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
			fm_payload_set(ereport,
			    "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
			    "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
			    "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
			PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
		}

		/* PCIe BDG AER registers */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
			    "pcie_sue_status", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
			    "pcie_sue_mask", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
			    "pcie_sue_sev", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
			    "pcie_sue_hdr0", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
			    "pcie_sue_hdr1", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
			    "pcie_sue_hdr2", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
			    "pcie_sue_hdr3", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
			    NULL);
		}

		/* PCIe BDG AER decoded header */
		if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
		    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
			fm_payload_set(ereport,
			    "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
			    "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
			    "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
			    PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
			    NULL);
			/* Clear these values as they no longer valid */
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe RP registers */
		if (PCIE_IS_RP(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_rp_status", DATA_TYPE_UINT32,
			    PCIE_RP_REG(pfd_p)->pcie_rp_status,
			    "pcie_rp_control", DATA_TYPE_UINT16,
			    PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
			    NULL);
		}

		/* PCIe RP AER registers */
		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
			fm_payload_set(ereport,
			    "pcie_adv_rp_status", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
			    "pcie_adv_rp_command", DATA_TYPE_UINT32,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
			    "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
			    "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
			    PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
			    NULL);
		}

		/*
		 * Slot Status registers
		 *
		 * Since we only gather these for certain types of components,
		 * only put these registers into the ereport if we have valid
		 * data.
		 */
		if (PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid) {
			fm_payload_set(ereport,
			    "pcie_slot_cap", DATA_TYPE_UINT32,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_cap,
			    "pcie_slot_control", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_control,
			    "pcie_slot_status", DATA_TYPE_UINT16,
			    PCIE_SLOT_REG(pfd_p)->pcie_slot_status,
			    NULL);
		}

generic:
		/* IOV related information */
		if (!PCIE_BDG_IS_UNASSIGNED(PCIE_PFD2BUS(impl->pf_dq_head_p))) {
			fm_payload_set(ereport,
			    "pcie_aff_flags", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags,
			    "pcie_aff_bdf", DATA_TYPE_UINT16,
			    PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf,
			    "orig_sev", DATA_TYPE_UINT32,
			    pfd_p->pe_orig_severity_flags,
			    NULL);
		}

		/* Misc ereport information */
		fm_payload_set(ereport,
		    "remainder", DATA_TYPE_UINT32, --total,
		    "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
		    NULL);

		pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
		    &eqep);
	}

	pf_dq_unlock_chain(impl);
}

/*
 * pf_handler_enter must be called to serial access to each device's pf_data_t.
 * Once error handling is finished with the device call pf_handler_exit to allow
 * other threads to access it. The same thread may call pf_handler_enter
 * several times without any consequences.
 *
 * The "impl" variable is passed in during scan fabric to double check that
 * there is not a recursive algorithm and to ensure only one thread is doing a
 * fabric scan at all times.
 *
 * In some cases "impl" is not available, such as "child lookup" being called
 * from outside of scan fabric, just pass in NULL for this variable and this
 * extra check will be skipped.
3110 */ 3111 static int 3112 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl) 3113 { 3114 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 3115 3116 ASSERT(pfd_p); 3117 3118 /* 3119 * Check to see if the lock has already been taken by this 3120 * thread. If so just return and don't take lock again. 3121 */ 3122 if (!pfd_p->pe_lock || !impl) { 3123 i_ddi_fm_handler_enter(dip); 3124 pfd_p->pe_lock = B_TRUE; 3125 return (PF_SCAN_SUCCESS); 3126 } 3127 3128 /* Check to see that this dip is already in the "impl" error queue */ 3129 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) { 3130 if (PCIE_PFD2DIP(pfd_p) == dip) { 3131 return (PF_SCAN_SUCCESS); 3132 } 3133 } 3134 3135 return (PF_SCAN_DEADLOCK); 3136 } 3137 3138 static void 3139 pf_handler_exit(dev_info_t *dip) 3140 { 3141 pf_data_t *pfd_p = PCIE_DIP2PFD(dip); 3142 3143 ASSERT(pfd_p); 3144 3145 ASSERT(pfd_p->pe_lock == B_TRUE); 3146 i_ddi_fm_handler_exit(dip); 3147 pfd_p->pe_lock = B_FALSE; 3148 } 3149 3150 /* 3151 * This function calls the driver's callback function (if it's FMA hardened 3152 * and callback capable). This function relies on the current thread already 3153 * owning the driver's fmhdl lock. 
 */
static int
pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
{
	int cb_sts = DDI_FM_OK;

	if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
		dev_info_t *pdip = ddi_get_parent(dip);
		struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
		struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
		struct i_ddi_errhdl *errhdl;
		/*
		 * Error-handler targets hang off the parent's fm handle;
		 * walk the list for the entry whose ft_dip matches this dip
		 * and invoke its registered handler.
		 */
		while (tgt != NULL) {
			if (dip == tgt->ft_dip) {
				errhdl = tgt->ft_errhdl;
				cb_sts = errhdl->eh_func(dip, derr,
				    errhdl->eh_impl);
				break;
			}
			tgt = tgt->ft_next;
		}
	}
	return (cb_sts);
}

/*
 * Reset the saved error state in a device's pf_data_t so stale register
 * snapshots and severity flags from this scan are not carried into a later
 * one.  BDF-valued fields are reset to PCIE_INVALID_BDF rather than 0,
 * since 0 is a representable BDF.
 */
static void
pf_reset_pfd(pf_data_t *pfd_p)
{
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);

	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	/* pe_lock and pe_valid were reset in pf_send_ereport */

	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;

	/* Root ports/complexes additionally track scan and source state */
	if (PCIE_IS_ROOT(bus_p)) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
		PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_FALSE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
	}

	if (PCIE_IS_BDG(bus_p)) {
		bzero(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
	}

	PCI_ERR_REG(pfd_p)->pci_err_status = 0;
	PCI_ERR_REG(pfd_p)->pci_cfg_comm = 0;

	if (PCIE_IS_PCIE(bus_p)) {
		/* PCIe root: clear RP and advanced (AER) RP register copies */
		if (PCIE_IS_ROOT(bus_p)) {
			bzero(PCIE_RP_REG(pfd_p),
			    sizeof (pf_pcie_rp_err_regs_t));
			bzero(PCIE_ADV_RP_REG(pfd_p),
			    sizeof (pf_pcie_adv_rp_err_regs_t));
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
			    PCIE_INVALID_BDF;
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
			    PCIE_INVALID_BDF;
		} else if (PCIE_IS_PCIE_BDG(bus_p)) {
			/* PCIe-to-PCI(-X) bridge: clear secondary AER state */
			bzero(PCIE_ADV_BDG_REG(pfd_p),
			    sizeof (pf_pcie_adv_bdg_err_regs_t));
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
			/* ECC registers exist only on newer PCI-X versions */
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		}

		/* Advanced error (AER) register snapshot */
		PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_sev = 0;
		PCIE_ADV_HDR(pfd_p, 0) = 0;
		PCIE_ADV_HDR(pfd_p, 1) = 0;
		PCIE_ADV_HDR(pfd_p, 2) = 0;
		PCIE_ADV_HDR(pfd_p, 3) = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;

		PCIE_ERR_REG(pfd_p)->pcie_err_status = 0;
		PCIE_ERR_REG(pfd_p)->pcie_err_ctl = 0;
		PCIE_ERR_REG(pfd_p)->pcie_dev_cap = 0;

	} else if (PCIE_IS_PCIX(bus_p)) {
		/* Pure PCI-X device (non-PCIe) */
		if (PCIE_IS_BDG(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		} else {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_ECC_REG(pfd_p),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_ERR_REG(pfd_p)->pcix_command = 0;
			PCIX_ERR_REG(pfd_p)->pcix_status = 0;
		}
	}

	/* Detach from the scan's error queue */
	pfd_p->pe_prev = NULL;
	pfd_p->pe_next = NULL;
	pfd_p->pe_rber_fatal = B_FALSE;
}

/*
 * Look up, in the current scan's error queue, the pcie_bus_t whose BDF
 * matches "bdf"; returns NULL if no queued device matches.
 */
pcie_bus_t *
pf_find_busp_by_bdf(pf_impl_t *impl, pcie_req_id_t bdf)
{
	pcie_bus_t *temp_bus_p;
	pf_data_t
*temp_pfd_p; 3282 3283 for (temp_pfd_p = impl->pf_dq_head_p; 3284 temp_pfd_p; 3285 temp_pfd_p = temp_pfd_p->pe_next) { 3286 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p); 3287 3288 if (bdf == temp_bus_p->bus_bdf) { 3289 return (temp_bus_p); 3290 } 3291 } 3292 3293 return (NULL); 3294 } 3295 3296 pcie_bus_t * 3297 pf_find_busp_by_addr(pf_impl_t *impl, uint64_t addr) 3298 { 3299 pcie_bus_t *temp_bus_p; 3300 pf_data_t *temp_pfd_p; 3301 3302 for (temp_pfd_p = impl->pf_dq_head_p; 3303 temp_pfd_p; 3304 temp_pfd_p = temp_pfd_p->pe_next) { 3305 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p); 3306 3307 if (pf_in_assigned_addr(temp_bus_p, addr)) { 3308 return (temp_bus_p); 3309 } 3310 } 3311 3312 return (NULL); 3313 } 3314 3315 pcie_bus_t * 3316 pf_find_busp_by_aer(pf_impl_t *impl, pf_data_t *pfd_p) 3317 { 3318 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p); 3319 pcie_bus_t *temp_bus_p = NULL; 3320 pcie_req_id_t bdf; 3321 uint64_t addr; 3322 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)reg_p->pcie_ue_hdr; 3323 uint32_t trans_type = reg_p->pcie_ue_tgt_trans; 3324 3325 if ((tlp_hdr->type == PCIE_TLP_TYPE_CPL) || 3326 (tlp_hdr->type == PCIE_TLP_TYPE_CPLLK)) { 3327 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)®_p->pcie_ue_hdr[1]; 3328 3329 bdf = (cpl_tlp->rid > cpl_tlp->cid) ? 
cpl_tlp->rid : 3330 cpl_tlp->cid; 3331 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3332 } else if (trans_type == PF_ADDR_PIO) { 3333 addr = reg_p->pcie_ue_tgt_addr; 3334 temp_bus_p = pf_find_busp_by_addr(impl, addr); 3335 } else { 3336 /* PF_ADDR_DMA type */ 3337 bdf = reg_p->pcie_ue_tgt_bdf; 3338 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3339 } 3340 3341 return (temp_bus_p); 3342 } 3343 3344 pcie_bus_t * 3345 pf_find_busp_by_saer(pf_impl_t *impl, pf_data_t *pfd_p) 3346 { 3347 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p); 3348 pcie_bus_t *temp_bus_p = NULL; 3349 pcie_req_id_t bdf; 3350 uint64_t addr; 3351 3352 addr = reg_p->pcie_sue_tgt_addr; 3353 bdf = reg_p->pcie_sue_tgt_bdf; 3354 3355 if (addr != 0) { 3356 temp_bus_p = pf_find_busp_by_addr(impl, addr); 3357 } else if (PCIE_CHECK_VALID_BDF(bdf)) { 3358 temp_bus_p = pf_find_busp_by_bdf(impl, bdf); 3359 } 3360 3361 return (temp_bus_p); 3362 } 3363