1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 * Copyright 2023 Oxide Computer Company
25 */
26
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/kmem.h>
30 #include <sys/modctl.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/sunndi.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/fm/util.h>
36 #include <sys/fm/io/ddi.h>
37 #include <sys/fm/io/pci.h>
38 #include <sys/promif.h>
39 #include <sys/disp.h>
40 #include <sys/atomic.h>
41 #include <sys/pcie.h>
42 #include <sys/pci_cap.h>
43 #include <sys/pcie_impl.h>
44
/* Device Status register bits that indicate a PCIe bridge saw an error. */
#define	PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
	PCIE_DEVSTS_CE_DETECTED)

/* Secondary status bits that indicate a conventional PCI bridge error. */
#define	PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
	PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)

/* AER uncorrectable errors partitioned by default severity. */
#define	PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
	PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
#define	PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)

/* Secondary (bridge) AER uncorrectable errors, partitioned the same way. */
#define	PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
	PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
	PCIE_AER_SUCE_SERR_ASSERT)
#define	PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
	PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
	PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
	PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
	PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)

/* Parity error bits, both as detected (master) and signaled (target). */
#define	PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)

/*
 * True when "bit" matches the First Error Pointer in the AER control
 * register, i.e. when the AER header logs correspond to this error.
 */
#define	PF_FIRST_AER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))

#define	HAS_AER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))

/* Same check for the secondary (bridge) AER registers. */
#define	PF_FIRST_SAER_ERR(bit, adv) \
	(bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))

#define	HAS_SAER_LOGS(pfd_p, bit) \
	(PCIE_HAS_AER(pfd_p->pe_bus_p) && \
	PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))

/* Extract the PCI command from the secondary AER header log. */
#define	GET_SAER_CMD(pfd_p) \
	((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
	PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)

/* True when the correctable error status carries the Advisory Non-Fatal bit. */
#define	CE_ADVISORY(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)

/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
	uint32_t bit;		/* Error bit */
	int (*handler)();	/* Error handling function */
	uint16_t affected_flags; /* Primary affected flag */
	/*
	 * Secondary affected flag, effective when the information
	 * indicated by the primary flag is not available, eg.
	 * PF_AFFECTED_AER/SAER/ADDR
	 */
	uint16_t sec_affected_flags;
} pf_fab_err_tbl_t;
100
static pcie_bus_t *pf_is_ready(dev_info_t *);
/* Functions for scanning errors */
static int pf_default_hdl(dev_info_t *, pf_impl_t *);
static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);

/* Functions for gathering errors */
static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
    pcie_bus_t *bus_p, boolean_t bdg);
static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);

/* Functions for analysing errors */
static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
static void pf_adjust_for_no_aer(pf_data_t *);
static void pf_adjust_for_no_saer(pf_data_t *);
static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
    uint32_t);
static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
    pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_no_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static int pf_panic(ddi_fm_error_t *, uint32_t,
    pf_data_t *, pf_data_t *);
static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);

/* PCIe Fabric Handle Lookup Support Functions. */
static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
    uint64_t, pcie_req_id_t);
static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
    pcie_req_id_t, ndi_fmc_t *);
static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
    boolean_t);

static int pf_handler_enter(dev_info_t *, pf_impl_t *);
static void pf_handler_exit(dev_info_t *);
static void pf_reset_pfd(pf_data_t *);

/*
 * Tunables: pcie_full_scan forces every fabric scan to walk the entire
 * tree below the root; pcie_disable_scan turns fabric scanning off.
 */
boolean_t pcie_full_scan = B_FALSE;	/* Force to always do a full scan */
int pcie_disable_scan = 0;		/* Disable fabric scan */
165
/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
pf_eh_enter(pcie_bus_t *bus_p)
{
	/* Intentionally empty: a hook point with no default action. */
}
172
173 /* Inform interested parties that error handling has ended. */
174 void
pf_eh_exit(pcie_bus_t * bus_p)175 pf_eh_exit(pcie_bus_t *bus_p)
176 {
177 pcie_bus_t *rbus_p = PCIE_DIP2BUS(bus_p->bus_rp_dip);
178 pf_data_t *root_pfd_p = PCIE_BUS2PFD(rbus_p);
179 pf_data_t *pfd_p;
180 uint_t intr_type = PCIE_ROOT_EH_SRC(root_pfd_p)->intr_type;
181
182 pciev_eh_exit(root_pfd_p, intr_type);
183
184 /* Clear affected device info and INTR SRC */
185 for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
186 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
187 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
188 if (PCIE_IS_ROOT(PCIE_PFD2BUS(pfd_p))) {
189 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
190 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
191 }
192 }
193 }
194
195 /*
196 * After sending an ereport, or in lieu of doing so, unlock all the devices in
197 * the data queue. We also must clear pe_valid here; this function is called in
198 * the path where we decide not to send an ereport because there is no error
199 * (spurious AER interrupt), as well as from pf_send_ereport() which has already
200 * cleared it. Failing to do this will result in a different path through
201 * pf_dispatch() and the potential for deadlocks. It is safe to do as we are
202 * still holding the handler lock here, just as in pf_send_ereport().
203 */
204 static void
pf_dq_unlock_chain(pf_impl_t * impl)205 pf_dq_unlock_chain(pf_impl_t *impl)
206 {
207 pf_data_t *pfd_p;
208
209 for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
210 pfd_p->pe_valid = B_FALSE;
211 if (pfd_p->pe_lock) {
212 pf_handler_exit(PCIE_PFD2DIP(pfd_p));
213 }
214 }
215 }
216
217 /*
218 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The
219 * caller may create a local pf_data_t with the "root fault"
220 * information populated to either do a precise or full scan. More
221 * than one pf_data_t maybe linked together if there are multiple
222 * errors. Only a PCIe compliant Root Port device may pass in NULL
223 * for the root_pfd_p.
224 *
225 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
226 * the rdip. PCIe Root ports should call pf_scan_fabric using its parent as
227 * the rdip.
228 *
229 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
230 * any RC detected errors that propagated to/from the fabric.
231 *
232 * This code assumes that by the time pf_scan_fabric is
233 * called, pf_handler_enter has NOT been called on the rdip.
234 */
int
pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
{
	pf_impl_t impl;
	pf_data_t *pfd_p, *pfd_head_p, *pfd_tail_p;
	int scan_flag = PF_SCAN_SUCCESS;
	int analyse_flag = PF_ERR_NO_ERROR;
	boolean_t full_scan = pcie_full_scan;

	/* Scanning may be administratively disabled; report "no error". */
	if (pcie_disable_scan)
		return (analyse_flag);

	/* Find the head and tail of this link list */
	pfd_head_p = root_pfd_p;
	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
	    pfd_tail_p = pfd_tail_p->pe_next)
		;

	/* Save head/tail */
	impl.pf_total = 0;
	impl.pf_derr = derr;
	impl.pf_dq_head_p = pfd_head_p;
	impl.pf_dq_tail_p = pfd_tail_p;

	/* If scan is initiated from RP then RP itself must be scanned. */
	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
	    !root_pfd_p) {
		scan_flag = pf_handler_enter(rdip, &impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			goto done;

		scan_flag = pf_default_hdl(rdip, &impl);
		/* No errors below the RP means nothing further to scan. */
		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
			goto done;
	}

	/*
	 * Scan the fabric using the scan_bdf and scan_addr in error q.
	 * scan_bdf will be valid in the following cases:
	 *	- Fabric message
	 *	- Poisoned TLP
	 *	- Signaled UR/CA
	 *	- Received UR/CA
	 *	- PIO load failures
	 */
	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
	    pfd_p = pfd_p->pe_next) {
		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);

		if (PFD_IS_RC(pfd_p))
			impl.pf_total++;

		/* Any root fault requesting a full scan upgrades the scan. */
		if (impl.pf_fault->full_scan)
			full_scan = B_TRUE;

		/* Only dispatch when there is something to look for. */
		if (full_scan ||
		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
		    impl.pf_fault->scan_addr)
			scan_flag |= pf_dispatch(rdip, &impl, full_scan);

		/* A full scan covers everything; no need to continue. */
		if (full_scan)
			break;
	}

done:
	/*
	 * If this is due to safe access, don't analyze the errors and return
	 * success regardless of how scan fabric went.
	 */
	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
		analyse_flag = PF_ERR_NO_PANIC;
	} else {
		analyse_flag = pf_analyse_error(derr, &impl);
	}

	/*
	 * If analyse_flag is 0 or PF_ERR_NO_ERROR, there's nothing here. Skip
	 * ereport generation unless something went wrong with the scan.
	 */
	if ((analyse_flag & ~PF_ERR_NO_ERROR) != 0 ||
	    (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_DEADLOCK)) != 0) {
		pf_send_ereport(derr, &impl);
	} else {
		pf_dq_unlock_chain(&impl);
	}

	/*
	 * Check if any hardened driver's callback reported a panic.
	 * If so panic.
	 */
	if (scan_flag & PF_SCAN_CB_FAILURE)
		analyse_flag |= PF_ERR_PANIC;

	/*
	 * If a deadlock was detected, panic the system as error analysis has
	 * been compromised.
	 */
	if (scan_flag & PF_SCAN_DEADLOCK)
		analyse_flag |= PF_ERR_PANIC_DEADLOCK;

	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);

	return (analyse_flag);
}
339
/*
 * Force every subsequent fabric scan to walk the entire tree rather than
 * only the precise fault path (latches the pcie_full_scan tunable on).
 */
void
pcie_force_fullscan(void)
{
	pcie_full_scan = B_TRUE;
}
345
346 /*
347 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
348 * falls in the error path.
349 *
350 * Returns PF_SCAN_* flags
351 */
352 static int
pf_dispatch(dev_info_t * pdip,pf_impl_t * impl,boolean_t full_scan)353 pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
354 {
355 dev_info_t *dip;
356 pcie_req_id_t rid = impl->pf_fault->scan_bdf;
357 pcie_bus_t *bus_p;
358 int scan_flag = PF_SCAN_SUCCESS;
359
360 for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
361 /* Make sure dip is attached and ready */
362 if (!(bus_p = pf_is_ready(dip)))
363 continue;
364
365 scan_flag |= pf_handler_enter(dip, impl);
366 if (scan_flag & PF_SCAN_DEADLOCK)
367 break;
368
369 /*
370 * Handle this device if it is a:
371 * o Full Scan
372 * o PCI/PCI-X Device
373 * o Fault BDF = Device BDF
374 * o BDF/ADDR is in range of the Bridge/Switch
375 */
376 if (full_scan ||
377 (bus_p->bus_bdf == rid) ||
378 pf_in_bus_range(bus_p, rid) ||
379 pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
380 int hdl_flag = pf_default_hdl(dip, impl);
381 scan_flag |= hdl_flag;
382
383 /*
384 * A bridge may have detected no errors in which case
385 * there is no need to scan further down.
386 */
387 if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
388 continue;
389 } else {
390 pf_handler_exit(dip);
391 continue;
392 }
393
394 /* match or in bridge bus-range */
395 switch (bus_p->bus_dev_type) {
396 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
397 case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
398 scan_flag |= pf_dispatch(dip, impl, B_TRUE);
399 break;
400 case PCIE_PCIECAP_DEV_TYPE_UP:
401 case PCIE_PCIECAP_DEV_TYPE_DOWN:
402 case PCIE_PCIECAP_DEV_TYPE_ROOT:
403 {
404 pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p);
405 pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p);
406 pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p);
407 /*
408 * Continue if the fault BDF != the switch or there is a
409 * parity error
410 */
411 if ((bus_p->bus_bdf != rid) ||
412 (err_p->pci_err_status & PF_PCI_PARITY_ERR) ||
413 (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR))
414 scan_flag |= pf_dispatch(dip, impl, full_scan);
415 break;
416 }
417 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
418 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
419 /*
420 * Reached a PCIe end point so stop. Note dev_type
421 * PCI_DEV is just a PCIe device that requires IO Space
422 */
423 break;
424 case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
425 if (PCIE_IS_BDG(bus_p))
426 scan_flag |= pf_dispatch(dip, impl, B_TRUE);
427 break;
428 default:
429 ASSERT(B_FALSE);
430 }
431 }
432 return (scan_flag);
433 }
434
435 /* Returns whether the "bdf" is in the bus range of a switch/bridge */
436 boolean_t
pf_in_bus_range(pcie_bus_t * bus_p,pcie_req_id_t bdf)437 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf)
438 {
439 pci_bus_range_t *br_p = &bus_p->bus_bus_range;
440 uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >>
441 PCIE_REQ_ID_BUS_SHIFT;
442
443 /* check if given bdf falls within bridge's bus range */
444 if (PCIE_IS_BDG(bus_p) &&
445 ((bus_no >= br_p->lo) && (bus_no <= br_p->hi)))
446 return (B_TRUE);
447 else
448 return (B_FALSE);
449 }
450
451 /*
452 * Return whether the "addr" is in the assigned addr of a device.
453 */
454 boolean_t
pf_in_assigned_addr(pcie_bus_t * bus_p,uint64_t addr)455 pf_in_assigned_addr(pcie_bus_t *bus_p, uint64_t addr)
456 {
457 uint_t i;
458 uint64_t low, hi;
459 pci_regspec_t *assign_p = bus_p->bus_assigned_addr;
460
461 for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) {
462 low = assign_p->pci_phys_low;
463 hi = low + assign_p->pci_size_low;
464 if ((addr < hi) && (addr >= low))
465 return (B_TRUE);
466 }
467 return (B_FALSE);
468 }
469
470 /*
471 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the
472 * "addr" is in the assigned addr of a device.
473 */
474 static boolean_t
pf_in_addr_range(pcie_bus_t * bus_p,uint64_t addr)475 pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr)
476 {
477 uint_t i;
478 uint64_t low, hi;
479 ppb_ranges_t *ranges_p = bus_p->bus_addr_ranges;
480
481 if (!addr)
482 return (B_FALSE);
483
484 /* check if given address belongs to this device */
485 if (pf_in_assigned_addr(bus_p, addr))
486 return (B_TRUE);
487
488 /* check if given address belongs to a child below this device */
489 if (!PCIE_IS_BDG(bus_p))
490 return (B_FALSE);
491
492 for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) {
493 switch (ranges_p->child_high & PCI_ADDR_MASK) {
494 case PCI_ADDR_IO:
495 case PCI_ADDR_MEM32:
496 low = ranges_p->child_low;
497 hi = ranges_p->size_low + low;
498 if ((addr < hi) && (addr >= low))
499 return (B_TRUE);
500 break;
501 case PCI_ADDR_MEM64:
502 low = ((uint64_t)ranges_p->child_mid << 32) |
503 (uint64_t)ranges_p->child_low;
504 hi = (((uint64_t)ranges_p->size_high << 32) |
505 (uint64_t)ranges_p->size_low) + low;
506 if ((addr < hi) && (addr >= low))
507 return (B_TRUE);
508 break;
509 }
510 }
511 return (B_FALSE);
512 }
513
514 static pcie_bus_t *
pf_is_ready(dev_info_t * dip)515 pf_is_ready(dev_info_t *dip)
516 {
517 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
518 if (!bus_p)
519 return (NULL);
520
521 if (!(bus_p->bus_fm_flags & PF_FM_READY))
522 return (NULL);
523 return (bus_p);
524 }
525
526 static void
pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t * pcix_ecc_regs,pcie_bus_t * bus_p,boolean_t bdg)527 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
528 pcie_bus_t *bus_p, boolean_t bdg)
529 {
530 if (bdg) {
531 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
532 PCI_PCIX_BDG_ECC_STATUS);
533 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
534 PCI_PCIX_BDG_ECC_FST_AD);
535 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
536 PCI_PCIX_BDG_ECC_SEC_AD);
537 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
538 PCI_PCIX_BDG_ECC_ATTR);
539 } else {
540 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
541 PCI_PCIX_ECC_STATUS);
542 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
543 PCI_PCIX_ECC_FST_AD);
544 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
545 PCI_PCIX_ECC_SEC_AD);
546 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
547 PCI_PCIX_ECC_ATTR);
548 }
549 }
550
551
/*
 * Capture the PCI-X error state for a device into its pf_data_t. Bridges
 * (Type 1 headers) and non-bridge functions (Type 0 headers) carry
 * different, mutually exclusive PCI-X capabilities.
 */
static void
pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	/*
	 * For PCI-X device PCI-X Capability only exists for Type 0 Headers.
	 * PCI-X Bridge Capability only exists for Type 1 Headers.
	 * Both capabilities do not exist at the same time.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p,
		    PCI_PCIX_SEC_STATUS);
		pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_BDG_STATUS);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers, bit one is
			 * read-only so we make sure we do not write to it.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				/* Select ECC register set 0 and gather it. */
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    0);
				pf_pcix_ecc_regs_gather(
				    PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE);
				/* Select register set 1 for the read below. */
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    1);
			}
			/*
			 * NOTE(review): the selector was just switched to set
			 * 1 above, yet this gathers into ECC register slot 0
			 * again, while pf_pcix_regs_clear() writes slot 1
			 * back. Confirm whether PCIX_BDG_ECC_REG(pfd_p, 1)
			 * was intended here.
			 */
			pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0),
			    bus_p, B_TRUE);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p,
		    PCI_PCIX_COMMAND);
		pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p,
		    PCI_PCIX_STATUS);
		/*
		 * NOTE(review): bdg is passed as B_TRUE on this non-bridge
		 * path, which selects the bridge ECC register offsets —
		 * confirm whether B_FALSE was intended.
		 */
		if (PCIX_ECC_VERSION_CHECK(bus_p))
			pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p,
			    B_TRUE);
	}
}
599
/*
 * Capture the PCI Express error state for a device into its pf_data_t:
 * base PCIe registers, PCI-X state for PCIe-to-PCI-X bridges, root-port
 * registers, slot registers for hotplug-capable ports, and — when the
 * device advertises AER — the advanced error reporting registers.
 */
static void
pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS);
	pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
	pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP);

	/* PCIe-to-PCI-X bridges also carry PCI-X error state. */
	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p,
		    PCIE_ROOTSTS);
		pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p,
		    PCIE_ROOTCTL);
	}

	/*
	 * For eligible components, we gather Slot Register state.
	 *
	 * Eligible components are:
	 * - a Downstream Port or a Root Port with the Slot Implemented
	 * capability bit set
	 * - hotplug capable
	 *
	 * Slot register state is useful, for instance, to determine whether the
	 * Slot's child device is physically present (via the Slot Status
	 * register).
	 */
	if ((PCIE_IS_SWD(bus_p) || PCIE_IS_ROOT(bus_p)) &&
	    PCIE_IS_HOTPLUG_ENABLED(PCIE_BUS2DIP(bus_p))) {
		pf_pcie_slot_regs_t *pcie_slot_regs = PCIE_SLOT_REG(pfd_p);
		pcie_slot_regs->pcie_slot_cap = PCIE_CAP_GET(32, bus_p,
		    PCIE_SLOTCAP);
		pcie_slot_regs->pcie_slot_control = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTCTL);
		pcie_slot_regs->pcie_slot_status = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTSTS);

		/*
		 * Only mark the slot registers valid if none of the reads
		 * came back as the all-ones "no response" value.
		 */
		if (pcie_slot_regs->pcie_slot_cap != PCI_EINVAL32 &&
		    pcie_slot_regs->pcie_slot_control != PCI_EINVAL16 &&
		    pcie_slot_regs->pcie_slot_status != PCI_EINVAL16) {
			pcie_slot_regs->pcie_slot_regs_valid = B_TRUE;
		}
	}

	/* Everything below requires the AER capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	/* Gather UE AERs */
	pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CTL);
	pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_STS);
	pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_MASK);
	pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_SERV);
	/* The four-DWORD TLP header log for the first logged UE. */
	PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG);
	PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x4);
	PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x8);
	PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0xc);

	/* Gather CE AERs */
	pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_STS);
	pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_MASK);

	/*
	 * If pci express to pci bridge then grab the bridge
	 * error registers.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SCTL);
		pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_STS);
		pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_MASK);
		pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_SERV);
		/* Secondary (bridge) header log, also four DWORDs. */
		PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG);
		PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x4);
		PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x8);
		PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0xc);
	}

	/*
	 * If PCI Express root port then grab the root port
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs =
		    PCIE_ADV_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_CMD);
		pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_STS);
		pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_CE_SRC_ID);
		pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_ERR_SRC_ID);
	}
}
722
723 static void
pf_pci_regs_gather(pf_data_t * pfd_p,pcie_bus_t * bus_p)724 pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
725 {
726 pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p);
727
728 /*
729 * Start by reading all the error registers that are available for
730 * pci and pci express and for leaf devices and bridges/switches
731 */
732 pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT);
733 pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM);
734
735 /*
736 * If pci-pci bridge grab PCI bridge specific error registers.
737 */
738 if (PCIE_IS_BDG(bus_p)) {
739 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
740 pci_bdg_regs->pci_bdg_sec_stat =
741 PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
742 pci_bdg_regs->pci_bdg_ctrl =
743 PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
744 }
745
746 /*
747 * If pci express device grab pci express error registers and
748 * check for advanced error reporting features and grab them if
749 * available.
750 */
751 if (PCIE_IS_PCIE(bus_p))
752 pf_pcie_regs_gather(pfd_p, bus_p);
753 else if (PCIE_IS_PCIX(bus_p))
754 pf_pcix_regs_gather(pfd_p, bus_p);
755
756 }
757
/*
 * Clear the PCI-X error state previously captured by pf_pcix_regs_gather()
 * by writing the saved values back to the corresponding registers.
 */
static void
pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
		    pcix_bdg_regs->pcix_bdg_sec_stat);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
		    pcix_bdg_regs->pcix_bdg_stat);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers. For
			 * clearing, there is no need to "select" the ECC
			 * register, just write what was originally read.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				/* Write back ECC register set 0 first. */
				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);

			}
			/* Then ECC register set 1 (or the only set). */
			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
		    pcix_regs->pcix_status);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);

			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
			    pcix_ecc_regs->pcix_ecc_ctlstat);
		}
	}
}
804
/*
 * Clear the PCI Express error state previously captured by
 * pf_pcie_regs_gather() by writing the saved status values back.
 */
static void
pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);

	/* PCIe-to-PCI-X bridges also have PCI-X state to clear. */
	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	/* Everything below requires the AER capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
	    pcie_adv_regs->pcie_ue_status);

	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
	    pcie_adv_regs->pcie_ce_status);

	/* PCIe-to-PCI bridges carry secondary AER status as well. */
	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
		    pcie_bdg_regs->pcie_sue_status);
	}

	/*
	 * If PCI Express root complex then clear the root complex
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;

		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
		    pcie_rp_regs->pcie_rp_err_status);
	}
}
846
847 static void
pf_pci_regs_clear(pf_data_t * pfd_p,pcie_bus_t * bus_p)848 pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
849 {
850 if (PCIE_IS_PCIE(bus_p))
851 pf_pcie_regs_clear(pfd_p, bus_p);
852 else if (PCIE_IS_PCIX(bus_p))
853 pf_pcix_regs_clear(pfd_p, bus_p);
854
855 PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);
856
857 if (PCIE_IS_BDG(bus_p)) {
858 pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
859 PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
860 pci_bdg_regs->pci_bdg_sec_stat);
861 }
862 }
863
864 /* ARGSUSED */
865 void
pcie_clear_errors(dev_info_t * dip)866 pcie_clear_errors(dev_info_t *dip)
867 {
868 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
869 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
870
871 ASSERT(bus_p);
872
873 pf_pci_regs_gather(pfd_p, bus_p);
874 pf_pci_regs_clear(pfd_p, bus_p);
875 }
876
/*
 * Find the fault BDF, fault Addr or full scan on a PCIe Root Port.
 *
 * Populates the root fault structure in "pfd_p": if exactly one fault can
 * be attributed to a BDF or address, record it for a precise scan;
 * otherwise fall back to requesting a full fabric scan.
 */
static void
pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
	pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
	uint32_t root_err = rp_regs->pcie_rp_err_status;
	uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
	int num_faults = 0;

	/* Since this data structure is reused, make sure to reset it */
	root_fault->full_scan = B_FALSE;
	root_fault->scan_bdf = PCIE_INVALID_BDF;
	root_fault->scan_addr = 0;

	/*
	 * Without AER there is no source information at all; any bridge
	 * error on the secondary side forces a full scan.
	 */
	if (!PCIE_HAS_AER(bus_p) &&
	    (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/*
	 * Check to see if an error has been received that
	 * requires a scan of the fabric. Count the number of
	 * faults seen. If MUL CE/FE_NFE that counts for
	 * at least 2 faults, so just return with full_scan.
	 */
	if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
	    (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	if (root_err & PCIE_AER_RE_STS_CE_RCVD)
		num_faults++;

	if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_CA)
		num_faults++;

	if (ue_err & PCIE_AER_UCE_UR)
		num_faults++;

	/* If no faults just return */
	if (num_faults == 0)
		return;

	/* If faults > 1 do full scan */
	if (num_faults > 1) {
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
		return;
	}

	/*
	 * By this point, there is only 1 fault detected. Try to attribute
	 * it: received CE/FE-NFE carry a source BDF; a CA/UR with valid AER
	 * logs yields a target address via TLP decode. Each successful
	 * attribution decrements num_faults back to zero.
	 */
	if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
		num_faults--;
	} else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
		num_faults--;
	} else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
	    HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
	    (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
	    DDI_SUCCESS)) {
		PCIE_ROOT_FAULT(pfd_p)->scan_addr =
		    PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
		num_faults--;
	}

	/*
	 * This means an error did occur, but we couldn't extract the fault BDF
	 */
	if (num_faults > 0)
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;

}
955
956
957 /*
958 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
959 *
960 * Returns a scan flag.
961 * o PF_SCAN_SUCCESS - Error gathered and cleared sucessfuly, data added to
962 * Fault Q
963 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
964 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
965 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
966 * unnecessary scanning
967 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
968 * time.
969 */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
	int cb_sts, scan_flag = PF_SCAN_SUCCESS;

	/* Make sure this device hasn't already been snapshotted and cleared */
	if (pfd_p->pe_valid == B_TRUE) {
		scan_flag |= PF_SCAN_IN_DQ;
		goto done;
	}

	/*
	 * If this is a device used for PCI passthrough into a virtual machine,
	 * don't let any error it caused panic the system.
	 */
	if (bus_p->bus_fm_flags & PF_FM_IS_PASSTHRU)
		pfd_p->pe_severity_mask |= PF_ERR_PANIC;

	/*
	 * Read vendor/device ID and check with cached data; if it doesn't
	 * match, it could very well mean that the device is no longer
	 * responding.  In this case, we return PF_SCAN_BAD_RESPONSE; should
	 * the caller choose to panic in this case, we will have the basic
	 * info in the error queue for the purposes of postmortem debugging.
	 */
	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
		char buf[FM_MAX_CLASS];

		/* Post a "no response" ereport before bailing out. */
		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
		    PCI_ERROR_SUBCLASS, PCI_NR);
		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

		/*
		 * For IOV/Hotplug purposes skip gathering info for this
		 * device, but populate affected info and severity.  Clear out
		 * any data that may have been saved in the last fabric scan.
		 */
		pf_reset_pfd(pfd_p);
		pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

		/* Add the snapshot to the error q */
		pf_en_dq(pfd_p, impl);
		pfd_p->pe_valid = B_TRUE;

		return (PF_SCAN_BAD_RESPONSE);
	}

	/* Snapshot the device's error registers, then clear them. */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);

	/* For a root port, work out which BDF/address the fault points at. */
	if (PCIE_IS_RP(bus_p))
		pf_pci_find_rp_fault(pfd_p, bus_p);

	/* Let a hardened driver's registered error handler weigh in. */
	cb_sts = pf_fm_callback(dip, impl->pf_derr);

	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
		scan_flag |= PF_SCAN_CB_FAILURE;

	/* Add the snapshot to the error q */
	pf_en_dq(pfd_p, impl);

done:
	/*
	 * If a bridge does not have any error no need to scan any further
	 * down.
	 * For PCIe devices, check the PCIe device status and PCI secondary
	 * status.
	 * - Some non-compliant PCIe devices do not utilize PCIe
	 *   error registers.  If so rely on legacy PCI error registers.
	 * For PCI devices, check the PCI secondary status.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	if (PCIE_IS_PCI_BDG(bus_p) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	pfd_p->pe_valid = B_TRUE;
	return (scan_flag);
}
1056
1057 /*
1058 * Set the passthru flag on a device bus_p. Called by passthru drivers to
1059 * indicate when a device is or is no longer under passthru control.
1060 */
1061 void
pf_set_passthru(dev_info_t * dip,boolean_t is_passthru)1062 pf_set_passthru(dev_info_t *dip, boolean_t is_passthru)
1063 {
1064 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1065
1066 if (is_passthru) {
1067 atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_PASSTHRU);
1068 } else {
1069 atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_PASSTHRU);
1070 }
1071 }
1072
1073 /*
1074 * Called during postattach to initialize a device's error handling
1075 * capabilities. If the devices has already been hardened, then there isn't
1076 * much needed. Otherwise initialize the device's default FMA capabilities.
1077 *
1078 * In a future project where PCIe support is removed from pcifm, several
1079 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
1080 * created here so that the PCI/PCIe eversholt rules will work properly.
1081 */
1082 void
pf_init(dev_info_t * dip,ddi_iblock_cookie_t ibc,ddi_attach_cmd_t cmd)1083 pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
1084 {
1085 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1086 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
1087 boolean_t need_cb_register = B_FALSE;
1088
1089 if (!bus_p) {
1090 cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
1091 ddi_driver_name(dip), ddi_get_instance(dip));
1092 return;
1093 }
1094
1095 if (fmhdl) {
1096 /*
1097 * If device is only ereport capable and not callback capable
1098 * make it callback capable. The only downside is that the
1099 * "fm-errcb-capable" property is not created for this device
1100 * which should be ok since it's not used anywhere.
1101 */
1102 if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
1103 need_cb_register = B_TRUE;
1104 } else {
1105 int cap;
1106 /*
1107 * fm-capable in driver.conf can be used to set fm_capabilities.
1108 * If fm-capable is not defined, set the default
1109 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
1110 */
1111 cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1112 DDI_PROP_DONTPASS, "fm-capable",
1113 DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
1114 cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
1115
1116 atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_NH);
1117
1118 if (cmd == DDI_ATTACH) {
1119 ddi_fm_init(dip, &cap, &ibc);
1120 pci_ereport_setup(dip);
1121 }
1122
1123 if (cap & DDI_FM_ERRCB_CAPABLE)
1124 need_cb_register = B_TRUE;
1125
1126 fmhdl = DEVI(dip)->devi_fmhdl;
1127 }
1128
1129 /* If ddi_fm_init fails for any reason RETURN */
1130 if (!fmhdl) {
1131 (void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
1132 return;
1133 }
1134
1135 fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE;
1136 if (cmd == DDI_ATTACH) {
1137 if (need_cb_register)
1138 ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
1139 }
1140
1141 atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
1142 }
1143
1144 /* undo FMA lock, called at predetach */
1145 void
pf_fini(dev_info_t * dip,ddi_detach_cmd_t cmd)1146 pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
1147 {
1148 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1149
1150 if (!bus_p)
1151 return;
1152
1153 /* Don't fini anything if device isn't FM Ready */
1154 if (!(bus_p->bus_fm_flags & PF_FM_READY))
1155 return;
1156
1157 /* no other code should set the flag to false */
1158 atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_READY);
1159
1160 /*
1161 * Grab the mutex to make sure device isn't in the middle of
1162 * error handling. Setting the bus_fm_flag to ~PF_FM_READY
1163 * should prevent this device from being error handled after
1164 * the mutex has been released.
1165 */
1166 (void) pf_handler_enter(dip, NULL);
1167 pf_handler_exit(dip);
1168
1169 /* undo non-hardened drivers */
1170 if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
1171 if (cmd == DDI_DETACH) {
1172 atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_NH);
1173 pci_ereport_teardown(dip);
1174 /*
1175 * ddi_fini itself calls ddi_handler_unregister,
1176 * so no need to explicitly call unregister.
1177 */
1178 ddi_fm_fini(dip);
1179 }
1180 }
1181 }
1182
1183 /*ARGSUSED*/
/*
 * No-op error callback registered by pf_init() on behalf of drivers that
 * were promoted to callback capable; it always reports DDI_FM_OK.
 */
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
	return (DDI_FM_OK);
}
1189
1190 /*
1191 * Add PFD to queue. If it is an RC add it to the beginning,
1192 * otherwise add it to the end.
1193 */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
	pf_data_t *head_p = impl->pf_dq_head_p;
	pf_data_t *tail_p = impl->pf_dq_tail_p;

	impl->pf_total++;

	/* Empty queue: the first entry must be root fault data. */
	if (!head_p) {
		ASSERT(PFD_IS_ROOT(pfd_p));
		impl->pf_dq_head_p = pfd_p;
		impl->pf_dq_tail_p = pfd_p;
		pfd_p->pe_prev = NULL;
		pfd_p->pe_next = NULL;
		return;
	}

	/* Check if this is a Root Port ereport */
	if (PFD_IS_ROOT(pfd_p)) {
		pf_data_t *root_p, *last_p = NULL;

		/* The first item must be a RP */
		root_p = head_p;
		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
		    last_p = last_p->pe_next)
			root_p = last_p;

		/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
		root_p->pe_next = pfd_p;
		pfd_p->pe_prev = root_p;
		pfd_p->pe_next = last_p;

		/* If there is no non-RP entry yet, this becomes the tail. */
		if (last_p)
			last_p->pe_prev = pfd_p;
		else
			tail_p = pfd_p;
	} else {
		/* Non-root fault data is simply appended at the tail. */
		tail_p->pe_next = pfd_p;
		pfd_p->pe_prev = tail_p;
		pfd_p->pe_next = NULL;
		tail_p = pfd_p;
	}

	impl->pf_dq_head_p = head_p;
	impl->pf_dq_tail_p = tail_p;
}
1240
1241 /*
1242 * Ignore:
1243 * - TRAINING: as leaves do not have children
1244 * - SD: as leaves do not have children
1245 */
/*
 * Error-disposition table for PCIe endpoints (and PCI devices behind a PCIe
 * cap).  Each entry maps an AER UE status bit to its analysis handler plus
 * the primary/secondary "affected device" flags (see pf_analyse_error_tbl()).
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}		/* terminator */
};
1279
/* Error-disposition table for PCIe root ports. */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_ADDR, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_CA,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}		/* terminator */
};
1320
/* Error-disposition table for PCIe switch upstream/downstream ports. */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
	{PCIE_AER_UCE_TRAINING,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD,	pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP,	pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO,	pf_analyse_to,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_CA,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC,	pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC,	pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR,	pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}		/* terminator */
};
1361
/*
 * Error-disposition table for PCIe-PCI bridges, keyed by the secondary
 * (SAER) uncorrectable status bits.
 */
const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
	{PCIE_AER_SUCE_TA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_MA_ON_SC,	pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_TA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_MA,		pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_USC_ERR,		pf_panic,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_DATA_ERR,	pf_analyse_uc_data,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_ATTR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_UC_ADDR_ERR,	pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_TIMER_EXPIRED,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_PERR_ASSERT,	pf_analyse_perr_assert,
	    0, 0},

	{PCIE_AER_SUCE_SERR_ASSERT,	pf_no_panic,
	    0, 0},

	{PCIE_AER_SUCE_INTERNAL_ERR,	pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{0, NULL, 0, 0}		/* terminator */
};
1404
/*
 * Error-disposition table for PCI-PCI bridges, keyed by the legacy PCI
 * secondary status bits.
 */
const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}		/* terminator */
};
1426
/*
 * Error-disposition table for legacy PCI devices, keyed by the PCI status
 * register bits.
 */
const pf_fab_err_tbl_t pcie_pci_tbl[] = {
	{PCI_STAT_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR,	pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB,	pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}		/* terminator */
};
1448
/*
 * Status bits that are actually being reported, i.e. the (secondary) AER
 * status register with its mask register applied.  XOR-ing the mask with
 * all-ones inverts it, so masked-off bits are dropped from the status.
 */
#define	PF_MASKED_AER_ERR(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ue_status & \
	    ((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF))
#define	PF_MASKED_SAER_ERR(pfd_p) \
	(PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \
	    ((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF))
1455 /*
1456 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
1457 * Queue.
1458 */
static int
pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	int sts_flags, error_flags = 0;
	pf_data_t *pfd_p;

	/* Walk every fault-data entry queued by the fabric scan. */
	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		sts_flags = 0;

		/* skip analysing error when no error info is gathered */
		if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
			goto done;

		/* Dispatch to the disposition table(s) for this device type */
		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
			/* no adjust_for_aer for pseudo RC */
			/* keep the severity passed on from RC if any */
			sts_flags |= pfd_p->pe_severity_flags;
			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			/* Bridges get both the AER and SAER tables. */
			pf_adjust_for_no_aer(pfd_p);
			pf_adjust_for_no_saer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_tbl,
			    PF_MASKED_AER_ERR(pfd_p));
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_bdg_tbl,
			    PF_MASKED_SAER_ERR(pfd_p));
			/*
			 * Some non-compliant PCIe devices do not utilize PCIe
			 * error registers.  So fallthrough and rely on legacy
			 * PCI error registers.
			 */
			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
				break;
			/* FALLTHROUGH */
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pci_tbl,
			    PCI_ERR_REG(pfd_p)->pci_err_status);

			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
				break;

			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pci_bdg_tbl,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
		}

		pfd_p->pe_severity_flags = sts_flags;

done:
		pfd_p->pe_orig_severity_flags = pfd_p->pe_severity_flags;
		/* Have pciev_eh adjust the severity */
		pfd_p->pe_severity_flags = pciev_eh(pfd_p, impl);

		/* Drop severities the caller asked to have masked off. */
		pfd_p->pe_severity_flags &= ~pfd_p->pe_severity_mask;

		error_flags |= pfd_p->pe_severity_flags;
	}

	return (error_flags);
}
1554
1555 static int
pf_analyse_error_tbl(ddi_fm_error_t * derr,pf_impl_t * impl,pf_data_t * pfd_p,const pf_fab_err_tbl_t * tbl,uint32_t err_reg)1556 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
1557 pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg)
1558 {
1559 const pf_fab_err_tbl_t *row;
1560 int err = 0;
1561 uint16_t flags;
1562 uint32_t bit;
1563
1564 for (row = tbl; err_reg && (row->bit != 0); row++) {
1565 bit = row->bit;
1566 if (!(err_reg & bit))
1567 continue;
1568 err |= row->handler(derr, bit, impl->pf_dq_head_p, pfd_p);
1569
1570 flags = row->affected_flags;
1571 /*
1572 * check if the primary flag is valid;
1573 * if not, use the secondary flag
1574 */
1575 if (flags & PF_AFFECTED_AER) {
1576 if (!HAS_AER_LOGS(pfd_p, bit)) {
1577 flags = row->sec_affected_flags;
1578 }
1579 } else if (flags & PF_AFFECTED_SAER) {
1580 if (!HAS_SAER_LOGS(pfd_p, bit)) {
1581 flags = row->sec_affected_flags;
1582 }
1583 } else if (flags & PF_AFFECTED_ADDR) {
1584 /* only Root has this flag */
1585 if (PCIE_ROOT_FAULT(pfd_p)->scan_addr == 0) {
1586 flags = row->sec_affected_flags;
1587 }
1588 }
1589
1590 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags |= flags;
1591 }
1592
1593 if (!err)
1594 err = PF_ERR_NO_ERROR;
1595
1596 return (err);
1597 }
1598
1599 /*
 * PCIe Completer Abort and Unsupported Request error analyser.  If a PCIe
 * device
1601 * issues a CA/UR a corresponding Received CA/UR should have been seen in the
1602 * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then
1603 * this error may be safely ignored. If not check the logs and see if an
1604 * associated handler for this transaction can be found.
1605 */
1606 /* ARGSUSED */
1607 static int
pf_analyse_ca_ur(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1608 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1609 pf_data_t *pfd_p)
1610 {
1611 uint32_t abort_type;
1612 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1613
1614 /* If UR's are masked forgive this error */
1615 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1616 (bit == PCIE_AER_UCE_UR))
1617 return (PF_ERR_NO_PANIC);
1618
1619 /*
1620 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
1621 * such as a config read or a bad DMA address.
1622 */
1623 if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
1624 goto handle_lookup;
1625
1626 if (bit == PCIE_AER_UCE_UR)
1627 abort_type = PCI_STAT_R_MAST_AB;
1628 else
1629 abort_type = PCI_STAT_R_TARG_AB;
1630
1631 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1632 return (PF_ERR_MATCHED_RC);
1633
1634 handle_lookup:
1635 if (HAS_AER_LOGS(pfd_p, bit) &&
1636 pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
1637 return (PF_ERR_MATCHED_DEVICE);
1638
1639 return (PF_ERR_PANIC);
1640 }
1641
1642 /*
1643 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe
1644 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
1645 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so
1646 * then this error may be safely ignored. If not check the logs and see if an
1647 * associated handler for this transaction can be found.
1648 */
1649 /* ARGSUSED */
1650 static int
pf_analyse_ma_ta(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1651 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1652 pf_data_t *pfd_p)
1653 {
1654 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1655 uint32_t abort_type;
1656
1657 /* If UR's are masked forgive this error */
1658 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1659 (bit == PCIE_AER_SUCE_RCVD_MA))
1660 return (PF_ERR_NO_PANIC);
1661
1662 if (bit == PCIE_AER_SUCE_RCVD_MA)
1663 abort_type = PCI_STAT_R_MAST_AB;
1664 else
1665 abort_type = PCI_STAT_R_TARG_AB;
1666
1667 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1668 return (PF_ERR_MATCHED_RC);
1669
1670 if (!HAS_SAER_LOGS(pfd_p, bit))
1671 return (PF_ERR_PANIC);
1672
1673 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
1674 return (PF_ERR_MATCHED_DEVICE);
1675
1676 return (PF_ERR_PANIC);
1677 }
1678
1679 /*
1680 * Generic PCI error analyser. This function is used for Parity Errors,
1681 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
1682 * In general PCI devices do not have error logs, it is very difficult to figure
1683 * out what transaction caused the error. Instead find the nearest PCIe-PCI
1684 * Bridge and check to see if it has logs and if it has an error associated with
1685 * this PCI Device.
1686 */
1687 /* ARGSUSED */
static int
pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	pf_data_t *parent_pfd_p;
	uint16_t cmd;
	uint32_t aer_ue_status;
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
	pf_pcie_adv_bdg_err_regs_t *parent_saer_p;

	/* A signalled system error is always fatal. */
	if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
		return (PF_ERR_PANIC);

	/* If UR's are masked forgive this error */
	if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
	    (bit == PCI_STAT_R_MAST_AB))
		return (PF_ERR_NO_PANIC);

	/*
	 * Map the PCI status bit to the secondary AER status bits that the
	 * parent PCIe-PCI bridge would have latched for the same event.
	 */
	if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
		aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
	} else {
		aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
		    PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
		    PCIE_AER_SUCE_RCVD_MA);
	}

	parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
	if (parent_pfd_p == NULL)
		return (PF_ERR_PANIC);

	/* Check if parent bridge has seen this error */
	parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
	if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
	    !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
		return (PF_ERR_PANIC);

	/*
	 * If the addr or bdf from the parent PCIe bridge logs belong to this
	 * PCI device, assume the PCIe bridge's error handling has already
	 * taken care of this PCI device's error.
	 */
	if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
		return (PF_ERR_PANIC);

	if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
	    pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
		return (PF_ERR_MATCHED_PARENT);

	/*
	 * If this device is a PCI-PCI bridge, check if the bdf in the parent
	 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus
	 * ranges.  If they are, then assume the PCIe bridge's error handling
	 * has already taken care of this PCI-PCI bridge device's error.
	 */
	if (PCIE_IS_BDG(bus_p) &&
	    pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
		return (PF_ERR_MATCHED_PARENT);

	return (PF_ERR_PANIC);
}
1749
1750 /*
1751 * PCIe Bridge transactions associated with PERR.
1752 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
1753 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
 * o Bridge received a poisoned Completion on a Split Transaction from PCIe
 * o Bridge received a poisoned Completion on a Delayed Transaction from PCIe
1756 *
1757 * Check for non-poisoned PCIe transactions that got forwarded to the secondary
1758 * side and detects a PERR#. Except for delayed read completions, a poisoned
1759 * TLP will be forwarded to the secondary bus and PERR# will be asserted.
1760 */
1761 /* ARGSUSED */
static int
pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint16_t cmd;
	int hdl_sts = PF_HDL_NOTFOUND;
	int err = PF_ERR_NO_ERROR;
	pf_pcie_adv_bdg_err_regs_t *saer_p;


	if (HAS_SAER_LOGS(pfd_p, bit)) {
		/* Logs available: identify the transaction that saw PERR#. */
		saer_p = PCIE_ADV_BDG_REG(pfd_p);
		if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
			return (PF_ERR_PANIC);

cmd_switch:
		switch (cmd) {
		case PCI_PCIX_CMD_IOWR:
		case PCI_PCIX_CMD_MEMWR:
		case PCI_PCIX_CMD_MEMWR_BL:
		case PCI_PCIX_CMD_MEMWRBL:
			/* Posted Writes Transactions */
			if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO)
				hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
				    B_FALSE);
			break;
		case PCI_PCIX_CMD_CFWR:
			/*
			 * Check to see if it is a non-posted write.  If so, a
			 * UR Completion would have been sent.
			 */
			if (pf_matched_in_rc(dq_head_p, pfd_p,
			    PCI_STAT_R_MAST_AB)) {
				hdl_sts = PF_HDL_FOUND;
				err = PF_ERR_MATCHED_RC;
				goto done;
			}
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_SPL:
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_DADR:
			/*
			 * Dual-address cycle: the real command is in the
			 * upper bits of the logged header; re-dispatch on it.
			 */
			cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
			    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
			    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
			if (cmd != PCI_PCIX_CMD_DADR)
				goto cmd_switch;
			/* FALLTHROUGH */
		default:
			/* Unexpected situation, panic */
			hdl_sts = PF_HDL_NOTFOUND;
		}

		if (hdl_sts == PF_HDL_FOUND)
			err = PF_ERR_MATCHED_DEVICE;
		else
			err = PF_ERR_PANIC;
	} else {
		/*
		 * Check to see if it is a non-posted write.  If so, a UR
		 * Completion would have been sent.
		 */
		if ((PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED) &&
		    pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB))
			err = PF_ERR_MATCHED_RC;

		/* Check for posted writes.  Transaction is lost. */
		if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_S_PERROR)
			err = PF_ERR_PANIC;

		/*
		 * All other scenarios are due to read completions.  Check for
		 * PERR on the primary side.  If found the primary side error
		 * handling will take care of this error.
		 */
		if (err == PF_ERR_NO_ERROR) {
			if (PCI_ERR_REG(pfd_p)->pci_err_status &
			    PCI_STAT_PERROR)
				err = PF_ERR_MATCHED_PARENT;
			else
				err = PF_ERR_PANIC;
		}
	}

done:
	return (err);
}
1855
1856 /*
1857 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP,
1858 * check the logs and see if an associated handler for this transaction can be
1859 * found.
1860 */
1861 /* ARGSUSED */
1862 static int
pf_analyse_ptlp(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1863 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1864 pf_data_t *pfd_p)
1865 {
1866 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1867
1868 /*
1869 * If AERs are supported find the logs in this device, otherwise look in
1870 * it's parent's logs.
1871 */
1872 if (HAS_AER_LOGS(pfd_p, bit)) {
1873 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0);
1874
1875 /*
1876 * Double check that the log contains a poisoned TLP.
1877 * Some devices like PLX switch do not log poison TLP headers.
1878 */
1879 if (hdr->ep) {
1880 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) ==
1881 PF_HDL_FOUND)
1882 return (PF_ERR_MATCHED_DEVICE);
1883 }
1884
1885 /*
1886 * If an address is found and hdl lookup failed panic.
1887 * Otherwise check parents to see if there was enough
1888 * information recover.
1889 */
1890 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr)
1891 return (PF_ERR_PANIC);
1892 }
1893
1894 /*
1895 * Check to see if the rc has already handled this error or a parent has
1896 * already handled this error.
1897 *
1898 * If the error info in the RC wasn't enough to find the fault device,
1899 * such as if the faulting device lies behind a PCIe-PCI bridge from a
1900 * poisoned completion, check to see if the PCIe-PCI bridge has enough
1901 * info to recover. For completion TLP's, the AER header logs only
1902 * contain the faulting BDF in the Root Port. For PCIe device the fault
1903 * BDF is the fault device. But if the fault device is behind a
1904 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI
1905 * bridge's secondary bus number.
1906 */
1907 if (!PFD_IS_ROOT(pfd_p)) {
1908 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p));
1909 pf_data_t *parent_pfd_p;
1910
1911 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) {
1912 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
1913 return (PF_ERR_MATCHED_RC);
1914 }
1915
1916 parent_pfd_p = PCIE_DIP2PFD(pdip);
1917
1918 if (HAS_AER_LOGS(parent_pfd_p, bit))
1919 return (PF_ERR_MATCHED_PARENT);
1920 } else {
1921 pf_data_t *bdg_pfd_p;
1922 pcie_req_id_t secbus;
1923
1924 /*
1925 * Looking for a pcie bridge only makes sense if the BDF
1926 * Dev/Func = 0/0
1927 */
1928 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1929 goto done;
1930
1931 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf;
1932
1933 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF))
1934 goto done;
1935
1936 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus);
1937
1938 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p,
1939 PCIE_AER_SUCE_PERR_ASSERT)) {
1940 return pf_analyse_perr_assert(derr,
1941 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p);
1942 }
1943 }
1944 done:
1945 return (PF_ERR_PANIC);
1946 }
1947
1948 /*
1949 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split
1950 * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an
1951 * associated handler for this transaction can be found.
1952 */
1953 /* ARGSUSED */
1954 static int
pf_analyse_sc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1955 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1956 pf_data_t *pfd_p)
1957 {
1958 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1959 uint16_t cmd;
1960 int sts = PF_HDL_NOTFOUND;
1961
1962 if (!HAS_SAER_LOGS(pfd_p, bit))
1963 return (PF_ERR_PANIC);
1964
1965 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
1966 return (PF_ERR_PANIC);
1967
1968 if (cmd == PCI_PCIX_CMD_SPL)
1969 sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);
1970
1971 if (sts == PF_HDL_FOUND)
1972 return (PF_ERR_MATCHED_DEVICE);
1973
1974 return (PF_ERR_PANIC);
1975 }
1976
1977 /*
1978 * PCIe Timeout error analyser. This error can be forgiven if it is marked as
1979 * CE Advisory. If it is marked as advisory, this means the HW can recover
1980 * and/or retry the transaction automatically. Additionally, if a device's
1981 * parent slot reports that it is no longer physically present, we do not panic,
1982 * as one would not expect a missing device to respond to a command.
1983 */
1984 /* ARGSUSED */
1985 static int
pf_analyse_to(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1986 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1987 pf_data_t *pfd_p)
1988 {
1989 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1990 pf_data_t *rppfd = PCIE_DIP2PFD(rpdip);
1991 pf_pcie_slot_regs_t *p_pcie_slot_regs;
1992
1993 if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
1994 return (PF_ERR_NO_PANIC);
1995
1996 p_pcie_slot_regs = PCIE_SLOT_REG(rppfd);
1997 if (p_pcie_slot_regs->pcie_slot_regs_valid) {
1998 /*
1999 * If the device is reported gone from its parent slot, then it
2000 * is expected that any outstanding commands would time out. In
2001 * this case, do not panic.
2002 */
2003 if ((p_pcie_slot_regs->pcie_slot_status &
2004 PCIE_SLOTSTS_PRESENCE_DETECTED) == 0x0) {
2005 return (PF_ERR_NO_PANIC);
2006 }
2007 }
2008
2009 return (PF_ERR_PANIC);
2010 }
2011
2012 /*
2013 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by
2014 * matching the device BDF with the TLP Log. If misrouting panic, otherwise
2015 * don't panic.
2016 */
2017 /* ARGSUSED */
2018 static int
pf_analyse_uc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2019 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2020 pf_data_t *pfd_p)
2021 {
2022 if (HAS_AER_LOGS(pfd_p, bit) &&
2023 (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
2024 return (PF_ERR_NO_PANIC);
2025
2026 /*
2027 * This is a case of mis-routing. Any of the switches above this
2028 * device could be at fault.
2029 */
2030 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ROOT;
2031
2032 return (PF_ERR_PANIC);
2033 }
2034
2035 /*
2036 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data
2037 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
2038 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a
2039 * Poisoned TLP.
2040 *
2041 * Non-Posted Writes will also generate a UR in the completion status, which the
2042 * RC should also see.
2043 */
2044 /* ARGSUSED */
2045 static int
pf_analyse_uc_data(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2046 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2047 pf_data_t *pfd_p)
2048 {
2049 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2050
2051 if (!HAS_SAER_LOGS(pfd_p, bit))
2052 return (PF_ERR_PANIC);
2053
2054 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
2055 return (PF_ERR_MATCHED_RC);
2056
2057 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
2058 return (PF_ERR_MATCHED_DEVICE);
2059
2060 return (PF_ERR_PANIC);
2061 }
2062
/*
 * Analysis callback for error classes that are always survivable; all
 * arguments are ignored and PF_ERR_NO_PANIC is returned unconditionally.
 */
/* ARGSUSED */
static int
pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	return (PF_ERR_NO_PANIC);
}
2070
/*
 * Analysis callback for error classes that always require a panic; all
 * arguments are ignored and PF_ERR_PANIC is returned unconditionally.
 */
/* ARGSUSED */
static int
pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	return (PF_ERR_PANIC);
}
2078
2079 /*
2080 * If a PCIe device does not support AER, assume all AER statuses have been set,
2081 * unless other registers do not indicate a certain error occuring.
2082 */
static void
pf_adjust_for_no_aer(pf_data_t *pfd_p)
{
	uint32_t aer_ue = 0;
	uint16_t status;

	/* Nothing to synthesize if the device really has AER registers. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	/* Fatal Error detected: assume the full set of fatal UE causes. */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		aer_ue = PF_AER_FATAL_ERR;

	/*
	 * Non-Fatal Error detected: start from the full non-fatal set (this
	 * replaces, rather than augments, any fatal mask set above), then
	 * prune causes that other status registers show did not occur.
	 */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		aer_ue = PF_AER_NON_FATAL_ERR;
		status = PCI_ERR_REG(pfd_p)->pci_err_status;

		/* Check if the device received a PTLP */
		if (!(status & PCI_STAT_PERROR))
			aer_ue &= ~PCIE_AER_UCE_PTLP;

		/* Check if the device signaled a CA */
		if (!(status & PCI_STAT_S_TARG_AB))
			aer_ue &= ~PCIE_AER_UCE_CA;

		/* Check if the device sent a UR */
		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED))
			aer_ue &= ~PCIE_AER_UCE_UR;

		/*
		 * Ignore ECRCs as it is optional and will manifest itself as
		 * another error like PTLP and MFP
		 */
		aer_ue &= ~PCIE_AER_UCE_ECRC;

		/*
		 * Generally if NFE is set, SERR should also be set. Exception:
		 * When certain non-fatal errors are masked, and some of them
		 * happened to be the cause of the NFE, SERR will not be set and
		 * they can not be the source of this interrupt.
		 *
		 * On x86, URs are masked (NFE + UR can be set), if any other
		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
		 * occur, SERR should be set since they are not masked. So if
		 * SERR is not set, none of them occurred.
		 */
		if (!(status & PCI_STAT_S_SYSERR))
			aer_ue &= ~PCIE_AER_UCE_TO;
	}

	/* Training and Surprise Down are only synthesized for bridges. */
	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
		aer_ue &= ~PCIE_AER_UCE_TRAINING;
		aer_ue &= ~PCIE_AER_UCE_SD;
	}

	/* Publish the synthesized UE status for the rest of the analysis. */
	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
}
2140
/*
 * Secondary-side analogue of pf_adjust_for_no_aer(): if a PCIe-PCI bridge
 * lacks AER, synthesize a secondary UE (SUE) status from the device status
 * and secondary PCI status registers.
 */
static void
pf_adjust_for_no_saer(pf_data_t *pfd_p)
{
	uint32_t s_aer_ue = 0;
	uint16_t status;

	/* Nothing to synthesize if the device really has AER registers. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	/* Fatal Error detected: assume the full set of fatal SUE causes. */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		s_aer_ue = PF_SAER_FATAL_ERR;

	/*
	 * Non-Fatal Error detected: start from the full non-fatal set (this
	 * replaces, rather than augments, any fatal mask set above), then
	 * prune causes the secondary status register rules out.
	 */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		s_aer_ue = PF_SAER_NON_FATAL_ERR;
		status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;

		/* Check if the device received a UC_DATA */
		if (!(status & PCI_STAT_PERROR))
			s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;

		/* Check if the device received a RCVD_MA/MA_ON_SC */
		if (!(status & (PCI_STAT_R_MAST_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
			s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
		}

		/* Check if the device received a RCVD_TA/TA_ON_SC */
		if (!(status & (PCI_STAT_R_TARG_AB))) {
			s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
			s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
		}
	}

	/* Publish the synthesized SUE status for the rest of the analysis. */
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
}
2176
2177 /* Find the PCIe-PCI bridge based on secondary bus number */
2178 static pf_data_t *
pf_get_pcie_bridge(pf_data_t * pfd_p,pcie_req_id_t secbus)2179 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
2180 {
2181 pf_data_t *bdg_pfd_p;
2182
2183 /* Search down for the PCIe-PCI device. */
2184 for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
2185 bdg_pfd_p = bdg_pfd_p->pe_next) {
2186 if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
2187 PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
2188 return (bdg_pfd_p);
2189 }
2190
2191 return (NULL);
2192 }
2193
/* Find the PCIe-PCI bridge of a PCI device */
static pf_data_t *
pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
{
	dev_info_t *dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;

	/* This only makes sense if the device is a PCI device */
	if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
		return (NULL);

	/*
	 * Search up for the PCIe-PCI device. Watchout for x86 where pci
	 * devices hang directly off of NPE.
	 *
	 * NOTE(review): when the walk reaches the root port, dip is set to
	 * NULL and PCIE_DIP2BUS(dip)/PCIE_IS_PCIE_BDG() are still evaluated
	 * once below before the loop condition terminates -- this relies on
	 * those macros tolerating a NULL dip; confirm against pcie_impl.h.
	 */
	for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
		/* Stop the walk once the root port itself has been reached. */
		if (dip == rp_dip)
			dip = NULL;

		if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
			return (PCIE_DIP2PFD(dip));
	}

	return (NULL);
}
2218
2219 /*
2220 * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
2221 * As of right now only RC's have enough information to have errors found in the
2222 * fabric to be matched to the RC. Note that Root Port's (RP) do not carry
2223 * enough information. Currently known RC's are SPARC Fire architecture and
2224 * it's equivalents, and x86's NPE.
2225 * SPARC Fire architectures have a plethora of error registers, while currently
2226 * NPE only have the address of a failed load.
2227 *
2228 * Check if the RC logged an error with the appropriate status type/abort type.
2229 * Ex: Parity Error, Received Master/Target Abort
2230 * Check if either the fault address found in the rc matches the device's
2231 * assigned address range (PIO's only) or the fault BDF in the rc matches the
2232 * device's BDF or Secondary Bus/Bus Range.
2233 */
2234 static boolean_t
pf_matched_in_rc(pf_data_t * dq_head_p,pf_data_t * pfd_p,uint32_t abort_type)2235 pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
2236 uint32_t abort_type)
2237 {
2238 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
2239 pf_data_t *rc_pfd_p;
2240 pcie_req_id_t fault_bdf;
2241
2242 for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
2243 rc_pfd_p = rc_pfd_p->pe_next) {
2244 /* Only root complex's have enough information to match */
2245 if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
2246 continue;
2247
2248 /* If device and rc abort type does not match continue */
2249 if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
2250 continue;
2251
2252 fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;
2253
2254 /* The Fault BDF = Device's BDF */
2255 if (fault_bdf == bus_p->bus_bdf)
2256 return (B_TRUE);
2257
2258 /* The Fault Addr is in device's address range */
2259 if (pf_in_addr_range(bus_p,
2260 PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
2261 return (B_TRUE);
2262
2263 /* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
2264 if (PCIE_IS_PCIE_BDG(bus_p) &&
2265 pf_in_bus_range(bus_p, fault_bdf))
2266 return (B_TRUE);
2267 }
2268
2269 return (B_FALSE);
2270 }
2271
2272 /*
2273 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then
2274 * it is a DMA, otherwise it's a PIO
2275 */
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
    pcie_req_id_t *bdf)
{
	pf_data_t *rc_pfd_p;

	/*
	 * Could be DMA or PIO.  Find out by looking at the error type.
	 * NOTE(review): switching on the whole pcie_sue_status register
	 * assumes only the bit for the error being analysed is set --
	 * confirm against the callers' use of this function.
	 */
	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
	case PCIE_AER_SUCE_TA_ON_SC:
	case PCIE_AER_SUCE_MA_ON_SC:
		/* Aborts on a Split Completion are treated as DMA. */
		*trans_type = PF_ADDR_DMA;
		return;
	case PCIE_AER_SUCE_RCVD_TA:
	case PCIE_AER_SUCE_RCVD_MA:
		/* Received aborts are treated as PIO with no usable BDF. */
		*bdf = PCIE_INVALID_BDF;
		*trans_type = PF_ADDR_PIO;
		return;
	case PCIE_AER_SUCE_USC_ERR:
	case PCIE_AER_SUCE_UC_DATA_ERR:
	case PCIE_AER_SUCE_PERR_ASSERT:
		/* Ambiguous cases: decide below by checking the RP data. */
		break;
	default:
		/* Unknown error type: nothing can be determined. */
		*addr = 0;
		*bdf = PCIE_INVALID_BDF;
		*trans_type = 0;
		return;
	}

	/*
	 * Default to PIO; if any root entry preceding this one in the queue
	 * recorded a secondary parity error, reclassify as DMA (see the
	 * block comment above this function).
	 */
	*bdf = PCIE_INVALID_BDF;
	*trans_type = PF_ADDR_PIO;
	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
	    rc_pfd_p = rc_pfd_p->pe_prev) {
		if (PFD_IS_ROOT(rc_pfd_p) &&
		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_PERROR)) {
			*trans_type = PF_ADDR_DMA;
			return;
		}
	}
}
2316
2317 /*
2318 * pf_pci_decode function decodes the secondary aer transaction logs in
2319 * PCIe-PCI bridges.
2320 *
2321 * The log is 128 bits long and arranged in this manner.
2322 * [0:35] Transaction Attribute (s_aer_h0-saer_h1)
2323 * [36:39] Transaction lower command (saer_h1)
2324 * [40:43] Transaction upper command (saer_h1)
2325 * [44:63] Reserved
2326 * [64:127] Address (saer_h2-saer_h3)
2327 */
/* ARGSUSED */
int
pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd)
{
	pcix_attr_t *attr;
	uint64_t addr;
	uint32_t trans_type;
	pcie_req_id_t bdf = PCIE_INVALID_BDF;

	/* The transaction attribute (incl. requester ID) is in header 0/1. */
	attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0);
	*cmd = GET_SAER_CMD(pfd_p);

cmd_switch:
	switch (*cmd) {
	case PCI_PCIX_CMD_IORD:
	case PCI_PCIX_CMD_IOWR:
		/* IO Access should always be down stream */
		addr = PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;
		trans_type = PF_ADDR_PIO;
		break;
	case PCI_PCIX_CMD_MEMRD_DW:
	case PCI_PCIX_CMD_MEMRD_BL:
	case PCI_PCIX_CMD_MEMRDBL:
	case PCI_PCIX_CMD_MEMWR:
	case PCI_PCIX_CMD_MEMWR_BL:
	case PCI_PCIX_CMD_MEMWRBL:
		/* 64-bit address: high word in header 3, low in header 2. */
		addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) <<
		    PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;

		/* Memory access could be DMA or PIO; let the RP data decide. */
		pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf);
		break;
	case PCI_PCIX_CMD_CFRD:
	case PCI_PCIX_CMD_CFWR:
		/*
		 * CFG Access should always be down stream. Match the BDF in
		 * the address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_CFG;
		break;
	case PCI_PCIX_CMD_SPL:
		/*
		 * Check for DMA read completions. The requesting BDF is in the
		 * Address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_DMA;
		break;
	case PCI_PCIX_CMD_DADR:
		/*
		 * For Dual Address Cycles the transaction command is in the 2nd
		 * address phase.  Re-dispatch on the upper command; a second
		 * DADR means the log is bogus and falls through to failure.
		 */
		*cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
		    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
		    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
		if (*cmd != PCI_PCIX_CMD_DADR)
			goto cmd_switch;
		/* FALLTHROUGH */
	default:
		/* Undecodable: clear the target fields and report failure. */
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
		return (DDI_FAILURE);
	}
	/* Record the decoded target for use by the handle lookup code. */
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr;
	return (DDI_SUCCESS);
}
2402
2403 /*
2404 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler.
2405 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2406 */
2407 int
pf_hdl_lookup(dev_info_t * dip,uint64_t ena,uint32_t flag,uint64_t addr,pcie_req_id_t bdf)2408 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr,
2409 pcie_req_id_t bdf)
2410 {
2411 ddi_fm_error_t derr;
2412
2413 /* If we don't know the addr or rid just return with NOTFOUND */
2414 if ((addr == 0) && !PCIE_CHECK_VALID_BDF(bdf))
2415 return (PF_HDL_NOTFOUND);
2416
2417 /*
2418 * Disable DMA handle lookup until DMA errors can be handled and
2419 * reported synchronously. When enabled again, check for the
2420 * PF_ADDR_DMA flag
2421 */
2422 if (!(flag & (PF_ADDR_PIO | PF_ADDR_CFG))) {
2423 return (PF_HDL_NOTFOUND);
2424 }
2425
2426 bzero(&derr, sizeof (ddi_fm_error_t));
2427 derr.fme_version = DDI_FME_VERSION;
2428 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
2429 derr.fme_ena = ena;
2430
2431 return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf));
2432 }
2433
/*
 * Recursively search dip and its children for the FM access/DMA handle
 * matching the fault address and/or BDF, marking any match with the error
 * state from derr.  Returns PF_HDL_FOUND or PF_HDL_NOTFOUND.
 */
static int
pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
    uint64_t addr, pcie_req_id_t bdf)
{
	int status = PF_HDL_NOTFOUND;
	ndi_fmc_t *fcp = NULL;
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	pcie_req_id_t dip_bdf;
	boolean_t have_lock = B_FALSE;
	pcie_bus_t *bus_p;
	dev_info_t *cdip;

	/* Skip devices that are not ready to be scanned. */
	if (!(bus_p = pf_is_ready(dip))) {
		return (status);
	}

	ASSERT(fmhdl);
	if (!i_ddi_fm_handler_owned(dip)) {
		/*
		 * pf_handler_enter always returns SUCCESS if the 'impl' arg is
		 * NULL.
		 */
		(void) pf_handler_enter(dip, NULL);
		have_lock = B_TRUE;
	}

	dip_bdf = PCI_GET_BDF(dip);

	/* Check if dip and BDF match, if not recurse to it's children. */
	if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) ||
	    dip_bdf == bdf)) {
		/* Select the DMA handle cache if DMA lookup is requested. */
		if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_dma_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr,
			    bdf, fcp);


		/* Select the access handle cache for PIO/CFG lookups. */
		if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) &&
		    DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_acc_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr,
			    bdf, fcp);
	}

	/* If we found the handler or know it's this device, we're done */
	if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) ||
	    (status == PF_HDL_FOUND)))
		goto done;

	/*
	 * If the current device is a PCIe-PCI bridge we need to check for
	 * special cases:
	 *
	 * If it is a PIO and we don't have an address or this is a DMA, check
	 * to see if the BDF = secondary bus. If so stop. The BDF isn't a real
	 * BDF and the fault device could have come from any device in the PCI
	 * bus.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) &&
	    ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf))
		goto done;


	/* If we can't find the handler check it's children */
	for (cdip = ddi_get_child(dip); cdip;
	    cdip = ddi_get_next_sibling(cdip)) {
		if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL)
			continue;

		/* Only descend where the BDF or address could live. */
		if (pf_in_bus_range(bus_p, bdf) ||
		    pf_in_addr_range(bus_p, addr))
			status = pf_hdl_child_lookup(cdip, derr, flag, addr,
			    bdf);

		if (status == PF_HDL_FOUND)
			goto done;
	}

done:
	if (have_lock == B_TRUE)
		pf_handler_exit(dip);

	return (status);
}
2527
2528 static int
pf_hdl_compare(dev_info_t * dip,ddi_fm_error_t * derr,uint32_t flag,uint64_t addr,pcie_req_id_t bdf,ndi_fmc_t * fcp)2529 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
2530 uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp)
2531 {
2532 ndi_fmcentry_t *fep;
2533 int found = 0;
2534 int status;
2535
2536 mutex_enter(&fcp->fc_lock);
2537 for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) {
2538 ddi_fmcompare_t compare_func;
2539
2540 /*
2541 * Compare captured error state with handle
2542 * resources. During the comparison and
2543 * subsequent error handling, we block
2544 * attempts to free the cache entry.
2545 */
2546 if (flag == ACC_HANDLE) {
2547 compare_func =
2548 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t)
2549 fep->fce_resource);
2550 } else {
2551 compare_func =
2552 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t)
2553 fep->fce_resource);
2554 }
2555
2556 if (compare_func == NULL) /* unbound or not FLAGERR */
2557 continue;
2558
2559 status = compare_func(dip, fep->fce_resource,
2560 (void *)&addr, (void *)&bdf);
2561
2562 if (status == DDI_FM_NONFATAL) {
2563 found++;
2564
2565 /* Set the error for this resource handle */
2566 if (flag == ACC_HANDLE) {
2567 ddi_acc_handle_t ap = fep->fce_resource;
2568
2569 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status,
2570 DDI_FM_ERR_UNEXPECTED);
2571 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION);
2572 derr->fme_acc_handle = ap;
2573 } else {
2574 ddi_dma_handle_t dp = fep->fce_resource;
2575
2576 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status,
2577 DDI_FM_ERR_UNEXPECTED);
2578 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION);
2579 derr->fme_dma_handle = dp;
2580 }
2581 }
2582 }
2583 mutex_exit(&fcp->fc_lock);
2584
2585 /*
2586 * If a handler isn't found and we know this is the right device mark
2587 * them all failed.
2588 */
2589 if ((addr != 0) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) {
2590 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp);
2591 if (status == PF_HDL_FOUND)
2592 found++;
2593 }
2594
2595 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND);
2596 }
2597
2598 /*
2599 * Automatically decode AER header logs and does a handling look up based on the
2600 * AER header decoding.
2601 *
2602 * For this function only the Primary/Secondary AER Header Logs need to be valid
2603 * in the pfd (PCIe Fault Data) arg.
2604 *
2605 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2606 */
/* ARGSUSED */
static int
pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
    boolean_t is_primary)
{
	/*
	 * Disabling this function temporarily until errors can be handled
	 * synchronously.
	 *
	 * This function is currently only called during the middle of a fabric
	 * scan. If the fabric scan is called synchronously with an error seen
	 * in the RP/RC, then the related errors in the fabric will have a
	 * PF_ERR_MATCHED_RC error severity. pf_log_hdl_lookup code will be by
	 * passed when the severity is PF_ERR_MATCHED_RC. Handle lookup would
	 * have already happened in RP/RC error handling in a synchronous
	 * manner. Errors unrelated should panic, because they are being
	 * handled asynchronously.
	 *
	 * If fabric scan is called asynchronously from any RP/RC error, then
	 * DMA/PIO UE errors seen in the fabric should panic. pf_log_hdl_lookup
	 * will return PF_HDL_NOTFOUND to ensure that the system panics.
	 */
	/* All arguments are intentionally ignored while this stub is in place. */
	return (PF_HDL_NOTFOUND);
}
2631
2632 /*
2633 * Decodes the TLP and returns the BDF of the handler, address and transaction
2634 * type if known.
2635 *
2636 * Types of TLP logs seen in RC, and what to extract:
2637 *
2638 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2639 * Memory(PIO) - address, PF_PIO_ADDR
2640 * CFG - Should not occur and result in UR
2641 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2642 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2643 *
2644 * Types of TLP logs seen in SW/Leaf, and what to extract:
2645 *
2646 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
2647 * Memory(PIO) - address, PF_PIO_ADDR
2648 * CFG - Destined BDF, address, PF_CFG_ADDR
2649 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
2650 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
2651 *
2652 * The adv_reg_p must be passed in separately for use with SPARC RPs. A
2653 * SPARC RP could have multiple AER header logs which cannot be directly
2654 * accessed via the bus_p.
2655 */
int
pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p)
{
	pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
	pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
	uint64_t flt_addr = 0;
	uint32_t flt_trans_type = 0;

	/* Reset the decoded target fields before attempting the decode. */
	adv_reg_p->pcie_ue_tgt_addr = 0;
	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
	adv_reg_p->pcie_ue_tgt_trans = 0;

	my_bdf = bus_p->bus_bdf;
	switch (tlp_hdr->type) {
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* Grab the 32/64bit fault address */
		if (tlp_hdr->fmt & 0x1) {
			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
		} else {
			flt_addr = adv_reg_p->pcie_ue_hdr[2];
		}

		/* The requester ID occupies the top 16 bits of header 1. */
		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);

		/*
		 * If the req bdf >= this.bdf, then it means the request is this
		 * device or came from a device below it. Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else if (PCIE_IS_ROOT(bus_p) &&
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
			/* Root seeing a PTLP/CA first implies inbound DMA. */
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else {
			flt_trans_type = PF_ADDR_PIO;
			flt_bdf = PCIE_INVALID_BDF;
		}
		break;
	case PCIE_TLP_TYPE_CFG0:
	case PCIE_TLP_TYPE_CFG1:
		/* CFG TLPs carry the destination BDF in header 2. */
		flt_addr = 0;
		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
		flt_trans_type = PF_ADDR_CFG;
		break;
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
	{
		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&adv_reg_p->pcie_ue_hdr[1];

		/* The fault BDF is the larger of requester and completer ID. */
		flt_addr = 0;
		flt_bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
		    cpl_tlp->cid;

		/*
		 * If the requester ID is the larger of the two, the request
		 * originated at or below this device, so the completion
		 * belongs to a DMA; otherwise treat it as PIO/CFG.
		 */
		if (cpl_tlp->rid > cpl_tlp->cid) {
			flt_trans_type = PF_ADDR_DMA;
		} else {
			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
		}
		break;
	}
	default:
		/* Unknown TLP type: nothing can be decoded. */
		return (DDI_FAILURE);
	}

	/* Publish the decoded target for the handle lookup code. */
	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;

	return (DDI_SUCCESS);
}
2738
2739 #define PCIE_EREPORT DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC
/*
 * Reserve an errorq element and construct the skeleton of a PCIEX_FABRIC
 * ereport together with a dev-scheme detector FMRI for dip.  On success the
 * caller fills in the payload and posts it via pf_ereport_post().  Returns
 * DDI_SUCCESS, or DDI_FAILURE if no errorq element could be reserved (the
 * drop is accounted in the fm kstats).
 */
static int
pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
    nvlist_t **detector, errorq_elem_t **eqep)
{
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	char device_path[MAXPATHLEN];
	nv_alloc_t *nva;

	*eqep = errorq_reserve(fmhdl->fh_errorq);
	if (*eqep == NULL) {
		/* Queue is full; count the dropped ereport and bail. */
		atomic_inc_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64);
		return (DDI_FAILURE);
	}

	*ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
	nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);

	ASSERT(*ereport);
	ASSERT(nva);

	/*
	 * Use the dev_path/devid for this device instance.
	 */
	*detector = fm_nvlist_create(nva);
	if (dip == ddi_root_node()) {
		/* ddi_pathname is not usable for the root node; use "/". */
		device_path[0] = '/';
		device_path[1] = '\0';
	} else {
		(void) ddi_pathname(dip, device_path);
	}

	fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
	    device_path, NULL, NULL);

	/* Generate a fresh ENA if the caller did not supply one. */
	if (ena == 0)
		ena = fm_ena_generate(0, FM_ENA_FMT1);

	fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);

	return (DDI_SUCCESS);
}
2781
2782 /* ARGSUSED */
2783 static void
pf_ereport_post(dev_info_t * dip,nvlist_t ** ereport,nvlist_t ** detector,errorq_elem_t ** eqep)2784 pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
2785 errorq_elem_t **eqep)
2786 {
2787 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2788
2789 errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
2790 }
2791
2792 static void
pf_send_ereport(ddi_fm_error_t * derr,pf_impl_t * impl)2793 pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
2794 {
2795 nvlist_t *ereport;
2796 nvlist_t *detector;
2797 errorq_elem_t *eqep;
2798 pcie_bus_t *bus_p;
2799 pf_data_t *pfd_p;
2800 uint32_t total = impl->pf_total;
2801
2802 /*
2803 * Ereports need to be sent in a top down fashion. The fabric translator
2804 * expects the ereports from the Root first. This is needed to tell if
2805 * the system contains a PCIe complaint RC/RP.
2806 */
2807 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
2808 bus_p = PCIE_PFD2BUS(pfd_p);
2809 pfd_p->pe_valid = B_FALSE;
2810
2811 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
2812 !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
2813 continue;
2814
2815 if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
2816 &ereport, &detector, &eqep) != DDI_SUCCESS)
2817 continue;
2818
2819 if (PFD_IS_RC(pfd_p)) {
2820 fm_payload_set(ereport,
2821 "scan_bdf", DATA_TYPE_UINT16,
2822 PCIE_ROOT_FAULT(pfd_p)->scan_bdf,
2823 "scan_addr", DATA_TYPE_UINT64,
2824 PCIE_ROOT_FAULT(pfd_p)->scan_addr,
2825 "intr_src", DATA_TYPE_UINT16,
2826 PCIE_ROOT_EH_SRC(pfd_p)->intr_type,
2827 NULL);
2828 goto generic;
2829 }
2830
2831 /* Generic PCI device information */
2832 fm_payload_set(ereport,
2833 "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
2834 "device_id", DATA_TYPE_UINT16,
2835 (bus_p->bus_dev_ven_id >> 16),
2836 "vendor_id", DATA_TYPE_UINT16,
2837 (bus_p->bus_dev_ven_id & 0xFFFF),
2838 "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
2839 "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
2840 "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
2841 "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
2842 "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
2843 "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
2844 NULL);
2845
2846 /* PCI registers */
2847 fm_payload_set(ereport,
2848 "pci_status", DATA_TYPE_UINT16,
2849 PCI_ERR_REG(pfd_p)->pci_err_status,
2850 "pci_command", DATA_TYPE_UINT16,
2851 PCI_ERR_REG(pfd_p)->pci_cfg_comm,
2852 NULL);
2853
2854 /* PCI bridge registers */
2855 if (PCIE_IS_BDG(bus_p)) {
2856 fm_payload_set(ereport,
2857 "pci_bdg_sec_status", DATA_TYPE_UINT16,
2858 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
2859 "pci_bdg_ctrl", DATA_TYPE_UINT16,
2860 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
2861 NULL);
2862 }
2863
2864 /* PCIx registers */
2865 if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
2866 fm_payload_set(ereport,
2867 "pcix_status", DATA_TYPE_UINT32,
2868 PCIX_ERR_REG(pfd_p)->pcix_status,
2869 "pcix_command", DATA_TYPE_UINT16,
2870 PCIX_ERR_REG(pfd_p)->pcix_command,
2871 NULL);
2872 }
2873
2874 /* PCIx ECC Registers */
2875 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
2876 pf_pcix_ecc_regs_t *ecc_bdg_reg;
2877 pf_pcix_ecc_regs_t *ecc_reg;
2878
2879 if (PCIE_IS_BDG(bus_p))
2880 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
2881 ecc_reg = PCIX_ECC_REG(pfd_p);
2882 fm_payload_set(ereport,
2883 "pcix_ecc_control_0", DATA_TYPE_UINT16,
2884 PCIE_IS_BDG(bus_p) ?
2885 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
2886 (ecc_reg->pcix_ecc_ctlstat >> 16),
2887 "pcix_ecc_status_0", DATA_TYPE_UINT16,
2888 PCIE_IS_BDG(bus_p) ?
2889 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
2890 (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
2891 "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
2892 PCIE_IS_BDG(bus_p) ?
2893 ecc_bdg_reg->pcix_ecc_fstaddr :
2894 ecc_reg->pcix_ecc_fstaddr,
2895 "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
2896 PCIE_IS_BDG(bus_p) ?
2897 ecc_bdg_reg->pcix_ecc_secaddr :
2898 ecc_reg->pcix_ecc_secaddr,
2899 "pcix_ecc_attr_0", DATA_TYPE_UINT32,
2900 PCIE_IS_BDG(bus_p) ?
2901 ecc_bdg_reg->pcix_ecc_attr :
2902 ecc_reg->pcix_ecc_attr,
2903 NULL);
2904 }
2905
2906 /* PCIx ECC Bridge Registers */
2907 if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
2908 pf_pcix_ecc_regs_t *ecc_bdg_reg;
2909
2910 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
2911 fm_payload_set(ereport,
2912 "pcix_ecc_control_1", DATA_TYPE_UINT16,
2913 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
2914 "pcix_ecc_status_1", DATA_TYPE_UINT16,
2915 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
2916 "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
2917 ecc_bdg_reg->pcix_ecc_fstaddr,
2918 "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
2919 ecc_bdg_reg->pcix_ecc_secaddr,
2920 "pcix_ecc_attr_1", DATA_TYPE_UINT32,
2921 ecc_bdg_reg->pcix_ecc_attr,
2922 NULL);
2923 }
2924
2925 /* PCIx Bridge */
2926 if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
2927 fm_payload_set(ereport,
2928 "pcix_bdg_status", DATA_TYPE_UINT32,
2929 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
2930 "pcix_bdg_sec_status", DATA_TYPE_UINT16,
2931 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
2932 NULL);
2933 }
2934
2935 /* PCIe registers */
2936 if (PCIE_IS_PCIE(bus_p)) {
2937 fm_payload_set(ereport,
2938 "pcie_status", DATA_TYPE_UINT16,
2939 PCIE_ERR_REG(pfd_p)->pcie_err_status,
2940 "pcie_command", DATA_TYPE_UINT16,
2941 PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
2942 "pcie_dev_cap", DATA_TYPE_UINT32,
2943 PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
2944 NULL);
2945 }
2946
2947 /* PCIe AER registers */
2948 if (PCIE_HAS_AER(bus_p)) {
2949 fm_payload_set(ereport,
2950 "pcie_adv_ctl", DATA_TYPE_UINT32,
2951 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
2952 "pcie_ue_status", DATA_TYPE_UINT32,
2953 PCIE_ADV_REG(pfd_p)->pcie_ue_status,
2954 "pcie_ue_mask", DATA_TYPE_UINT32,
2955 PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
2956 "pcie_ue_sev", DATA_TYPE_UINT32,
2957 PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
2958 "pcie_ue_hdr0", DATA_TYPE_UINT32,
2959 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
2960 "pcie_ue_hdr1", DATA_TYPE_UINT32,
2961 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
2962 "pcie_ue_hdr2", DATA_TYPE_UINT32,
2963 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
2964 "pcie_ue_hdr3", DATA_TYPE_UINT32,
2965 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
2966 "pcie_ce_status", DATA_TYPE_UINT32,
2967 PCIE_ADV_REG(pfd_p)->pcie_ce_status,
2968 "pcie_ce_mask", DATA_TYPE_UINT32,
2969 PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
2970 NULL);
2971 }
2972
2973 /* PCIe AER decoded header */
2974 if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
2975 fm_payload_set(ereport,
2976 "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
2977 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
2978 "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
2979 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
2980 "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
2981 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
2982 NULL);
2983 /* Clear these values as they no longer valid */
2984 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
2985 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
2986 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
2987 }
2988
2989 /* PCIe BDG AER registers */
2990 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
2991 fm_payload_set(ereport,
2992 "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
2993 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
2994 "pcie_sue_status", DATA_TYPE_UINT32,
2995 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
2996 "pcie_sue_mask", DATA_TYPE_UINT32,
2997 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
2998 "pcie_sue_sev", DATA_TYPE_UINT32,
2999 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
3000 "pcie_sue_hdr0", DATA_TYPE_UINT32,
3001 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
3002 "pcie_sue_hdr1", DATA_TYPE_UINT32,
3003 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
3004 "pcie_sue_hdr2", DATA_TYPE_UINT32,
3005 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
3006 "pcie_sue_hdr3", DATA_TYPE_UINT32,
3007 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
3008 NULL);
3009 }
3010
3011 /* PCIe BDG AER decoded header */
3012 if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
3013 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
3014 fm_payload_set(ereport,
3015 "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
3016 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
3017 "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
3018 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
3019 "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
3020 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
3021 NULL);
3022 /* Clear these values as they no longer valid */
3023 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
3024 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
3025 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
3026 PCIE_INVALID_BDF;
3027 }
3028
3029 /* PCIe RP registers */
3030 if (PCIE_IS_RP(bus_p)) {
3031 fm_payload_set(ereport,
3032 "pcie_rp_status", DATA_TYPE_UINT32,
3033 PCIE_RP_REG(pfd_p)->pcie_rp_status,
3034 "pcie_rp_control", DATA_TYPE_UINT16,
3035 PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
3036 NULL);
3037 }
3038
3039 /* PCIe RP AER registers */
3040 if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
3041 fm_payload_set(ereport,
3042 "pcie_adv_rp_status", DATA_TYPE_UINT32,
3043 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
3044 "pcie_adv_rp_command", DATA_TYPE_UINT32,
3045 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
3046 "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
3047 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
3048 "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
3049 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
3050 NULL);
3051 }
3052
3053 /*
3054 * Slot Status registers
3055 *
3056 * Since we only gather these for certain types of components,
3057 * only put these registers into the ereport if we have valid
3058 * data.
3059 */
3060 if (PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid) {
3061 fm_payload_set(ereport,
3062 "pcie_slot_cap", DATA_TYPE_UINT32,
3063 PCIE_SLOT_REG(pfd_p)->pcie_slot_cap,
3064 "pcie_slot_control", DATA_TYPE_UINT16,
3065 PCIE_SLOT_REG(pfd_p)->pcie_slot_control,
3066 "pcie_slot_status", DATA_TYPE_UINT16,
3067 PCIE_SLOT_REG(pfd_p)->pcie_slot_status,
3068 NULL);
3069 }
3070
3071 generic:
3072 /* IOV related information */
3073 if (!PCIE_BDG_IS_UNASSIGNED(PCIE_PFD2BUS(impl->pf_dq_head_p))) {
3074 fm_payload_set(ereport,
3075 "pcie_aff_flags", DATA_TYPE_UINT16,
3076 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags,
3077 "pcie_aff_bdf", DATA_TYPE_UINT16,
3078 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf,
3079 "orig_sev", DATA_TYPE_UINT32,
3080 pfd_p->pe_orig_severity_flags,
3081 NULL);
3082 }
3083
3084 /* Misc ereport information */
3085 fm_payload_set(ereport,
3086 "remainder", DATA_TYPE_UINT32, --total,
3087 "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
3088 NULL);
3089
3090 pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
3091 &eqep);
3092 }
3093
3094 pf_dq_unlock_chain(impl);
3095 }
3096
3097 /*
3098 * pf_handler_enter must be called to serial access to each device's pf_data_t.
3099 * Once error handling is finished with the device call pf_handler_exit to allow
3100 * other threads to access it. The same thread may call pf_handler_enter
3101 * several times without any consequences.
3102 *
3103 * The "impl" variable is passed in during scan fabric to double check that
3104 * there is not a recursive algorithm and to ensure only one thread is doing a
3105 * fabric scan at all times.
3106 *
3107 * In some cases "impl" is not available, such as "child lookup" being called
3108 * from outside of scan fabric, just pass in NULL for this variable and this
3109 * extra check will be skipped.
3110 */
3111 static int
pf_handler_enter(dev_info_t * dip,pf_impl_t * impl)3112 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl)
3113 {
3114 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3115
3116 ASSERT(pfd_p);
3117
3118 /*
3119 * Check to see if the lock has already been taken by this
3120 * thread. If so just return and don't take lock again.
3121 */
3122 if (!pfd_p->pe_lock || !impl) {
3123 i_ddi_fm_handler_enter(dip);
3124 pfd_p->pe_lock = B_TRUE;
3125 return (PF_SCAN_SUCCESS);
3126 }
3127
3128 /* Check to see that this dip is already in the "impl" error queue */
3129 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
3130 if (PCIE_PFD2DIP(pfd_p) == dip) {
3131 return (PF_SCAN_SUCCESS);
3132 }
3133 }
3134
3135 return (PF_SCAN_DEADLOCK);
3136 }
3137
3138 static void
pf_handler_exit(dev_info_t * dip)3139 pf_handler_exit(dev_info_t *dip)
3140 {
3141 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3142
3143 ASSERT(pfd_p);
3144
3145 ASSERT(pfd_p->pe_lock == B_TRUE);
3146 i_ddi_fm_handler_exit(dip);
3147 pfd_p->pe_lock = B_FALSE;
3148 }
3149
3150 /*
3151 * This function calls the driver's callback function (if it's FMA hardened
3152 * and callback capable). This function relies on the current thread already
3153 * owning the driver's fmhdl lock.
3154 */
3155 static int
pf_fm_callback(dev_info_t * dip,ddi_fm_error_t * derr)3156 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
3157 {
3158 int cb_sts = DDI_FM_OK;
3159
3160 if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
3161 dev_info_t *pdip = ddi_get_parent(dip);
3162 struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
3163 struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
3164 struct i_ddi_errhdl *errhdl;
3165 while (tgt != NULL) {
3166 if (dip == tgt->ft_dip) {
3167 errhdl = tgt->ft_errhdl;
3168 cb_sts = errhdl->eh_func(dip, derr,
3169 errhdl->eh_impl);
3170 break;
3171 }
3172 tgt = tgt->ft_next;
3173 }
3174 }
3175 return (cb_sts);
3176 }
3177
3178 static void
pf_reset_pfd(pf_data_t * pfd_p)3179 pf_reset_pfd(pf_data_t *pfd_p)
3180 {
3181 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
3182
3183 pfd_p->pe_severity_flags = 0;
3184 pfd_p->pe_severity_mask = 0;
3185 pfd_p->pe_orig_severity_flags = 0;
3186 /* pe_lock and pe_valid were reset in pf_send_ereport */
3187
3188 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
3189 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
3190
3191 if (PCIE_IS_ROOT(bus_p)) {
3192 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
3193 PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
3194 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_FALSE;
3195 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
3196 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
3197 }
3198
3199 if (PCIE_IS_BDG(bus_p)) {
3200 bzero(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
3201 }
3202
3203 PCI_ERR_REG(pfd_p)->pci_err_status = 0;
3204 PCI_ERR_REG(pfd_p)->pci_cfg_comm = 0;
3205
3206 if (PCIE_IS_PCIE(bus_p)) {
3207 if (PCIE_IS_ROOT(bus_p)) {
3208 bzero(PCIE_RP_REG(pfd_p),
3209 sizeof (pf_pcie_rp_err_regs_t));
3210 bzero(PCIE_ADV_RP_REG(pfd_p),
3211 sizeof (pf_pcie_adv_rp_err_regs_t));
3212 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
3213 PCIE_INVALID_BDF;
3214 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
3215 PCIE_INVALID_BDF;
3216 } else if (PCIE_IS_PCIE_BDG(bus_p)) {
3217 bzero(PCIE_ADV_BDG_REG(pfd_p),
3218 sizeof (pf_pcie_adv_bdg_err_regs_t));
3219 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
3220 PCIE_INVALID_BDF;
3221 }
3222
3223 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
3224 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
3225 bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
3226 sizeof (pf_pcix_ecc_regs_t));
3227 bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
3228 sizeof (pf_pcix_ecc_regs_t));
3229 }
3230 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
3231 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
3232 }
3233
3234 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
3235 PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
3236 PCIE_ADV_REG(pfd_p)->pcie_ue_mask = 0;
3237 PCIE_ADV_REG(pfd_p)->pcie_ue_sev = 0;
3238 PCIE_ADV_HDR(pfd_p, 0) = 0;
3239 PCIE_ADV_HDR(pfd_p, 1) = 0;
3240 PCIE_ADV_HDR(pfd_p, 2) = 0;
3241 PCIE_ADV_HDR(pfd_p, 3) = 0;
3242 PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
3243 PCIE_ADV_REG(pfd_p)->pcie_ce_mask = 0;
3244 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
3245 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
3246 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
3247
3248 PCIE_ERR_REG(pfd_p)->pcie_err_status = 0;
3249 PCIE_ERR_REG(pfd_p)->pcie_err_ctl = 0;
3250 PCIE_ERR_REG(pfd_p)->pcie_dev_cap = 0;
3251
3252 } else if (PCIE_IS_PCIX(bus_p)) {
3253 if (PCIE_IS_BDG(bus_p)) {
3254 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
3255 bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
3256 sizeof (pf_pcix_ecc_regs_t));
3257 bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
3258 sizeof (pf_pcix_ecc_regs_t));
3259 }
3260 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
3261 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
3262 } else {
3263 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
3264 bzero(PCIX_ECC_REG(pfd_p),
3265 sizeof (pf_pcix_ecc_regs_t));
3266 }
3267 PCIX_ERR_REG(pfd_p)->pcix_command = 0;
3268 PCIX_ERR_REG(pfd_p)->pcix_status = 0;
3269 }
3270 }
3271
3272 pfd_p->pe_prev = NULL;
3273 pfd_p->pe_next = NULL;
3274 pfd_p->pe_rber_fatal = B_FALSE;
3275 }
3276
3277 pcie_bus_t *
pf_find_busp_by_bdf(pf_impl_t * impl,pcie_req_id_t bdf)3278 pf_find_busp_by_bdf(pf_impl_t *impl, pcie_req_id_t bdf)
3279 {
3280 pcie_bus_t *temp_bus_p;
3281 pf_data_t *temp_pfd_p;
3282
3283 for (temp_pfd_p = impl->pf_dq_head_p;
3284 temp_pfd_p;
3285 temp_pfd_p = temp_pfd_p->pe_next) {
3286 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3287
3288 if (bdf == temp_bus_p->bus_bdf) {
3289 return (temp_bus_p);
3290 }
3291 }
3292
3293 return (NULL);
3294 }
3295
3296 pcie_bus_t *
pf_find_busp_by_addr(pf_impl_t * impl,uint64_t addr)3297 pf_find_busp_by_addr(pf_impl_t *impl, uint64_t addr)
3298 {
3299 pcie_bus_t *temp_bus_p;
3300 pf_data_t *temp_pfd_p;
3301
3302 for (temp_pfd_p = impl->pf_dq_head_p;
3303 temp_pfd_p;
3304 temp_pfd_p = temp_pfd_p->pe_next) {
3305 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3306
3307 if (pf_in_assigned_addr(temp_bus_p, addr)) {
3308 return (temp_bus_p);
3309 }
3310 }
3311
3312 return (NULL);
3313 }
3314
3315 pcie_bus_t *
pf_find_busp_by_aer(pf_impl_t * impl,pf_data_t * pfd_p)3316 pf_find_busp_by_aer(pf_impl_t *impl, pf_data_t *pfd_p)
3317 {
3318 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p);
3319 pcie_bus_t *temp_bus_p = NULL;
3320 pcie_req_id_t bdf;
3321 uint64_t addr;
3322 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)reg_p->pcie_ue_hdr;
3323 uint32_t trans_type = reg_p->pcie_ue_tgt_trans;
3324
3325 if ((tlp_hdr->type == PCIE_TLP_TYPE_CPL) ||
3326 (tlp_hdr->type == PCIE_TLP_TYPE_CPLLK)) {
3327 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)®_p->pcie_ue_hdr[1];
3328
3329 bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
3330 cpl_tlp->cid;
3331 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3332 } else if (trans_type == PF_ADDR_PIO) {
3333 addr = reg_p->pcie_ue_tgt_addr;
3334 temp_bus_p = pf_find_busp_by_addr(impl, addr);
3335 } else {
3336 /* PF_ADDR_DMA type */
3337 bdf = reg_p->pcie_ue_tgt_bdf;
3338 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3339 }
3340
3341 return (temp_bus_p);
3342 }
3343
3344 pcie_bus_t *
pf_find_busp_by_saer(pf_impl_t * impl,pf_data_t * pfd_p)3345 pf_find_busp_by_saer(pf_impl_t *impl, pf_data_t *pfd_p)
3346 {
3347 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p);
3348 pcie_bus_t *temp_bus_p = NULL;
3349 pcie_req_id_t bdf;
3350 uint64_t addr;
3351
3352 addr = reg_p->pcie_sue_tgt_addr;
3353 bdf = reg_p->pcie_sue_tgt_bdf;
3354
3355 if (addr != 0) {
3356 temp_bus_p = pf_find_busp_by_addr(impl, addr);
3357 } else if (PCIE_CHECK_VALID_BDF(bdf)) {
3358 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3359 }
3360
3361 return (temp_bus_p);
3362 }
3363