1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 * Copyright 2026 Oxide Computer Company
25 */
26
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/kmem.h>
30 #include <sys/modctl.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/sunndi.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/fm/util.h>
36 #include <sys/fm/io/ddi.h>
37 #include <sys/fm/io/pci.h>
38 #include <sys/promif.h>
39 #include <sys/disp.h>
40 #include <sys/atomic.h>
41 #include <sys/pcie.h>
42 #include <sys/pci_cap.h>
43 #include <sys/pcie_impl.h>
44
45 #define PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \
46 PCIE_DEVSTS_CE_DETECTED)
47
48 #define PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \
49 PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR)
50
51 #define PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\
52 PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP)
53 #define PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \
54 PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR)
55
56 #define PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \
57 PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \
58 PCIE_AER_SUCE_SERR_ASSERT)
59 #define PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \
60 PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \
61 PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \
62 PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \
63 PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR)
64
65 #define PF_PCI_PARITY_ERR (PCI_STAT_S_PERROR | PCI_STAT_PERROR)
66
67 #define PF_FIRST_AER_ERR(bit, adv) \
68 (bit & (1 << (adv->pcie_adv_ctl & PCIE_AER_CTL_FST_ERR_PTR_MASK)))
69
70 #define HAS_AER_LOGS(pfd_p, bit) \
71 (PCIE_HAS_AER(pfd_p->pe_bus_p) && \
72 PF_FIRST_AER_ERR(bit, PCIE_ADV_REG(pfd_p)))
73
74 #define PF_FIRST_SAER_ERR(bit, adv) \
75 (bit & (1 << (adv->pcie_sue_ctl & PCIE_AER_SCTL_FST_ERR_PTR_MASK)))
76
77 #define HAS_SAER_LOGS(pfd_p, bit) \
78 (PCIE_HAS_AER(pfd_p->pe_bus_p) && \
79 PF_FIRST_SAER_ERR(bit, PCIE_ADV_BDG_REG(pfd_p)))
80
81 #define GET_SAER_CMD(pfd_p) \
82 ((PCIE_ADV_BDG_HDR(pfd_p, 1) >> \
83 PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & PCIE_AER_SUCE_HDR_CMD_LWR_MASK)
84
85 #define CE_ADVISORY(pfd_p) \
86 (PCIE_ADV_REG(pfd_p)->pcie_ce_status & PCIE_AER_CE_AD_NFE)
87
/* PCIe Fault Fabric Error analysis table */
typedef struct pf_fab_err_tbl {
	uint32_t bit;		/* Error bit */
	int	(*handler)();	/* Error handling function */
	uint16_t affected_flags; /* Primary affected flag */
	/*
	 * Secondary affected flag, effective when the information
	 * indicated by the primary flag is not available, eg.
	 * PF_AFFECTED_AER/SAER/ADDR
	 */
	uint16_t sec_affected_flags;
} pf_fab_err_tbl_t;
100
101 static pcie_bus_t *pf_is_ready(dev_info_t *);
102 /* Functions for scanning errors */
103 static int pf_default_hdl(dev_info_t *, pf_impl_t *);
104 static int pf_dispatch(dev_info_t *, pf_impl_t *, boolean_t);
105 static boolean_t pf_in_addr_range(pcie_bus_t *, uint64_t);
106
107 /* Functions for gathering errors */
108 static void pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
109 pcie_bus_t *bus_p, boolean_t bdg);
110 static void pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
111 static void pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
112 static void pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p);
113 static int pf_dummy_cb(dev_info_t *, ddi_fm_error_t *, const void *);
114 static void pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl_p);
115
116 /* Functions for analysing errors */
117 static int pf_analyse_error(ddi_fm_error_t *, pf_impl_t *);
118 static void pf_adjust_for_no_aer(pf_data_t *);
119 static void pf_adjust_for_no_saer(pf_data_t *);
120 static pf_data_t *pf_get_pcie_bridge(pf_data_t *, pcie_req_id_t);
121 static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *);
122 static boolean_t pf_matched_in_rc(pf_data_t *, pf_data_t *,
123 uint32_t);
124 static int pf_analyse_error_tbl(ddi_fm_error_t *, pf_impl_t *,
125 pf_data_t *, const pf_fab_err_tbl_t *, uint32_t);
126 static int pf_analyse_ca_ur(ddi_fm_error_t *, uint32_t,
127 pf_data_t *, pf_data_t *);
128 static int pf_analyse_ma_ta(ddi_fm_error_t *, uint32_t,
129 pf_data_t *, pf_data_t *);
130 static int pf_analyse_pci(ddi_fm_error_t *, uint32_t,
131 pf_data_t *, pf_data_t *);
132 static int pf_analyse_perr_assert(ddi_fm_error_t *, uint32_t,
133 pf_data_t *, pf_data_t *);
134 static int pf_analyse_ptlp(ddi_fm_error_t *, uint32_t,
135 pf_data_t *, pf_data_t *);
136 static int pf_analyse_sc(ddi_fm_error_t *, uint32_t,
137 pf_data_t *, pf_data_t *);
138 static int pf_analyse_to(ddi_fm_error_t *, uint32_t,
139 pf_data_t *, pf_data_t *);
140 static int pf_analyse_uc(ddi_fm_error_t *, uint32_t,
141 pf_data_t *, pf_data_t *);
142 static int pf_analyse_uc_data(ddi_fm_error_t *, uint32_t,
143 pf_data_t *, pf_data_t *);
144 static int pf_no_panic(ddi_fm_error_t *, uint32_t,
145 pf_data_t *, pf_data_t *);
146 static int pf_panic(ddi_fm_error_t *, uint32_t,
147 pf_data_t *, pf_data_t *);
148 static void pf_send_ereport(ddi_fm_error_t *, pf_impl_t *);
149 static int pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr);
150
151 /* PCIe Fabric Handle Lookup Support Functions. */
152 static int pf_hdl_child_lookup(dev_info_t *, ddi_fm_error_t *, uint32_t,
153 uint64_t, pcie_req_id_t);
154 static int pf_hdl_compare(dev_info_t *, ddi_fm_error_t *, uint32_t, uint64_t,
155 pcie_req_id_t, ndi_fmc_t *);
156 static int pf_log_hdl_lookup(dev_info_t *, ddi_fm_error_t *, pf_data_t *,
157 boolean_t);
158
159 static int pf_handler_enter(dev_info_t *, pf_impl_t *);
160 static void pf_handler_exit(dev_info_t *);
161 static void pf_reset_pfd(pf_data_t *);
162
163 boolean_t pcie_full_scan = B_FALSE; /* Force to always do a full scan */
164 int pcie_disable_scan = 0; /* Disable fabric scan */
165
166 /*
167 * Cache of pf_impl_t that triggered a fatal error. This is stored before
168 * the system will likely panic. The pf_impl_t contains the scan results
169 * including all error data queues that led to the fatal error. The primary
170 * purpose of this cache is post-mortem debugging of such fatal PCIe errors.
171 * The cached data will be present and valid in crash dumps taken immediately
172 * after fatal error detection.
173 *
174 * This cache stores a shallow copy of the pf_impl_t structure which contains
175 * pointers to other structures. The validity of these pointers in a crash dump
176 * depends on their allocation:
177 *
178 * 1. pf_dq_head_p/pf_dq_tail_p (pf_data_t chain):
179 * These point to heap-allocated pf_data_t structures that live in each
180 * device's pcie_bus_t->bus_pfd. These are long-lived structures that
181 * persist for the lifetime of the device node in the device tree.
182 *
183 * 2. pf_fault (pf_root_fault_t):
184 * Points to heap-allocated structure in the root port's pf_data_t.
185 *
186 * 3. Error register structures (pf_pcie_err_regs_t, etc.):
187 * Heap-allocated as part of each device's pf_data_t; long lived.
188 *
189 * 4. pf_derr (ddi_fm_error_t):
190 * This points to a structure on the caller's stack. Normally this would be
191 * invalid after the function returns, but in a crash dump the panic
192 * preserves the stack contents.
193 *
194 * The cached pointers could theoretically become invalid if device hotplug/
195 * detach occurs between error detection and panic. However this is not a
196 * concern in practice because there is no window for this to occur between a
197 * fatal error and a system panic.
198 */
199 pf_impl_t pcie_faulty_pf_impl;
200
201 /* Inform interested parties that error handling is about to begin. */
202 /* ARGSUSED */
void
pf_eh_enter(pcie_bus_t *bus_p)
{
	/*
	 * Intentionally empty: this is a hook paired with pf_eh_exit() so
	 * callers can bracket error handling; no work is needed on entry.
	 */
}
207
208 /* Inform interested parties that error handling has ended. */
void
pf_eh_exit(pcie_bus_t *bus_p)
{
	/* Resolve the root port's bus and fault-data chain for this fabric. */
	pcie_bus_t *rbus_p = PCIE_DIP2BUS(bus_p->bus_rp_dip);
	pf_data_t *root_pfd_p = PCIE_BUS2PFD(rbus_p);
	pf_data_t *pfd_p;
	uint_t intr_type = PCIE_ROOT_EH_SRC(root_pfd_p)->intr_type;

	/* Notify the virtual-fabric layer that handling has completed. */
	pciev_eh_exit(root_pfd_p, intr_type);

	/* Clear affected device info and INTR SRC */
	for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
		/* Only root entries carry interrupt-source bookkeeping. */
		if (PCIE_IS_ROOT(PCIE_PFD2BUS(pfd_p))) {
			PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
			PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
		}
	}
}
229
230 /*
231 * After sending an ereport, or in lieu of doing so, unlock all the devices in
232 * the data queue. We also must clear pe_valid here; this function is called in
233 * the path where we decide not to send an ereport because there is no error
234 * (spurious AER interrupt), as well as from pf_send_ereport() which has already
235 * cleared it. Failing to do this will result in a different path through
236 * pf_dispatch() and the potential for deadlocks. It is safe to do as we are
237 * still holding the handler lock here, just as in pf_send_ereport().
238 */
239 static void
pf_dq_unlock_chain(pf_impl_t * impl)240 pf_dq_unlock_chain(pf_impl_t *impl)
241 {
242 pf_data_t *pfd_p;
243
244 for (pfd_p = impl->pf_dq_tail_p; pfd_p; pfd_p = pfd_p->pe_prev) {
245 pfd_p->pe_valid = B_FALSE;
246 if (pfd_p->pe_lock) {
247 pf_handler_exit(PCIE_PFD2DIP(pfd_p));
248 }
249 }
250 }
251
252 /*
253 * Scan Fabric is the entry point for PCI/PCIe IO fabric errors. The
254 * caller may create a local pf_data_t with the "root fault"
255 * information populated to either do a precise or full scan. More
256 * than one pf_data_t maybe linked together if there are multiple
257 * errors. Only a PCIe compliant Root Port device may pass in NULL
258 * for the root_pfd_p.
259 *
260 * "Root Complexes" such as NPE and PX should call scan_fabric using itself as
261 * the rdip. PCIe Root ports should call pf_scan_fabric using its parent as
262 * the rdip.
263 *
264 * Scan fabric initiated from RCs are likely due to a fabric message, traps or
265 * any RC detected errors that propagated to/from the fabric.
266 *
267 * This code assumes that by the time pf_scan_fabric is
268 * called, pf_handler_enter has NOT been called on the rdip.
269 */
int
pf_scan_fabric(dev_info_t *rdip, ddi_fm_error_t *derr, pf_data_t *root_pfd_p)
{
	pf_impl_t impl;
	pf_data_t *pfd_p, *pfd_head_p, *pfd_tail_p;
	int scan_flag = PF_SCAN_SUCCESS;
	int analyse_flag = PF_ERR_NO_ERROR;
	boolean_t full_scan = pcie_full_scan;

	/* Tunable escape hatch: skip all fabric scanning when set. */
	if (pcie_disable_scan)
		return (analyse_flag);

	/* Find the head and tail of this link list */
	pfd_head_p = root_pfd_p;
	for (pfd_tail_p = root_pfd_p; pfd_tail_p && pfd_tail_p->pe_next;
	    pfd_tail_p = pfd_tail_p->pe_next)
		;

	/* Save head/tail */
	impl.pf_total = 0;
	impl.pf_derr = derr;
	impl.pf_dq_head_p = pfd_head_p;
	impl.pf_dq_tail_p = pfd_tail_p;

	/* If scan is initiated from RP then RP itself must be scanned. */
	if (PCIE_IS_RP(PCIE_DIP2BUS(rdip)) && pf_is_ready(rdip) &&
	    !root_pfd_p) {
		scan_flag = pf_handler_enter(rdip, &impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			goto done;

		scan_flag = pf_default_hdl(rdip, &impl);
		if (scan_flag & PF_SCAN_NO_ERR_IN_CHILD)
			goto done;
	}

	/*
	 * Scan the fabric using the scan_bdf and scan_addr in error q.
	 * scan_bdf will be valid in the following cases:
	 *	- Fabric message
	 *	- Poisoned TLP
	 *	- Signaled UR/CA
	 *	- Received UR/CA
	 *	- PIO load failures
	 */
	for (pfd_p = impl.pf_dq_head_p; pfd_p && PFD_IS_ROOT(pfd_p);
	    pfd_p = pfd_p->pe_next) {
		impl.pf_fault = PCIE_ROOT_FAULT(pfd_p);

		if (PFD_IS_RC(pfd_p))
			impl.pf_total++;

		/* Any single root fault may force the scan to go full. */
		if (impl.pf_fault->full_scan)
			full_scan = B_TRUE;

		/* Dispatch only when there is something to chase. */
		if (full_scan ||
		    PCIE_CHECK_VALID_BDF(impl.pf_fault->scan_bdf) ||
		    impl.pf_fault->scan_addr)
			scan_flag |= pf_dispatch(rdip, &impl, full_scan);

		/* A full scan covers everything; no need to iterate more. */
		if (full_scan)
			break;
	}

done:
	/*
	 * If this is due to safe access, don't analyze the errors and return
	 * success regardless of how scan fabric went.
	 */
	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
		analyse_flag = PF_ERR_NO_PANIC;
	} else {
		analyse_flag = pf_analyse_error(derr, &impl);
	}

	/*
	 * If analyse_flag is 0 or PF_ERR_NO_ERROR, there's nothing here. Skip
	 * ereport generation unless something went wrong with the scan.
	 */
	if ((analyse_flag & ~PF_ERR_NO_ERROR) != 0 ||
	    (scan_flag & (PF_SCAN_CB_FAILURE | PF_SCAN_DEADLOCK)) != 0) {
		pf_send_ereport(derr, &impl);
	} else {
		/* No ereport: still must unlock/invalidate the queue. */
		pf_dq_unlock_chain(&impl);
	}

	/*
	 * Check if any hardened driver's callback reported a panic.
	 * If so panic.
	 */
	if (scan_flag & PF_SCAN_CB_FAILURE)
		analyse_flag |= PF_ERR_PANIC;

	/*
	 * If a deadlock was detected, panic the system as error analysis has
	 * been compromised.
	 */
	if (scan_flag & PF_SCAN_DEADLOCK)
		analyse_flag |= PF_ERR_PANIC_DEADLOCK;

	/*
	 * For fatal errors, cache a copy of the pf_impl_t for post-mortem
	 * analysis (kmdb or mdb against a system crash dump). The ereports
	 * may not make it into the crash dump (errorq_dump can fill up - its
	 * size is 16 * ncpus, so on a 256-CPU system it holds just 4096
	 * entries, and fatal uncorrectable errors can be lost among
	 * correctable errors), but this cached structure will be available for
	 * inspection via the ::pcie_fatal_errors mdb dcmd.
	 *
	 * Note: Whether the system actually panics depends on the caller's
	 * configuration (e.g., the pcieb_die tunable). This cache is populated
	 * whenever PF_ERR_FATAL_FLAGS is set, regardless of whether a panic
	 * will actually occur.
	 */
	if ((analyse_flag & PF_ERR_FATAL_FLAGS) != 0)
		pcie_faulty_pf_impl = impl;

	/* Translate scan result into the DDI FM status seen by the caller. */
	derr->fme_status = PF_ERR2DDIFM_ERR(scan_flag);

	return (analyse_flag);
}
391
void
pcie_force_fullscan(void)
{
	/*
	 * Set the global so every subsequent pf_scan_fabric() walks the
	 * entire fabric instead of only the precise fault path.
	 */
	pcie_full_scan = B_TRUE;
}
397
398 /*
399 * pf_dispatch walks the device tree and calls the pf_default_hdl if the device
400 * falls in the error path.
401 *
402 * Returns PF_SCAN_* flags
403 */
static int
pf_dispatch(dev_info_t *pdip, pf_impl_t *impl, boolean_t full_scan)
{
	dev_info_t *dip;
	pcie_req_id_t rid = impl->pf_fault->scan_bdf;
	pcie_bus_t *bus_p;
	int scan_flag = PF_SCAN_SUCCESS;

	for (dip = ddi_get_child(pdip); dip; dip = ddi_get_next_sibling(dip)) {
		/* Make sure dip is attached and ready */
		if (!(bus_p = pf_is_ready(dip)))
			continue;

		/*
		 * Take the per-device handler lock before touching it; a
		 * detected deadlock aborts the scan of this subtree.
		 */
		scan_flag |= pf_handler_enter(dip, impl);
		if (scan_flag & PF_SCAN_DEADLOCK)
			break;

		/*
		 * Handle this device if it is a:
		 * o Full Scan
		 * o PCI/PCI-X Device
		 * o Fault BDF = Device BDF
		 * o BDF/ADDR is in range of the Bridge/Switch
		 */
		if (full_scan ||
		    (bus_p->bus_bdf == rid) ||
		    pf_in_bus_range(bus_p, rid) ||
		    pf_in_addr_range(bus_p, impl->pf_fault->scan_addr)) {
			int hdl_flag = pf_default_hdl(dip, impl);
			scan_flag |= hdl_flag;

			/*
			 * A bridge may have detected no errors in which case
			 * there is no need to scan further down.
			 */
			if (hdl_flag & PF_SCAN_NO_ERR_IN_CHILD)
				continue;
		} else {
			/* Not in the error path: drop the lock and move on. */
			pf_handler_exit(dip);
			continue;
		}

		/* match or in bridge bus-range */
		switch (bus_p->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
		case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
			/* Crossing a PCIe/PCI boundary: scan everything. */
			scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
		{
			pf_data_t *pfd_p = PCIE_BUS2PFD(bus_p);
			pf_pci_err_regs_t *err_p = PCI_ERR_REG(pfd_p);
			pf_pci_bdg_err_regs_t *serr_p = PCI_BDG_ERR_REG(pfd_p);
			/*
			 * Continue if the fault BDF != the switch or there is a
			 * parity error
			 */
			if ((bus_p->bus_bdf != rid) ||
			    (err_p->pci_err_status & PF_PCI_PARITY_ERR) ||
			    (serr_p->pci_bdg_sec_stat & PF_PCI_PARITY_ERR))
				scan_flag |= pf_dispatch(dip, impl, full_scan);
			break;
		}
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			/*
			 * Reached a PCIe end point so stop. Note dev_type
			 * PCI_DEV is just a PCIe device that requires IO Space
			 */
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			if (PCIE_IS_BDG(bus_p))
				scan_flag |= pf_dispatch(dip, impl, B_TRUE);
			break;
		default:
			/* Unknown device type: should not happen. */
			ASSERT(B_FALSE);
		}
	}
	return (scan_flag);
}
486
487 /* Returns whether the "bdf" is in the bus range of a switch/bridge */
488 boolean_t
pf_in_bus_range(pcie_bus_t * bus_p,pcie_req_id_t bdf)489 pf_in_bus_range(pcie_bus_t *bus_p, pcie_req_id_t bdf)
490 {
491 pci_bus_range_t *br_p = &bus_p->bus_bus_range;
492 uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >>
493 PCIE_REQ_ID_BUS_SHIFT;
494
495 /* check if given bdf falls within bridge's bus range */
496 if (PCIE_IS_BDG(bus_p) &&
497 ((bus_no >= br_p->lo) && (bus_no <= br_p->hi)))
498 return (B_TRUE);
499 else
500 return (B_FALSE);
501 }
502
503 /*
504 * Return whether the "addr" is in the assigned addr of a device.
505 */
506 boolean_t
pf_in_assigned_addr(pcie_bus_t * bus_p,uint64_t addr)507 pf_in_assigned_addr(pcie_bus_t *bus_p, uint64_t addr)
508 {
509 uint_t i;
510 uint64_t low, hi;
511 pci_regspec_t *assign_p = bus_p->bus_assigned_addr;
512
513 for (i = 0; i < bus_p->bus_assigned_entries; i++, assign_p++) {
514 low = assign_p->pci_phys_low;
515 hi = low + assign_p->pci_size_low;
516 if ((addr < hi) && (addr >= low))
517 return (B_TRUE);
518 }
519 return (B_FALSE);
520 }
521
522 /*
523 * Returns whether the "addr" is in the addr range of a switch/bridge, or if the
524 * "addr" is in the assigned addr of a device.
525 */
static boolean_t
pf_in_addr_range(pcie_bus_t *bus_p, uint64_t addr)
{
	uint_t i;
	uint64_t low, hi;
	ppb_ranges_t *ranges_p = bus_p->bus_addr_ranges;

	/* Address 0 is treated as "no address to match". */
	if (!addr)
		return (B_FALSE);

	/* check if given address belongs to this device */
	if (pf_in_assigned_addr(bus_p, addr))
		return (B_TRUE);

	/* check if given address belongs to a child below this device */
	if (!PCIE_IS_BDG(bus_p))
		return (B_FALSE);

	for (i = 0; i < bus_p->bus_addr_entries; i++, ranges_p++) {
		switch (ranges_p->child_high & PCI_ADDR_MASK) {
		case PCI_ADDR_IO:
		case PCI_ADDR_MEM32:
			/* 32-bit window: [child_low, child_low + size_low). */
			low = ranges_p->child_low;
			hi = ranges_p->size_low + low;
			if ((addr < hi) && (addr >= low))
				return (B_TRUE);
			break;
		case PCI_ADDR_MEM64:
			/* 64-bit window: assemble base/size from mid:low. */
			low = ((uint64_t)ranges_p->child_mid << 32) |
			    (uint64_t)ranges_p->child_low;
			hi = (((uint64_t)ranges_p->size_high << 32) |
			    (uint64_t)ranges_p->size_low) + low;
			if ((addr < hi) && (addr >= low))
				return (B_TRUE);
			break;
		}
	}
	return (B_FALSE);
}
565
566 static pcie_bus_t *
pf_is_ready(dev_info_t * dip)567 pf_is_ready(dev_info_t *dip)
568 {
569 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
570 if (!bus_p)
571 return (NULL);
572
573 if (!(bus_p->bus_fm_flags & PF_FM_READY))
574 return (NULL);
575 return (bus_p);
576 }
577
578 static void
pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t * pcix_ecc_regs,pcie_bus_t * bus_p,boolean_t bdg)579 pf_pcix_ecc_regs_gather(pf_pcix_ecc_regs_t *pcix_ecc_regs,
580 pcie_bus_t *bus_p, boolean_t bdg)
581 {
582 if (bdg) {
583 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
584 PCI_PCIX_BDG_ECC_STATUS);
585 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
586 PCI_PCIX_BDG_ECC_FST_AD);
587 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
588 PCI_PCIX_BDG_ECC_SEC_AD);
589 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
590 PCI_PCIX_BDG_ECC_ATTR);
591 } else {
592 pcix_ecc_regs->pcix_ecc_ctlstat = PCIX_CAP_GET(32, bus_p,
593 PCI_PCIX_ECC_STATUS);
594 pcix_ecc_regs->pcix_ecc_fstaddr = PCIX_CAP_GET(32, bus_p,
595 PCI_PCIX_ECC_FST_AD);
596 pcix_ecc_regs->pcix_ecc_secaddr = PCIX_CAP_GET(32, bus_p,
597 PCI_PCIX_ECC_SEC_AD);
598 pcix_ecc_regs->pcix_ecc_attr = PCIX_CAP_GET(32, bus_p,
599 PCI_PCIX_ECC_ATTR);
600 }
601 }
602
603
604 static void
pf_pcix_regs_gather(pf_data_t * pfd_p,pcie_bus_t * bus_p)605 pf_pcix_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
606 {
607 /*
608 * For PCI-X device PCI-X Capability only exists for Type 0 Headers.
609 * PCI-X Bridge Capability only exists for Type 1 Headers.
610 * Both capabilities do not exist at the same time.
611 */
612 if (PCIE_IS_BDG(bus_p)) {
613 pf_pcix_bdg_err_regs_t *pcix_bdg_regs;
614
615 pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);
616
617 pcix_bdg_regs->pcix_bdg_sec_stat = PCIX_CAP_GET(16, bus_p,
618 PCI_PCIX_SEC_STATUS);
619 pcix_bdg_regs->pcix_bdg_stat = PCIX_CAP_GET(32, bus_p,
620 PCI_PCIX_BDG_STATUS);
621
622 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
623 /*
624 * PCI Express to PCI-X bridges only implement the
625 * secondary side of the PCI-X ECC registers, bit one is
626 * read-only so we make sure we do not write to it.
627 */
628 if (!PCIE_IS_PCIE_BDG(bus_p)) {
629 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
630 0);
631 pf_pcix_ecc_regs_gather(
632 PCIX_BDG_ECC_REG(pfd_p, 0), bus_p, B_TRUE);
633 PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
634 1);
635 }
636 pf_pcix_ecc_regs_gather(PCIX_BDG_ECC_REG(pfd_p, 0),
637 bus_p, B_TRUE);
638 }
639 } else {
640 pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);
641
642 pcix_regs->pcix_command = PCIX_CAP_GET(16, bus_p,
643 PCI_PCIX_COMMAND);
644 pcix_regs->pcix_status = PCIX_CAP_GET(32, bus_p,
645 PCI_PCIX_STATUS);
646 if (PCIX_ECC_VERSION_CHECK(bus_p))
647 pf_pcix_ecc_regs_gather(PCIX_ECC_REG(pfd_p), bus_p,
648 B_TRUE);
649 }
650 }
651
/*
 * Capture the PCIe capability, slot, and (when present) AER register state
 * of "bus_p" into "pfd_p" for later analysis and ereport generation.
 */
static void
pf_pcie_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	/* Base PCIe capability: device status/control/capabilities. */
	pcie_regs->pcie_err_status = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS);
	pcie_regs->pcie_err_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
	pcie_regs->pcie_dev_cap = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP);

	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_rp_err_regs_t *pcie_rp_regs = PCIE_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_status = PCIE_CAP_GET(32, bus_p,
		    PCIE_ROOTSTS);
		pcie_rp_regs->pcie_rp_ctl = PCIE_CAP_GET(16, bus_p,
		    PCIE_ROOTCTL);
	}

	/*
	 * For eligible components, we gather Slot Register state.
	 *
	 * Eligible components are:
	 * - a Downstream Port or a Root Port with the Slot Implemented
	 * capability bit set
	 * - hotplug capable
	 *
	 * Slot register state is useful, for instance, to determine whether the
	 * Slot's child device is physically present (via the Slot Status
	 * register).
	 */
	if ((PCIE_IS_SWD(bus_p) || PCIE_IS_ROOT(bus_p)) &&
	    PCIE_IS_HOTPLUG_ENABLED(PCIE_BUS2DIP(bus_p))) {
		pf_pcie_slot_regs_t *pcie_slot_regs = PCIE_SLOT_REG(pfd_p);
		pcie_slot_regs->pcie_slot_cap = PCIE_CAP_GET(32, bus_p,
		    PCIE_SLOTCAP);
		pcie_slot_regs->pcie_slot_control = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTCTL);
		pcie_slot_regs->pcie_slot_status = PCIE_CAP_GET(16, bus_p,
		    PCIE_SLOTSTS);

		/*
		 * Mark the slot registers valid only when none of the reads
		 * returned the all-ones "invalid read" sentinel.
		 */
		if (pcie_slot_regs->pcie_slot_cap != PCI_EINVAL32 &&
		    pcie_slot_regs->pcie_slot_control != PCI_EINVAL16 &&
		    pcie_slot_regs->pcie_slot_status != PCI_EINVAL16) {
			pcie_slot_regs->pcie_slot_regs_valid = B_TRUE;
		}
	}

	/* Everything below requires the AER extended capability. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	/* Gather UE AERs */
	pcie_adv_regs->pcie_adv_ctl = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CTL);
	pcie_adv_regs->pcie_ue_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_STS);
	pcie_adv_regs->pcie_ue_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_MASK);
	pcie_adv_regs->pcie_ue_sev = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_UCE_SERV);
	/* The four header-log DWORDs capture the offending TLP header. */
	PCIE_ADV_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG);
	PCIE_ADV_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x4);
	PCIE_ADV_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0x8);
	PCIE_ADV_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_HDR_LOG + 0xc);

	/* Gather CE AERs */
	pcie_adv_regs->pcie_ce_status = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_STS);
	pcie_adv_regs->pcie_ce_mask = PCIE_AER_GET(32, bus_p,
	    PCIE_AER_CE_MASK);

	/*
	 * If pci express to pci bridge then grab the bridge
	 * error registers.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		pcie_bdg_regs->pcie_sue_ctl = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SCTL);
		pcie_bdg_regs->pcie_sue_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_STS);
		pcie_bdg_regs->pcie_sue_mask = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_MASK);
		pcie_bdg_regs->pcie_sue_sev = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SUCE_SERV);
		PCIE_ADV_BDG_HDR(pfd_p, 0) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG);
		PCIE_ADV_BDG_HDR(pfd_p, 1) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x4);
		PCIE_ADV_BDG_HDR(pfd_p, 2) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0x8);
		PCIE_ADV_BDG_HDR(pfd_p, 3) = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_SHDR_LOG + 0xc);
	}

	/*
	 * If PCI Express root port then grab the root port
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs =
		    PCIE_ADV_RP_REG(pfd_p);

		pcie_rp_regs->pcie_rp_err_cmd = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_CMD);
		pcie_rp_regs->pcie_rp_err_status = PCIE_AER_GET(32, bus_p,
		    PCIE_AER_RE_STS);
		pcie_rp_regs->pcie_rp_ce_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_CE_SRC_ID);
		pcie_rp_regs->pcie_rp_ue_src_id = PCIE_AER_GET(16, bus_p,
		    PCIE_AER_ERR_SRC_ID);
	}
}
774
/*
 * Top-level register capture for a device: always reads the generic PCI
 * status/command, then dispatches to the PCIe or PCI-X specific gatherers
 * based on the device's capabilities.
 */
static void
pf_pci_regs_gather(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pci_err_regs_t *pci_regs = PCI_ERR_REG(pfd_p);

	/*
	 * Start by reading all the error registers that are available for
	 * pci and pci express and for leaf devices and bridges/switches
	 */
	pci_regs->pci_err_status = PCIE_GET(16, bus_p, PCI_CONF_STAT);
	pci_regs->pci_cfg_comm = PCIE_GET(16, bus_p, PCI_CONF_COMM);

	/*
	 * If pci-pci bridge grab PCI bridge specific error registers.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		pci_bdg_regs->pci_bdg_sec_stat =
		    PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
		pci_bdg_regs->pci_bdg_ctrl =
		    PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
	}

	/*
	 * If pci express device grab pci express error registers and
	 * check for advanced error reporting features and grab them if
	 * available.
	 */
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_gather(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_gather(pfd_p, bus_p);

}
809
/*
 * Clear the PCI-X error state previously gathered into "pfd_p" by writing
 * the captured (write-1-to-clear) values back to the hardware registers.
 * Must stay symmetric with pf_pcix_regs_gather().
 */
static void
pf_pcix_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_BDG(bus_p)) {
		pf_pcix_bdg_err_regs_t *pcix_bdg_regs;

		pcix_bdg_regs = PCIX_BDG_ERR_REG(pfd_p);

		PCIX_CAP_PUT(16, bus_p, PCI_PCIX_SEC_STATUS,
		    pcix_bdg_regs->pcix_bdg_sec_stat);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_STATUS,
		    pcix_bdg_regs->pcix_bdg_stat);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_bdg_ecc_regs;
			/*
			 * PCI Express to PCI-X bridges only implement the
			 * secondary side of the PCI-X ECC registers. For
			 * clearing, there is no need to "select" the ECC
			 * register, just write what was originally read.
			 */
			if (!PCIE_IS_PCIE_BDG(bus_p)) {
				/* Primary-side ECC status (slot 0). */
				pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 0);
				PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
				    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);

			}
			/* Secondary-side ECC status (slot 1). */
			pcix_bdg_ecc_regs = PCIX_BDG_ECC_REG(pfd_p, 1);
			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_BDG_ECC_STATUS,
			    pcix_bdg_ecc_regs->pcix_ecc_ctlstat);
		}
	} else {
		pf_pcix_err_regs_t *pcix_regs = PCIX_ERR_REG(pfd_p);

		PCIX_CAP_PUT(32, bus_p, PCI_PCIX_STATUS,
		    pcix_regs->pcix_status);

		if (PCIX_ECC_VERSION_CHECK(bus_p)) {
			pf_pcix_ecc_regs_t *pcix_ecc_regs = PCIX_ECC_REG(pfd_p);

			PCIX_CAP_PUT(32, bus_p, PCI_PCIX_ECC_STATUS,
			    pcix_ecc_regs->pcix_ecc_ctlstat);
		}
	}
}
856
/*
 * Clear the PCIe and AER error state previously gathered into "pfd_p" by
 * writing the captured status values back (these registers are
 * write-1-to-clear, so writing back what was read clears exactly the bits
 * that were set at gather time).
 */
static void
pf_pcie_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	pf_pcie_err_regs_t *pcie_regs = PCIE_ERR_REG(pfd_p);
	pf_pcie_adv_err_regs_t *pcie_adv_regs = PCIE_ADV_REG(pfd_p);

	PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS, pcie_regs->pcie_err_status);

	if (PCIE_IS_BDG(bus_p) && PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	/* Without AER there is nothing further to clear. */
	if (!PCIE_HAS_AER(bus_p))
		return;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_STS,
	    pcie_adv_regs->pcie_ue_status);

	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS,
	    pcie_adv_regs->pcie_ce_status);

	if (PCIE_IS_PCIE_BDG(bus_p)) {
		pf_pcie_adv_bdg_err_regs_t *pcie_bdg_regs =
		    PCIE_ADV_BDG_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_STS,
		    pcie_bdg_regs->pcie_sue_status);
	}

	/*
	 * If PCI Express root complex then clear the root complex
	 * error registers.
	 */
	if (PCIE_IS_ROOT(bus_p)) {
		pf_pcie_adv_rp_err_regs_t *pcie_rp_regs;

		pcie_rp_regs = PCIE_ADV_RP_REG(pfd_p);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_RE_STS,
		    pcie_rp_regs->pcie_rp_err_status);
	}
}
898
/*
 * Top-level error clear for a device: clears the capability-specific
 * (PCIe/PCI-X) state first, then the generic PCI status registers.
 * Mirrors the structure of pf_pci_regs_gather().
 */
static void
pf_pci_regs_clear(pf_data_t *pfd_p, pcie_bus_t *bus_p)
{
	if (PCIE_IS_PCIE(bus_p))
		pf_pcie_regs_clear(pfd_p, bus_p);
	else if (PCIE_IS_PCIX(bus_p))
		pf_pcix_regs_clear(pfd_p, bus_p);

	/* Write back the captured status (write-1-to-clear). */
	PCIE_PUT(16, bus_p, PCI_CONF_STAT, pfd_p->pe_pci_regs->pci_err_status);

	if (PCIE_IS_BDG(bus_p)) {
		pf_pci_bdg_err_regs_t *pci_bdg_regs = PCI_BDG_ERR_REG(pfd_p);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS,
		    pci_bdg_regs->pci_bdg_sec_stat);
	}
}
915
/*
 * Public entry point: snapshot then clear all error register state for
 * "dip".  The gathered values remain in the device's pf_data_t afterwards.
 */
/* ARGSUSED */
void
pcie_clear_errors(dev_info_t *dip)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);

	ASSERT(bus_p);

	/* Gather first so the clear writes back exactly what was set. */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);
}
928
929 /* Find the fault BDF, fault Addr or full scan on a PCIe Root Port. */
930 static void
pf_pci_find_rp_fault(pf_data_t * pfd_p,pcie_bus_t * bus_p)931 pf_pci_find_rp_fault(pf_data_t *pfd_p, pcie_bus_t *bus_p)
932 {
933 pf_root_fault_t *root_fault = PCIE_ROOT_FAULT(pfd_p);
934 pf_pcie_adv_rp_err_regs_t *rp_regs = PCIE_ADV_RP_REG(pfd_p);
935 uint32_t root_err = rp_regs->pcie_rp_err_status;
936 uint32_t ue_err = PCIE_ADV_REG(pfd_p)->pcie_ue_status;
937 int num_faults = 0;
938
939 /* Since this data structure is reused, make sure to reset it */
940 root_fault->full_scan = B_FALSE;
941 root_fault->scan_bdf = PCIE_INVALID_BDF;
942 root_fault->scan_addr = 0;
943
944 if (!PCIE_HAS_AER(bus_p) &&
945 (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR)) {
946 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
947 return;
948 }
949
950 /*
951 * Check to see if an error has been received that
952 * requires a scan of the fabric. Count the number of
953 * faults seen. If MUL CE/FE_NFE that counts for
954 * at least 2 faults, so just return with full_scan.
955 */
956 if ((root_err & PCIE_AER_RE_STS_MUL_CE_RCVD) ||
957 (root_err & PCIE_AER_RE_STS_MUL_FE_NFE_RCVD)) {
958 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
959 return;
960 }
961
962 if (root_err & PCIE_AER_RE_STS_CE_RCVD)
963 num_faults++;
964
965 if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD)
966 num_faults++;
967
968 if (ue_err & PCIE_AER_UCE_CA)
969 num_faults++;
970
971 if (ue_err & PCIE_AER_UCE_UR)
972 num_faults++;
973
974 /* If no faults just return */
975 if (num_faults == 0)
976 return;
977
978 /* If faults > 1 do full scan */
979 if (num_faults > 1) {
980 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
981 return;
982 }
983
984 /* By this point, there is only 1 fault detected */
985 if (root_err & PCIE_AER_RE_STS_CE_RCVD) {
986 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ce_src_id;
987 num_faults--;
988 } else if (root_err & PCIE_AER_RE_STS_FE_NFE_RCVD) {
989 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = rp_regs->pcie_rp_ue_src_id;
990 num_faults--;
991 } else if ((HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_CA) ||
992 HAS_AER_LOGS(pfd_p, PCIE_AER_UCE_UR)) &&
993 (pf_tlp_decode(PCIE_PFD2BUS(pfd_p), PCIE_ADV_REG(pfd_p)) ==
994 DDI_SUCCESS)) {
995 PCIE_ROOT_FAULT(pfd_p)->scan_addr =
996 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr;
997 num_faults--;
998 }
999
1000 /*
1001 * This means an error did occur, but we couldn't extract the fault BDF
1002 */
1003 if (num_faults > 0)
1004 PCIE_ROOT_FAULT(pfd_p)->full_scan = B_TRUE;
1005
1006 }
1007
1008
/*
 * Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue
 *
 * Returns a scan flag.
 * o PF_SCAN_SUCCESS - Error gathered and cleared successfully, data added to
 *   Fault Q
 * o PF_SCAN_BAD_RESPONSE - Unable to talk to device, item added to fault Q
 * o PF_SCAN_CB_FAILURE - A hardened device deemed that the error was fatal.
 * o PF_SCAN_NO_ERR_IN_CHILD - Only applies to bridge to prevent further
 *   unnecessary scanning
 * o PF_SCAN_IN_DQ - This device has already been scanned; it was skipped this
 *   time.
 */
static int
pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
	int cb_sts, scan_flag = PF_SCAN_SUCCESS;

	/* Make sure this device hasn't already been snapshotted and cleared */
	if (pfd_p->pe_valid == B_TRUE) {
		scan_flag |= PF_SCAN_IN_DQ;
		goto done;
	}

	/*
	 * If this is a device used for PCI passthrough into a virtual machine,
	 * don't let any error it caused panic the system.
	 */
	if (bus_p->bus_fm_flags & PF_FM_IS_PASSTHRU)
		pfd_p->pe_severity_mask |= PF_ERR_PANIC;

	/*
	 * Read vendor/device ID and check with cached data; if it doesn't
	 * match, it could very well mean that the device is no longer
	 * responding. In this case, we return PF_SCAN_BAD_RESPONSE; should
	 * the caller choose to panic in this case, we will have the basic
	 * info in the error queue for the purposes of postmortem debugging.
	 */
	if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
		char buf[FM_MAX_CLASS];

		/* Post a "no response" (PCI_NR) ereport for this device. */
		(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
		    PCI_ERROR_SUBCLASS, PCI_NR);
		ddi_fm_ereport_post(dip, buf, fm_ena_generate(0, FM_ENA_FMT1),
		    DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);

		/*
		 * For IOV/Hotplug purposes skip gathering info for this device,
		 * but populate affected info and severity. Clear out any data
		 * that may have been saved in the last fabric scan.
		 */
		pf_reset_pfd(pfd_p);
		pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
		PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;

		/* Add the snapshot to the error q */
		pf_en_dq(pfd_p, impl);
		pfd_p->pe_valid = B_TRUE;

		return (PF_SCAN_BAD_RESPONSE);
	}

	/* Snapshot the error registers first, then clear them in hardware. */
	pf_pci_regs_gather(pfd_p, bus_p);
	pf_pci_regs_clear(pfd_p, bus_p);

	/* Root Ports also record the fault BDF/addr or request a full scan. */
	if (PCIE_IS_RP(bus_p))
		pf_pci_find_rp_fault(pfd_p, bus_p);

	/* Give the (possibly hardened) driver's callback a look at the error */
	cb_sts = pf_fm_callback(dip, impl->pf_derr);

	if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN)
		scan_flag |= PF_SCAN_CB_FAILURE;

	/* Add the snapshot to the error q */
	pf_en_dq(pfd_p, impl);

done:
	/*
	 * If a bridge does not have any error no need to scan any further down.
	 * For PCIe devices, check the PCIe device status and PCI secondary
	 * status.
	 * - Some non-compliant PCIe devices do not utilize PCIe
	 *   error registers. If so rely on legacy PCI error registers.
	 * For PCI devices, check the PCI secondary status.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    !(PCIE_ERR_REG(pfd_p)->pcie_err_status & PF_PCIE_BDG_ERR) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	if (PCIE_IS_PCI_BDG(bus_p) &&
	    !(PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat & PF_PCI_BDG_ERR))
		scan_flag |= PF_SCAN_NO_ERR_IN_CHILD;

	pfd_p->pe_valid = B_TRUE;
	return (scan_flag);
}
1108
1109 /*
1110 * Set the passthru flag on a device bus_p. Called by passthru drivers to
1111 * indicate when a device is or is no longer under passthru control.
1112 */
1113 void
pf_set_passthru(dev_info_t * dip,boolean_t is_passthru)1114 pf_set_passthru(dev_info_t *dip, boolean_t is_passthru)
1115 {
1116 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
1117
1118 if (is_passthru) {
1119 atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_PASSTHRU);
1120 } else {
1121 atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_PASSTHRU);
1122 }
1123 }
1124
/*
 * Called during postattach to initialize a device's error handling
 * capabilities. If the devices has already been hardened, then there isn't
 * much needed. Otherwise initialize the device's default FMA capabilities.
 *
 * In a future project where PCIe support is removed from pcifm, several
 * "properties" that are setup in ddi_fm_init and pci_ereport_setup need to be
 * created here so that the PCI/PCIe eversholt rules will work properly.
 */
void
pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc, ddi_attach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	boolean_t need_cb_register = B_FALSE;

	if (!bus_p) {
		cmn_err(CE_WARN, "devi_bus information is not set for %s%d.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return;
	}

	/* A non-NULL fmhdl means the driver already did its own FM init. */
	if (fmhdl) {
		/*
		 * If device is only ereport capable and not callback capable
		 * make it callback capable. The only downside is that the
		 * "fm-errcb-capable" property is not created for this device
		 * which should be ok since it's not used anywhere.
		 */
		if (!(fmhdl->fh_cap & DDI_FM_ERRCB_CAPABLE))
			need_cb_register = B_TRUE;
	} else {
		int cap;
		/*
		 * fm-capable in driver.conf can be used to set fm_capabilities.
		 * If fm-capable is not defined, set the default
		 * DDI_FM_EREPORT_CAPABLE and DDI_FM_ERRCB_CAPABLE.
		 */
		cap = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
		    DDI_PROP_DONTPASS, "fm-capable",
		    DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);
		cap &= (DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE);

		/* Mark the device as non-hardened (no driver FM support). */
		atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_IS_NH);

		if (cmd == DDI_ATTACH) {
			ddi_fm_init(dip, &cap, &ibc);
			pci_ereport_setup(dip);
		}

		if (cap & DDI_FM_ERRCB_CAPABLE)
			need_cb_register = B_TRUE;

		/* ddi_fm_init may have created the handle; re-fetch it. */
		fmhdl = DEVI(dip)->devi_fmhdl;
	}

	/* If ddi_fm_init fails for any reason RETURN */
	if (!fmhdl) {
		(void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);
		return;
	}

	fmhdl->fh_cap |= DDI_FM_ERRCB_CAPABLE;
	if (cmd == DDI_ATTACH) {
		/* Supply a default callback for non-callback-capable devs. */
		if (need_cb_register)
			ddi_fm_handler_register(dip, pf_dummy_cb, NULL);
	}

	/* Device can now participate in fabric error handling. */
	atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);
}
1195
/* undo FMA lock, called at predetach */
void
pf_fini(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);

	/* Nothing to undo if the bus structure was never set up. */
	if (!bus_p)
		return;

	/* Don't fini anything if device isn't FM Ready */
	if (!(bus_p->bus_fm_flags & PF_FM_READY))
		return;

	/* no other code should set the flag to false */
	atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_READY);

	/*
	 * Grab the mutex to make sure device isn't in the middle of
	 * error handling. Setting the bus_fm_flag to ~PF_FM_READY
	 * should prevent this device from being error handled after
	 * the mutex has been released.
	 */
	(void) pf_handler_enter(dip, NULL);
	pf_handler_exit(dip);

	/* undo non-hardened drivers: tear down the FM state pf_init set up */
	if (bus_p->bus_fm_flags & PF_FM_IS_NH) {
		if (cmd == DDI_DETACH) {
			atomic_and_uint(&bus_p->bus_fm_flags, ~PF_FM_IS_NH);
			pci_ereport_teardown(dip);
			/*
			 * ddi_fini itself calls ddi_handler_unregister,
			 * so no need to explicitly call unregister.
			 */
			ddi_fm_fini(dip);
		}
	}
}
1234
/*
 * Default error callback registered by pf_init for devices that did not
 * register their own; it never claims the error (always DDI_FM_OK).
 */
/*ARGSUSED*/
static int
pf_dummy_cb(dev_info_t *dip, ddi_fm_error_t *derr, const void *not_used)
{
	return (DDI_FM_OK);
}
1241
/*
 * Add PFD to queue. If it is an RC add it to the beginning,
 * otherwise add it to the end.
 *
 * Queue invariant: all root (RC/RP) pfds form a contiguous run at the
 * head of the queue, followed by all non-root pfds.
 */
static void
pf_en_dq(pf_data_t *pfd_p, pf_impl_t *impl)
{
	pf_data_t *head_p = impl->pf_dq_head_p;
	pf_data_t *tail_p = impl->pf_dq_tail_p;

	impl->pf_total++;

	/* Empty queue: the very first entry enqueued must be a root pfd. */
	if (!head_p) {
		ASSERT(PFD_IS_ROOT(pfd_p));
		impl->pf_dq_head_p = pfd_p;
		impl->pf_dq_tail_p = pfd_p;
		pfd_p->pe_prev = NULL;
		pfd_p->pe_next = NULL;
		return;
	}

	/* Check if this is a Root Port eprt */
	if (PFD_IS_ROOT(pfd_p)) {
		pf_data_t *root_p, *last_p = NULL;

		/* The first item must be a RP */
		root_p = head_p;
		/* Walk past the run of root pfds at the head of the queue. */
		for (last_p = head_p; last_p && PFD_IS_ROOT(last_p);
		    last_p = last_p->pe_next)
			root_p = last_p;

		/* root_p is the last RP pfd. last_p is the first non-RP pfd. */
		root_p->pe_next = pfd_p;
		pfd_p->pe_prev = root_p;
		pfd_p->pe_next = last_p;

		/* No non-root pfds yet: the new root pfd is the new tail. */
		if (last_p)
			last_p->pe_prev = pfd_p;
		else
			tail_p = pfd_p;
	} else {
		/* Non-root pfds are appended at the tail. */
		tail_p->pe_next = pfd_p;
		pfd_p->pe_prev = tail_p;
		pfd_p->pe_next = NULL;
		tail_p = pfd_p;
	}

	impl->pf_dq_head_p = head_p;
	impl->pf_dq_tail_p = tail_p;
}
1292
/*
 * Ignore:
 * - TRAINING: as leaves do not have children
 * - SD: as leaves do not have children
 */
/*
 * PCIe leaf device analysis table: maps each AER uncorrectable error bit
 * to its analysis routine plus primary/secondary affected-device flags.
 */
const pf_fab_err_tbl_t pcie_pcie_tbl[] = {
	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_CA, pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_PARENT, 0},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_SELF, 0},

	{PCIE_AER_UCE_UR, pf_analyse_ca_ur,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1331
/*
 * Root Port analysis table: maps each AER uncorrectable error bit to its
 * analysis routine plus primary/secondary affected-device flags.
 */
const pf_fab_err_tbl_t pcie_rp_tbl[] = {
	{PCIE_AER_UCE_TRAINING, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_ADDR, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_CA, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};
1372
/*
 * Switch (upstream/downstream port) analysis table: maps each AER
 * uncorrectable error bit to its analysis routine plus affected flags.
 */
const pf_fab_err_tbl_t pcie_sw_tbl[] = {
	{PCIE_AER_UCE_TRAINING, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_DLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_SD, pf_no_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_PTLP, pf_analyse_ptlp,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_FCP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_TO, pf_analyse_to,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_CA, pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UC, pf_analyse_uc,
	    0, 0},

	{PCIE_AER_UCE_RO, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_UCE_MTLP, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_AER,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_ECRC, pf_no_panic,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{PCIE_AER_UCE_UR, pf_analyse_ca_ur,
	    PF_AFFECTED_AER, PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN},

	{0, NULL, 0, 0}
};
1413
/*
 * PCIe-PCI bridge secondary (SAER) analysis table: maps each secondary
 * uncorrectable error bit to its analysis routine plus affected flags.
 */
const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = {
	{PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta,
	    0, 0},

	{PCIE_AER_SUCE_USC_ERR, pf_panic,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data,
	    PF_AFFECTED_SAER, PF_AFFECTED_CHILDREN},

	{PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic,
	    PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert,
	    0, 0},

	{PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic,
	    0, 0},

	{PCIE_AER_SUCE_INTERNAL_ERR, pf_panic,
	    PF_AFFECTED_SELF | PF_AFFECTED_CHILDREN, 0},

	{0, NULL, 0, 0}
};
1456
/*
 * Legacy PCI bridge secondary status analysis table: maps PCI secondary
 * status bits to their analysis routine plus affected flags.
 */
const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = {
	{PCI_STAT_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR, pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1478
/*
 * Legacy PCI device analysis table: maps PCI status register bits to
 * their analysis routine plus affected flags.
 */
const pf_fab_err_tbl_t pcie_pci_tbl[] = {
	{PCI_STAT_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_PERROR, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_SYSERR, pf_panic,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_MAST_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_R_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{PCI_STAT_S_TARG_AB, pf_analyse_pci,
	    PF_AFFECTED_SELF, 0},

	{0, NULL, 0, 0}
};
1500
/*
 * Unmasked (i.e. enabled) primary/secondary uncorrectable error status
 * bits from a pfd's saved AER registers; "mask ^ 0xFFFFFFFF" is the
 * 32-bit complement of the mask register.
 */
#define	PF_MASKED_AER_ERR(pfd_p) \
	(PCIE_ADV_REG(pfd_p)->pcie_ue_status & \
	((PCIE_ADV_REG(pfd_p)->pcie_ue_mask) ^ 0xFFFFFFFF))
#define	PF_MASKED_SAER_ERR(pfd_p) \
	(PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status & \
	((PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask) ^ 0xFFFFFFFF))
/*
 * Analyse all the PCIe Fault Data (erpt) gathered during dispatch in the erpt
 * Queue.
 *
 * Returns the OR of the final severity flags of every pfd in the queue.
 */
static int
pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
{
	int sts_flags, error_flags = 0;
	pf_data_t *pfd_p;

	for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
		sts_flags = 0;

		/* skip analysing error when no error info is gathered */
		if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
			goto done;

		/* Pick the analysis table matching the device type. */
		switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
		case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
		case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pcie_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_ROOT:
			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO:
			/* no adjust_for_aer for pseudo RC */
			/* keep the severity passed on from RC if any */
			sts_flags |= pfd_p->pe_severity_flags;
			sts_flags |= pf_analyse_error_tbl(derr, impl, pfd_p,
			    pcie_rp_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_UP:
		case PCIE_PCIECAP_DEV_TYPE_DOWN:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			pf_adjust_for_no_aer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_sw_tbl, PF_MASKED_AER_ERR(pfd_p));
			break;
		case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
			if (PCIE_DEVSTS_CE_DETECTED &
			    PCIE_ERR_REG(pfd_p)->pcie_err_status)
				sts_flags |= PF_ERR_CE;

			/* Bridges are checked on both primary and secondary */
			pf_adjust_for_no_aer(pfd_p);
			pf_adjust_for_no_saer(pfd_p);
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_tbl,
			    PF_MASKED_AER_ERR(pfd_p));
			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pcie_bdg_tbl,
			    PF_MASKED_SAER_ERR(pfd_p));
			/*
			 * Some non-compliant PCIe devices do not utilize PCIe
			 * error registers. So fallthrough and rely on legacy
			 * PCI error registers.
			 */
			if ((PCIE_DEVSTS_NFE_DETECTED | PCIE_DEVSTS_FE_DETECTED)
			    & PCIE_ERR_REG(pfd_p)->pcie_err_status)
				break;
			/* FALLTHROUGH */
		case PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO:
			sts_flags |= pf_analyse_error_tbl(derr, impl,
			    pfd_p, pcie_pci_tbl,
			    PCI_ERR_REG(pfd_p)->pci_err_status);

			if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p)))
				break;

			sts_flags |= pf_analyse_error_tbl(derr,
			    impl, pfd_p, pcie_pci_bdg_tbl,
			    PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat);
		}

		pfd_p->pe_severity_flags = sts_flags;

done:
		pfd_p->pe_orig_severity_flags = pfd_p->pe_severity_flags;
		/* Have pciev_eh adjust the severity */
		pfd_p->pe_severity_flags = pciev_eh(pfd_p, impl);

		/* Drop any severities the pfd has asked to be masked off. */
		pfd_p->pe_severity_flags &= ~pfd_p->pe_severity_mask;

		error_flags |= pfd_p->pe_severity_flags;
	}

	return (error_flags);
}
1606
1607 static int
pf_analyse_error_tbl(ddi_fm_error_t * derr,pf_impl_t * impl,pf_data_t * pfd_p,const pf_fab_err_tbl_t * tbl,uint32_t err_reg)1608 pf_analyse_error_tbl(ddi_fm_error_t *derr, pf_impl_t *impl,
1609 pf_data_t *pfd_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg)
1610 {
1611 const pf_fab_err_tbl_t *row;
1612 int err = 0;
1613 uint16_t flags;
1614 uint32_t bit;
1615
1616 for (row = tbl; err_reg && (row->bit != 0); row++) {
1617 bit = row->bit;
1618 if (!(err_reg & bit))
1619 continue;
1620 err |= row->handler(derr, bit, impl->pf_dq_head_p, pfd_p);
1621
1622 flags = row->affected_flags;
1623 /*
1624 * check if the primary flag is valid;
1625 * if not, use the secondary flag
1626 */
1627 if (flags & PF_AFFECTED_AER) {
1628 if (!HAS_AER_LOGS(pfd_p, bit)) {
1629 flags = row->sec_affected_flags;
1630 }
1631 } else if (flags & PF_AFFECTED_SAER) {
1632 if (!HAS_SAER_LOGS(pfd_p, bit)) {
1633 flags = row->sec_affected_flags;
1634 }
1635 } else if (flags & PF_AFFECTED_ADDR) {
1636 /* only Root has this flag */
1637 if (PCIE_ROOT_FAULT(pfd_p)->scan_addr == 0) {
1638 flags = row->sec_affected_flags;
1639 }
1640 }
1641
1642 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags |= flags;
1643 }
1644
1645 if (!err)
1646 err = PF_ERR_NO_ERROR;
1647
1648 return (err);
1649 }
1650
1651 /*
1652 * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device
1653 * issues a CA/UR a corresponding Received CA/UR should have been seen in the
1654 * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then
1655 * this error may be safely ignored. If not check the logs and see if an
1656 * associated handler for this transaction can be found.
1657 */
1658 /* ARGSUSED */
1659 static int
pf_analyse_ca_ur(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1660 pf_analyse_ca_ur(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1661 pf_data_t *pfd_p)
1662 {
1663 uint32_t abort_type;
1664 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1665
1666 /* If UR's are masked forgive this error */
1667 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1668 (bit == PCIE_AER_UCE_UR))
1669 return (PF_ERR_NO_PANIC);
1670
1671 /*
1672 * If a RP has an CA/UR it means a leaf sent a bad request to the RP
1673 * such as a config read or a bad DMA address.
1674 */
1675 if (PCIE_IS_RP(PCIE_PFD2BUS(pfd_p)))
1676 goto handle_lookup;
1677
1678 if (bit == PCIE_AER_UCE_UR)
1679 abort_type = PCI_STAT_R_MAST_AB;
1680 else
1681 abort_type = PCI_STAT_R_TARG_AB;
1682
1683 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1684 return (PF_ERR_MATCHED_RC);
1685
1686 handle_lookup:
1687 if (HAS_AER_LOGS(pfd_p, bit) &&
1688 pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) == PF_HDL_FOUND)
1689 return (PF_ERR_MATCHED_DEVICE);
1690
1691 return (PF_ERR_PANIC);
1692 }
1693
1694 /*
1695 * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe
1696 * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in
1697 * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so
1698 * then this error may be safely ignored. If not check the logs and see if an
1699 * associated handler for this transaction can be found.
1700 */
1701 /* ARGSUSED */
1702 static int
pf_analyse_ma_ta(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1703 pf_analyse_ma_ta(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1704 pf_data_t *pfd_p)
1705 {
1706 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1707 uint32_t abort_type;
1708
1709 /* If UR's are masked forgive this error */
1710 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1711 (bit == PCIE_AER_SUCE_RCVD_MA))
1712 return (PF_ERR_NO_PANIC);
1713
1714 if (bit == PCIE_AER_SUCE_RCVD_MA)
1715 abort_type = PCI_STAT_R_MAST_AB;
1716 else
1717 abort_type = PCI_STAT_R_TARG_AB;
1718
1719 if (pf_matched_in_rc(dq_head_p, pfd_p, abort_type))
1720 return (PF_ERR_MATCHED_RC);
1721
1722 if (!HAS_SAER_LOGS(pfd_p, bit))
1723 return (PF_ERR_PANIC);
1724
1725 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
1726 return (PF_ERR_MATCHED_DEVICE);
1727
1728 return (PF_ERR_PANIC);
1729 }
1730
1731 /*
1732 * Generic PCI error analyser. This function is used for Parity Errors,
1733 * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts.
1734 * In general PCI devices do not have error logs, it is very difficult to figure
1735 * out what transaction caused the error. Instead find the nearest PCIe-PCI
1736 * Bridge and check to see if it has logs and if it has an error associated with
1737 * this PCI Device.
1738 */
1739 /* ARGSUSED */
1740 static int
pf_analyse_pci(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1741 pf_analyse_pci(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1742 pf_data_t *pfd_p)
1743 {
1744 pf_data_t *parent_pfd_p;
1745 uint16_t cmd;
1746 uint32_t aer_ue_status;
1747 pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
1748 pf_pcie_adv_bdg_err_regs_t *parent_saer_p;
1749
1750 if (PCI_ERR_REG(pfd_p)->pci_err_status & PCI_STAT_S_SYSERR)
1751 return (PF_ERR_PANIC);
1752
1753 /* If UR's are masked forgive this error */
1754 if ((pcie_get_aer_uce_mask() & PCIE_AER_UCE_UR) &&
1755 (bit == PCI_STAT_R_MAST_AB))
1756 return (PF_ERR_NO_PANIC);
1757
1758
1759 if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) {
1760 aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT;
1761 } else {
1762 aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC |
1763 PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA |
1764 PCIE_AER_SUCE_RCVD_MA);
1765 }
1766
1767 parent_pfd_p = pf_get_parent_pcie_bridge(pfd_p);
1768 if (parent_pfd_p == NULL)
1769 return (PF_ERR_PANIC);
1770
1771 /* Check if parent bridge has seen this error */
1772 parent_saer_p = PCIE_ADV_BDG_REG(parent_pfd_p);
1773 if (!(parent_saer_p->pcie_sue_status & aer_ue_status) ||
1774 !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status))
1775 return (PF_ERR_PANIC);
1776
1777 /*
1778 * If the addr or bdf from the parent PCIe bridge logs belong to this
1779 * PCI device, assume the PCIe bridge's error handling has already taken
1780 * care of this PCI device's error.
1781 */
1782 if (pf_pci_decode(parent_pfd_p, &cmd) != DDI_SUCCESS)
1783 return (PF_ERR_PANIC);
1784
1785 if ((parent_saer_p->pcie_sue_tgt_bdf == bus_p->bus_bdf) ||
1786 pf_in_addr_range(bus_p, parent_saer_p->pcie_sue_tgt_addr))
1787 return (PF_ERR_MATCHED_PARENT);
1788
1789 /*
1790 * If this device is a PCI-PCI bridge, check if the bdf in the parent
1791 * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges.
1792 * If they are, then assume the PCIe bridge's error handling has already
1793 * taken care of this PCI-PCI bridge device's error.
1794 */
1795 if (PCIE_IS_BDG(bus_p) &&
1796 pf_in_bus_range(bus_p, parent_saer_p->pcie_sue_tgt_bdf))
1797 return (PF_ERR_MATCHED_PARENT);
1798
1799 return (PF_ERR_PANIC);
1800 }
1801
/*
 * PCIe Bridge transactions associated with PERR.
 * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe
 * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe
 * o Bridge received a poisoned Completion on a Split Transaction from PCIe
 * o Bridge received a poisoned Completion on a Delayed Transaction from PCIe
 *
 * Check for non-poisoned PCIe transactions that got forwarded to the secondary
 * side and detects a PERR#. Except for delayed read completions, a poisoned
 * TLP will be forwarded to the secondary bus and PERR# will be asserted.
 */
/* ARGSUSED */
static int
pf_analyse_perr_assert(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
    pf_data_t *pfd_p)
{
	dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
	uint16_t cmd;
	int hdl_sts = PF_HDL_NOTFOUND;
	int err = PF_ERR_NO_ERROR;
	pf_pcie_adv_bdg_err_regs_t *saer_p;


	if (HAS_SAER_LOGS(pfd_p, bit)) {
		saer_p = PCIE_ADV_BDG_REG(pfd_p);
		if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
			return (PF_ERR_PANIC);

		/* Analyse by the PCI/PCI-X command captured in the logs. */
cmd_switch:
		switch (cmd) {
		case PCI_PCIX_CMD_IOWR:
		case PCI_PCIX_CMD_MEMWR:
		case PCI_PCIX_CMD_MEMWR_BL:
		case PCI_PCIX_CMD_MEMWRBL:
			/* Posted Writes Transactions */
			if (saer_p->pcie_sue_tgt_trans == PF_ADDR_PIO)
				hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
				    B_FALSE);
			break;
		case PCI_PCIX_CMD_CFWR:
			/*
			 * Check to see if it is a non-posted write. If so, a
			 * UR Completion would have been sent.
			 */
			if (pf_matched_in_rc(dq_head_p, pfd_p,
			    PCI_STAT_R_MAST_AB)) {
				hdl_sts = PF_HDL_FOUND;
				err = PF_ERR_MATCHED_RC;
				goto done;
			}
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_SPL:
			hdl_sts = pf_log_hdl_lookup(rpdip, derr, pfd_p,
			    B_FALSE);
			break;
		case PCI_PCIX_CMD_DADR:
			/*
			 * Dual-address cycle: the real command is in the
			 * upper header bits; re-dispatch on it.
			 */
			cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
			    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
			    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
			if (cmd != PCI_PCIX_CMD_DADR)
				goto cmd_switch;
			/* FALLTHROUGH */
		default:
			/* Unexpected situation, panic */
			hdl_sts = PF_HDL_NOTFOUND;
		}

		if (hdl_sts == PF_HDL_FOUND)
			err = PF_ERR_MATCHED_DEVICE;
		else
			err = PF_ERR_PANIC;
	} else {
		/*
		 * Check to see if it is a non-posted write. If so, a UR
		 * Completion would have been sent.
		 */
		if ((PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED) &&
		    pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_R_MAST_AB))
			err = PF_ERR_MATCHED_RC;

		/* Check for posted writes. Transaction is lost. */
		if (PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_S_PERROR)
			err = PF_ERR_PANIC;

		/*
		 * All other scenarios are due to read completions. Check for
		 * PERR on the primary side. If found the primary side error
		 * handling will take care of this error.
		 */
		if (err == PF_ERR_NO_ERROR) {
			if (PCI_ERR_REG(pfd_p)->pci_err_status &
			    PCI_STAT_PERROR)
				err = PF_ERR_MATCHED_PARENT;
			else
				err = PF_ERR_PANIC;
		}
	}

done:
	return (err);
}
1907
1908 /*
1909 * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP,
1910 * check the logs and see if an associated handler for this transaction can be
1911 * found.
1912 */
1913 /* ARGSUSED */
1914 static int
pf_analyse_ptlp(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)1915 pf_analyse_ptlp(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
1916 pf_data_t *pfd_p)
1917 {
1918 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
1919
1920 /*
1921 * If AERs are supported find the logs in this device, otherwise look in
1922 * it's parent's logs.
1923 */
1924 if (HAS_AER_LOGS(pfd_p, bit)) {
1925 pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&PCIE_ADV_HDR(pfd_p, 0);
1926
1927 /*
1928 * Double check that the log contains a poisoned TLP.
1929 * Some devices like PLX switch do not log poison TLP headers.
1930 */
1931 if (hdr->ep) {
1932 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_TRUE) ==
1933 PF_HDL_FOUND)
1934 return (PF_ERR_MATCHED_DEVICE);
1935 }
1936
1937 /*
1938 * If an address is found and hdl lookup failed panic.
1939 * Otherwise check parents to see if there was enough
1940 * information recover.
1941 */
1942 if (PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr)
1943 return (PF_ERR_PANIC);
1944 }
1945
1946 /*
1947 * Check to see if the rc has already handled this error or a parent has
1948 * already handled this error.
1949 *
1950 * If the error info in the RC wasn't enough to find the fault device,
1951 * such as if the faulting device lies behind a PCIe-PCI bridge from a
1952 * poisoned completion, check to see if the PCIe-PCI bridge has enough
1953 * info to recover. For completion TLP's, the AER header logs only
1954 * contain the faulting BDF in the Root Port. For PCIe device the fault
1955 * BDF is the fault device. But if the fault device is behind a
1956 * PCIe-PCI bridge the fault BDF could turn out just to be a PCIe-PCI
1957 * bridge's secondary bus number.
1958 */
1959 if (!PFD_IS_ROOT(pfd_p)) {
1960 dev_info_t *pdip = ddi_get_parent(PCIE_PFD2DIP(pfd_p));
1961 pf_data_t *parent_pfd_p;
1962
1963 if (PCIE_PFD2BUS(pfd_p)->bus_rp_dip == pdip) {
1964 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
1965 return (PF_ERR_MATCHED_RC);
1966 }
1967
1968 parent_pfd_p = PCIE_DIP2PFD(pdip);
1969
1970 if (HAS_AER_LOGS(parent_pfd_p, bit))
1971 return (PF_ERR_MATCHED_PARENT);
1972 } else {
1973 pf_data_t *bdg_pfd_p;
1974 pcie_req_id_t secbus;
1975
1976 /*
1977 * Looking for a pcie bridge only makes sense if the BDF
1978 * Dev/Func = 0/0
1979 */
1980 if (!PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
1981 goto done;
1982
1983 secbus = PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf;
1984
1985 if (!PCIE_CHECK_VALID_BDF(secbus) || (secbus & 0xFF))
1986 goto done;
1987
1988 bdg_pfd_p = pf_get_pcie_bridge(pfd_p, secbus);
1989
1990 if (bdg_pfd_p && HAS_SAER_LOGS(bdg_pfd_p,
1991 PCIE_AER_SUCE_PERR_ASSERT)) {
1992 return pf_analyse_perr_assert(derr,
1993 PCIE_AER_SUCE_PERR_ASSERT, dq_head_p, pfd_p);
1994 }
1995 }
1996 done:
1997 return (PF_ERR_PANIC);
1998 }
1999
2000 /*
2001 * PCIe-PCI Bridge Received Master and Target abort error analyser on Split
2002 * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an
2003 * associated handler for this transaction can be found.
2004 */
2005 /* ARGSUSED */
2006 static int
pf_analyse_sc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2007 pf_analyse_sc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2008 pf_data_t *pfd_p)
2009 {
2010 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2011 uint16_t cmd;
2012 int sts = PF_HDL_NOTFOUND;
2013
2014 if (!HAS_SAER_LOGS(pfd_p, bit))
2015 return (PF_ERR_PANIC);
2016
2017 if (pf_pci_decode(pfd_p, &cmd) != DDI_SUCCESS)
2018 return (PF_ERR_PANIC);
2019
2020 if (cmd == PCI_PCIX_CMD_SPL)
2021 sts = pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE);
2022
2023 if (sts == PF_HDL_FOUND)
2024 return (PF_ERR_MATCHED_DEVICE);
2025
2026 return (PF_ERR_PANIC);
2027 }
2028
2029 /*
2030 * PCIe Timeout error analyser. This error can be forgiven if it is marked as
2031 * CE Advisory. If it is marked as advisory, this means the HW can recover
2032 * and/or retry the transaction automatically. Additionally, if a device's
2033 * parent slot reports that it is no longer physically present, we do not panic,
2034 * as one would not expect a missing device to respond to a command.
2035 */
2036 /* ARGSUSED */
2037 static int
pf_analyse_to(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2038 pf_analyse_to(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2039 pf_data_t *pfd_p)
2040 {
2041 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2042 pf_data_t *rppfd = PCIE_DIP2PFD(rpdip);
2043 pf_pcie_slot_regs_t *p_pcie_slot_regs;
2044
2045 if (HAS_AER_LOGS(pfd_p, bit) && CE_ADVISORY(pfd_p))
2046 return (PF_ERR_NO_PANIC);
2047
2048 p_pcie_slot_regs = PCIE_SLOT_REG(rppfd);
2049 if (p_pcie_slot_regs->pcie_slot_regs_valid) {
2050 /*
2051 * If the device is reported gone from its parent slot, then it
2052 * is expected that any outstanding commands would time out. In
2053 * this case, do not panic.
2054 */
2055 if ((p_pcie_slot_regs->pcie_slot_status &
2056 PCIE_SLOTSTS_PRESENCE_DETECTED) == 0x0) {
2057 return (PF_ERR_NO_PANIC);
2058 }
2059 }
2060
2061 return (PF_ERR_PANIC);
2062 }
2063
2064 /*
2065 * PCIe Unexpected Completion. Check to see if this TLP was misrouted by
2066 * matching the device BDF with the TLP Log. If misrouting panic, otherwise
2067 * don't panic.
2068 */
2069 /* ARGSUSED */
2070 static int
pf_analyse_uc(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2071 pf_analyse_uc(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2072 pf_data_t *pfd_p)
2073 {
2074 if (HAS_AER_LOGS(pfd_p, bit) &&
2075 (PCIE_PFD2BUS(pfd_p)->bus_bdf == (PCIE_ADV_HDR(pfd_p, 2) >> 16)))
2076 return (PF_ERR_NO_PANIC);
2077
2078 /*
2079 * This is a case of mis-routing. Any of the switches above this
2080 * device could be at fault.
2081 */
2082 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ROOT;
2083
2084 return (PF_ERR_PANIC);
2085 }
2086
2087 /*
2088 * PCIe-PCI Bridge Uncorrectable Data error analyser. All Uncorrectable Data
2089 * errors should have resulted in a PCIe Poisoned TLP to the RC, except for
2090 * Posted Writes. Check the logs for Posted Writes and if the RC did not see a
2091 * Poisoned TLP.
2092 *
2093 * Non-Posted Writes will also generate a UR in the completion status, which the
2094 * RC should also see.
2095 */
2096 /* ARGSUSED */
2097 static int
pf_analyse_uc_data(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2098 pf_analyse_uc_data(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2099 pf_data_t *pfd_p)
2100 {
2101 dev_info_t *rpdip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2102
2103 if (!HAS_SAER_LOGS(pfd_p, bit))
2104 return (PF_ERR_PANIC);
2105
2106 if (pf_matched_in_rc(dq_head_p, pfd_p, PCI_STAT_PERROR))
2107 return (PF_ERR_MATCHED_RC);
2108
2109 if (pf_log_hdl_lookup(rpdip, derr, pfd_p, B_FALSE) == PF_HDL_FOUND)
2110 return (PF_ERR_MATCHED_DEVICE);
2111
2112 return (PF_ERR_PANIC);
2113 }
2114
2115 /* ARGSUSED */
2116 static int
pf_no_panic(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2117 pf_no_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2118 pf_data_t *pfd_p)
2119 {
2120 return (PF_ERR_NO_PANIC);
2121 }
2122
2123 /* ARGSUSED */
2124 static int
pf_panic(ddi_fm_error_t * derr,uint32_t bit,pf_data_t * dq_head_p,pf_data_t * pfd_p)2125 pf_panic(ddi_fm_error_t *derr, uint32_t bit, pf_data_t *dq_head_p,
2126 pf_data_t *pfd_p)
2127 {
2128 return (PF_ERR_PANIC);
2129 }
2130
/*
 * If a PCIe device does not support AER, assume all AER statuses have been set,
 * unless other registers do not indicate a certain error occurring.
 *
 * Synthesizes a pcie_ue_status bitmask from the legacy PCI/PCIe status
 * registers so the rest of the analysis code can proceed as if AER were
 * present.  No-op when the device really has AER.
 */
static void
pf_adjust_for_no_aer(pf_data_t *pfd_p)
{
	uint32_t aer_ue = 0;
	uint16_t status;

	/* Device has real AER logs; nothing to synthesize. */
	if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
		return;

	/* Start from the full fatal mask if a fatal error was detected. */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
		aer_ue = PF_AER_FATAL_ERR;

	/*
	 * For a non-fatal error, start from the full non-fatal mask and then
	 * clear each bit that the legacy status registers rule out.
	 */
	if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
		aer_ue = PF_AER_NON_FATAL_ERR;
		status = PCI_ERR_REG(pfd_p)->pci_err_status;

		/* Check if the device received a PTLP */
		if (!(status & PCI_STAT_PERROR))
			aer_ue &= ~PCIE_AER_UCE_PTLP;

		/* Check if the device signaled a CA */
		if (!(status & PCI_STAT_S_TARG_AB))
			aer_ue &= ~PCIE_AER_UCE_CA;

		/* Check if the device sent a UR */
		if (!(PCIE_ERR_REG(pfd_p)->pcie_err_status &
		    PCIE_DEVSTS_UR_DETECTED))
			aer_ue &= ~PCIE_AER_UCE_UR;

		/*
		 * Ignore ECRCs as it is optional and will manifest itself as
		 * another error like PTLP and MFP
		 */
		aer_ue &= ~PCIE_AER_UCE_ECRC;

		/*
		 * Generally if NFE is set, SERR should also be set. Exception:
		 * When certain non-fatal errors are masked, and some of them
		 * happened to be the cause of the NFE, SERR will not be set and
		 * they can not be the source of this interrupt.
		 *
		 * On x86, URs are masked (NFE + UR can be set), if any other
		 * non-fatal errors (i.e, PTLP, CTO, CA, UC, ECRC, ACS) did
		 * occur, SERR should be set since they are not masked. So if
		 * SERR is not set, none of them occurred.
		 */
		if (!(status & PCI_STAT_S_SYSERR))
			aer_ue &= ~PCIE_AER_UCE_TO;
	}

	/* Training and surprise-down bits are cleared for non-bridges. */
	if (!PCIE_IS_BDG(PCIE_PFD2BUS(pfd_p))) {
		aer_ue &= ~PCIE_AER_UCE_TRAINING;
		aer_ue &= ~PCIE_AER_UCE_SD;
	}

	PCIE_ADV_REG(pfd_p)->pcie_ue_status = aer_ue;
}
2192
2193 static void
pf_adjust_for_no_saer(pf_data_t * pfd_p)2194 pf_adjust_for_no_saer(pf_data_t *pfd_p)
2195 {
2196 uint32_t s_aer_ue = 0;
2197 uint16_t status;
2198
2199 if (PCIE_HAS_AER(PCIE_PFD2BUS(pfd_p)))
2200 return;
2201
2202 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_FE_DETECTED)
2203 s_aer_ue = PF_SAER_FATAL_ERR;
2204
2205 if (PCIE_ERR_REG(pfd_p)->pcie_err_status & PCIE_DEVSTS_NFE_DETECTED) {
2206 s_aer_ue = PF_SAER_NON_FATAL_ERR;
2207 status = PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat;
2208
2209 /* Check if the device received a UC_DATA */
2210 if (!(status & PCI_STAT_PERROR))
2211 s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR;
2212
2213 /* Check if the device received a RCVD_MA/MA_ON_SC */
2214 if (!(status & (PCI_STAT_R_MAST_AB))) {
2215 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA;
2216 s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC;
2217 }
2218
2219 /* Check if the device received a RCVD_TA/TA_ON_SC */
2220 if (!(status & (PCI_STAT_R_TARG_AB))) {
2221 s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA;
2222 s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC;
2223 }
2224 }
2225
2226 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status = s_aer_ue;
2227 }
2228
2229 /* Find the PCIe-PCI bridge based on secondary bus number */
2230 static pf_data_t *
pf_get_pcie_bridge(pf_data_t * pfd_p,pcie_req_id_t secbus)2231 pf_get_pcie_bridge(pf_data_t *pfd_p, pcie_req_id_t secbus)
2232 {
2233 pf_data_t *bdg_pfd_p;
2234
2235 /* Search down for the PCIe-PCI device. */
2236 for (bdg_pfd_p = pfd_p->pe_next; bdg_pfd_p;
2237 bdg_pfd_p = bdg_pfd_p->pe_next) {
2238 if (PCIE_IS_PCIE_BDG(PCIE_PFD2BUS(bdg_pfd_p)) &&
2239 PCIE_PFD2BUS(bdg_pfd_p)->bus_bdg_secbus == secbus)
2240 return (bdg_pfd_p);
2241 }
2242
2243 return (NULL);
2244 }
2245
2246 /* Find the PCIe-PCI bridge of a PCI device */
2247 static pf_data_t *
pf_get_parent_pcie_bridge(pf_data_t * pfd_p)2248 pf_get_parent_pcie_bridge(pf_data_t *pfd_p)
2249 {
2250 dev_info_t *dip, *rp_dip = PCIE_PFD2BUS(pfd_p)->bus_rp_dip;
2251
2252 /* This only makes sense if the device is a PCI device */
2253 if (!PCIE_IS_PCI(PCIE_PFD2BUS(pfd_p)))
2254 return (NULL);
2255
2256 /*
2257 * Search up for the PCIe-PCI device. Watchout for x86 where pci
2258 * devices hang directly off of NPE.
2259 */
2260 for (dip = PCIE_PFD2DIP(pfd_p); dip; dip = ddi_get_parent(dip)) {
2261 if (dip == rp_dip)
2262 dip = NULL;
2263
2264 if (PCIE_IS_PCIE_BDG(PCIE_DIP2BUS(dip)))
2265 return (PCIE_DIP2PFD(dip));
2266 }
2267
2268 return (NULL);
2269 }
2270
/*
 * See if a leaf error was bubbled up to the Root Complex (RC) and handled.
 * As of right now only RC's have enough information to have errors found in the
 * fabric to be matched to the RC. Note that Root Port's (RP) do not carry
 * enough information. Currently known RC's are SPARC Fire architecture and
 * its equivalents, and x86's NPE.
 * SPARC Fire architectures have a plethora of error registers, while currently
 * NPE only have the address of a failed load.
 *
 * Check if the RC logged an error with the appropriate status type/abort type.
 * Ex: Parity Error, Received Master/Target Abort
 * Check if either the fault address found in the rc matches the device's
 * assigned address range (PIO's only) or the fault BDF in the rc matches the
 * device's BDF or Secondary Bus/Bus Range.
 */
static boolean_t
pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
    uint32_t abort_type)
{
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);
	pf_data_t *rc_pfd_p;
	pcie_req_id_t fault_bdf;

	/*
	 * Walk the root entries at the head of the fault queue.  Note the
	 * loop condition: iteration stops at the first non-root entry, so
	 * only the leading root pfds are examined.
	 */
	for (rc_pfd_p = dq_head_p; PFD_IS_ROOT(rc_pfd_p);
	    rc_pfd_p = rc_pfd_p->pe_next) {
		/* Only root complex's have enough information to match */
		if (!PCIE_IS_RC(PCIE_PFD2BUS(rc_pfd_p)))
			continue;

		/* If device and rc abort type does not match continue */
		if (!(PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat & abort_type))
			continue;

		fault_bdf = PCIE_ROOT_FAULT(rc_pfd_p)->scan_bdf;

		/* The Fault BDF = Device's BDF */
		if (fault_bdf == bus_p->bus_bdf)
			return (B_TRUE);

		/* The Fault Addr is in device's address range */
		if (pf_in_addr_range(bus_p,
		    PCIE_ROOT_FAULT(rc_pfd_p)->scan_addr))
			return (B_TRUE);

		/* The Fault BDF is from PCIe-PCI Bridge's secondary bus */
		if (PCIE_IS_PCIE_BDG(bus_p) &&
		    pf_in_bus_range(bus_p, fault_bdf))
			return (B_TRUE);
	}

	return (B_FALSE);
}
2323
/*
 * Check the RP and see if the error is PIO/DMA. If the RP also has a PERR then
 * it is a DMA, otherwise it's a PIO.
 *
 * Outputs are written through addr/trans_type/bdf; for the ambiguous cases
 * (USC/UC_DATA/PERR_ASSERT) the root entries earlier in the queue are
 * consulted to decide between PIO and DMA.
 */
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
    pcie_req_id_t *bdf)
{
	pf_data_t *rc_pfd_p;

	/*
	 * Could be DMA or PIO.  Find out by look at error type.
	 *
	 * NOTE(review): this switch compares the whole pcie_sue_status word
	 * against single-bit constants; a status with multiple bits set falls
	 * through to default — confirm that is the intended behavior.
	 */
	switch (PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status) {
	case PCIE_AER_SUCE_TA_ON_SC:
	case PCIE_AER_SUCE_MA_ON_SC:
		/* Aborts on split completions are inbound (DMA) traffic. */
		*trans_type = PF_ADDR_DMA;
		return;
	case PCIE_AER_SUCE_RCVD_TA:
	case PCIE_AER_SUCE_RCVD_MA:
		/* Received aborts: outbound (PIO) traffic, BDF unknown. */
		*bdf = PCIE_INVALID_BDF;
		*trans_type = PF_ADDR_PIO;
		return;
	case PCIE_AER_SUCE_USC_ERR:
	case PCIE_AER_SUCE_UC_DATA_ERR:
	case PCIE_AER_SUCE_PERR_ASSERT:
		/* Ambiguous; decided below by scanning the root entries. */
		break;
	default:
		*addr = 0;
		*bdf = PCIE_INVALID_BDF;
		*trans_type = 0;
		return;
	}

	/* Default to PIO unless a root entry also logged a PERR. */
	*bdf = PCIE_INVALID_BDF;
	*trans_type = PF_ADDR_PIO;
	for (rc_pfd_p = pfd_p->pe_prev; rc_pfd_p;
	    rc_pfd_p = rc_pfd_p->pe_prev) {
		if (PFD_IS_ROOT(rc_pfd_p) &&
		    (PCI_BDG_ERR_REG(rc_pfd_p)->pci_bdg_sec_stat &
		    PCI_STAT_PERROR)) {
			*trans_type = PF_ADDR_DMA;
			return;
		}
	}
}
2368
/*
 * pf_pci_decode function decodes the secondary aer transaction logs in
 * PCIe-PCI bridges.
 *
 * The log is 128 bits long and arranged in this manner.
 * [0:35]   Transaction Attribute	(s_aer_h0-saer_h1)
 * [36:39]  Transaction lower command	(saer_h1)
 * [40:43]  Transaction upper command	(saer_h1)
 * [44:63]  Reserved
 * [64:127] Address			(saer_h2-saer_h3)
 *
 * On success the decoded transaction type, BDF and address are stored in the
 * pcie_sue_tgt_* fields of the pfd and the PCI-X command is returned through
 * *cmd; on failure those fields are cleared and DDI_FAILURE is returned.
 */
/* ARGSUSED */
int
pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd)
{
	pcix_attr_t *attr;
	uint64_t addr;
	uint32_t trans_type;
	pcie_req_id_t bdf = PCIE_INVALID_BDF;

	attr = (pcix_attr_t *)&PCIE_ADV_BDG_HDR(pfd_p, 0);
	*cmd = GET_SAER_CMD(pfd_p);

cmd_switch:
	switch (*cmd) {
	case PCI_PCIX_CMD_IORD:
	case PCI_PCIX_CMD_IOWR:
		/* IO Access should always be down stream */
		addr = PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;
		trans_type = PF_ADDR_PIO;
		break;
	case PCI_PCIX_CMD_MEMRD_DW:
	case PCI_PCIX_CMD_MEMRD_BL:
	case PCI_PCIX_CMD_MEMRDBL:
	case PCI_PCIX_CMD_MEMWR:
	case PCI_PCIX_CMD_MEMWR_BL:
	case PCI_PCIX_CMD_MEMWRBL:
		/* 64-bit address assembled from the two high log words. */
		addr = ((uint64_t)PCIE_ADV_BDG_HDR(pfd_p, 3) <<
		    PCIE_AER_SUCE_HDR_ADDR_SHIFT) | PCIE_ADV_BDG_HDR(pfd_p, 2);
		bdf = attr->rid;

		/* Memory access direction is ambiguous; resolve it. */
		pf_pci_find_trans_type(pfd_p, &addr, &trans_type, &bdf);
		break;
	case PCI_PCIX_CMD_CFRD:
	case PCI_PCIX_CMD_CFWR:
		/*
		 * CFG Access should always be down stream.  Match the BDF in
		 * the address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_CFG;
		break;
	case PCI_PCIX_CMD_SPL:
		/*
		 * Check for DMA read completions.  The requesting BDF is in
		 * the Address phase.
		 */
		addr = 0;
		bdf = attr->rid;
		trans_type = PF_ADDR_DMA;
		break;
	case PCI_PCIX_CMD_DADR:
		/*
		 * For Dual Address Cycles the transaction command is in the
		 * 2nd address phase.  Re-dispatch on the upper command unless
		 * it is (bogusly) another DADR, which falls to default.
		 */
		*cmd = (PCIE_ADV_BDG_HDR(pfd_p, 1) >>
		    PCIE_AER_SUCE_HDR_CMD_UP_SHIFT) &
		    PCIE_AER_SUCE_HDR_CMD_UP_MASK;
		if (*cmd != PCI_PCIX_CMD_DADR)
			goto cmd_switch;
		/* FALLTHROUGH */
	default:
		/* Undecodable command: clear the target fields and fail. */
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = PCIE_INVALID_BDF;
		PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
		return (DDI_FAILURE);
	}
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = trans_type;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf = bdf;
	PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = addr;
	return (DDI_SUCCESS);
}
2454
2455 /*
2456 * Based on either the BDF/ADDR find and mark the faulting DMA/ACC handler.
2457 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2458 */
2459 int
pf_hdl_lookup(dev_info_t * dip,uint64_t ena,uint32_t flag,uint64_t addr,pcie_req_id_t bdf)2460 pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint64_t addr,
2461 pcie_req_id_t bdf)
2462 {
2463 ddi_fm_error_t derr;
2464
2465 /* If we don't know the addr or rid just return with NOTFOUND */
2466 if ((addr == 0) && !PCIE_CHECK_VALID_BDF(bdf))
2467 return (PF_HDL_NOTFOUND);
2468
2469 /*
2470 * Disable DMA handle lookup until DMA errors can be handled and
2471 * reported synchronously. When enabled again, check for the
2472 * PF_ADDR_DMA flag
2473 */
2474 if (!(flag & (PF_ADDR_PIO | PF_ADDR_CFG))) {
2475 return (PF_HDL_NOTFOUND);
2476 }
2477
2478 bzero(&derr, sizeof (ddi_fm_error_t));
2479 derr.fme_version = DDI_FME_VERSION;
2480 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
2481 derr.fme_ena = ena;
2482
2483 return (pf_hdl_child_lookup(dip, &derr, flag, addr, bdf));
2484 }
2485
/*
 * Recursively search dip and its children for an access/DMA handle matching
 * the fault address/BDF.  Takes the FM handler lock for each dip it examines
 * unless the caller already owns it.  Returns PF_HDL_FOUND or PF_HDL_NOTFOUND.
 */
static int
pf_hdl_child_lookup(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
    uint64_t addr, pcie_req_id_t bdf)
{
	int status = PF_HDL_NOTFOUND;
	ndi_fmc_t *fcp = NULL;
	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
	pcie_req_id_t dip_bdf;
	boolean_t have_lock = B_FALSE;
	pcie_bus_t *bus_p;
	dev_info_t *cdip;

	/* Devices that are not fully attached cannot be searched. */
	if (!(bus_p = pf_is_ready(dip))) {
		return (status);
	}

	ASSERT(fmhdl);
	if (!i_ddi_fm_handler_owned(dip)) {
		/*
		 * pf_handler_enter always returns SUCCESS if the 'impl' arg is
		 * NULL.
		 */
		(void) pf_handler_enter(dip, NULL);
		have_lock = B_TRUE;
	}

	dip_bdf = PCI_GET_BDF(dip);

	/* Check if dip and BDF match, if not recurse to its children. */
	if (!PCIE_IS_RC(bus_p) && (!PCIE_CHECK_VALID_BDF(bdf) ||
	    dip_bdf == bdf)) {
		/* Pick the DMA handle cache when doing a DMA-flagged search */
		if ((flag & PF_ADDR_DMA) && DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_dma_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, DMA_HANDLE, addr,
			    bdf, fcp);


		/* Then the access handle cache for PIO/CFG searches */
		if (((flag & PF_ADDR_PIO) || (flag & PF_ADDR_CFG)) &&
		    DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap))
			fcp = fmhdl->fh_acc_cache;
		else
			fcp = NULL;

		if (fcp)
			status = pf_hdl_compare(dip, derr, ACC_HANDLE, addr,
			    bdf, fcp);
	}

	/* If we found the handler or know it's this device, we're done */
	if (!PCIE_IS_RC(bus_p) && ((dip_bdf == bdf) ||
	    (status == PF_HDL_FOUND)))
		goto done;

	/*
	 * If the current device is a PCIe-PCI bridge need to check for special
	 * cases:
	 *
	 * If it is a PIO and we don't have an address or this is a DMA, check
	 * to see if the BDF = secondary bus.  If so stop.  The BDF isn't a real
	 * BDF and the fault device could have come from any device in the PCI
	 * bus.
	 */
	if (PCIE_IS_PCIE_BDG(bus_p) &&
	    ((flag & PF_ADDR_DMA || flag & PF_ADDR_PIO)) &&
	    ((bus_p->bus_bdg_secbus << PCIE_REQ_ID_BUS_SHIFT) == bdf))
		goto done;


	/* If we can't find the handler check its children */
	for (cdip = ddi_get_child(dip); cdip;
	    cdip = ddi_get_next_sibling(cdip)) {
		if ((bus_p = PCIE_DIP2BUS(cdip)) == NULL)
			continue;

		/* Only descend into children that could own the fault. */
		if (pf_in_bus_range(bus_p, bdf) ||
		    pf_in_addr_range(bus_p, addr))
			status = pf_hdl_child_lookup(cdip, derr, flag, addr,
			    bdf);

		if (status == PF_HDL_FOUND)
			goto done;
	}

done:
	if (have_lock == B_TRUE)
		pf_handler_exit(dip);

	return (status);
}
2579
2580 static int
pf_hdl_compare(dev_info_t * dip,ddi_fm_error_t * derr,uint32_t flag,uint64_t addr,pcie_req_id_t bdf,ndi_fmc_t * fcp)2581 pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
2582 uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp)
2583 {
2584 ndi_fmcentry_t *fep;
2585 int found = 0;
2586 int status;
2587
2588 mutex_enter(&fcp->fc_lock);
2589 for (fep = fcp->fc_head; fep != NULL; fep = fep->fce_next) {
2590 ddi_fmcompare_t compare_func;
2591
2592 /*
2593 * Compare captured error state with handle
2594 * resources. During the comparison and
2595 * subsequent error handling, we block
2596 * attempts to free the cache entry.
2597 */
2598 if (flag == ACC_HANDLE) {
2599 compare_func =
2600 i_ddi_fm_acc_err_cf_get((ddi_acc_handle_t)
2601 fep->fce_resource);
2602 } else {
2603 compare_func =
2604 i_ddi_fm_dma_err_cf_get((ddi_dma_handle_t)
2605 fep->fce_resource);
2606 }
2607
2608 if (compare_func == NULL) /* unbound or not FLAGERR */
2609 continue;
2610
2611 status = compare_func(dip, fep->fce_resource,
2612 (void *)&addr, (void *)&bdf);
2613
2614 if (status == DDI_FM_NONFATAL) {
2615 found++;
2616
2617 /* Set the error for this resource handle */
2618 if (flag == ACC_HANDLE) {
2619 ddi_acc_handle_t ap = fep->fce_resource;
2620
2621 i_ddi_fm_acc_err_set(ap, derr->fme_ena, status,
2622 DDI_FM_ERR_UNEXPECTED);
2623 ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION);
2624 derr->fme_acc_handle = ap;
2625 } else {
2626 ddi_dma_handle_t dp = fep->fce_resource;
2627
2628 i_ddi_fm_dma_err_set(dp, derr->fme_ena, status,
2629 DDI_FM_ERR_UNEXPECTED);
2630 ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION);
2631 derr->fme_dma_handle = dp;
2632 }
2633 }
2634 }
2635 mutex_exit(&fcp->fc_lock);
2636
2637 /*
2638 * If a handler isn't found and we know this is the right device mark
2639 * them all failed.
2640 */
2641 if ((addr != 0) && PCIE_CHECK_VALID_BDF(bdf) && (found == 0)) {
2642 status = pf_hdl_compare(dip, derr, flag, addr, bdf, fcp);
2643 if (status == PF_HDL_FOUND)
2644 found++;
2645 }
2646
2647 return ((found) ? PF_HDL_FOUND : PF_HDL_NOTFOUND);
2648 }
2649
2650 /*
2651 * Automatically decode AER header logs and does a handling look up based on the
2652 * AER header decoding.
2653 *
2654 * For this function only the Primary/Secondary AER Header Logs need to be valid
2655 * in the pfd (PCIe Fault Data) arg.
2656 *
2657 * Returns either PF_HDL_NOTFOUND or PF_HDL_FOUND.
2658 */
2659 /* ARGSUSED */
2660 static int
pf_log_hdl_lookup(dev_info_t * rpdip,ddi_fm_error_t * derr,pf_data_t * pfd_p,boolean_t is_primary)2661 pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
2662 boolean_t is_primary)
2663 {
2664 /*
2665 * Disabling this function temporarily until errors can be handled
2666 * synchronously.
2667 *
2668 * This function is currently only called during the middle of a fabric
2669 * scan. If the fabric scan is called synchronously with an error seen
2670 * in the RP/RC, then the related errors in the fabric will have a
2671 * PF_ERR_MATCHED_RC error severity. pf_log_hdl_lookup code will be by
2672 * passed when the severity is PF_ERR_MATCHED_RC. Handle lookup would
2673 * have already happened in RP/RC error handling in a synchronous
2674 * manner. Errors unrelated should panic, because they are being
2675 * handled asynchronously.
2676 *
2677 * If fabric scan is called asynchronously from any RP/RC error, then
2678 * DMA/PIO UE errors seen in the fabric should panic. pf_lop_hdl_lookup
2679 * will return PF_HDL_NOTFOUND to ensure that the system panics.
2680 */
2681 return (PF_HDL_NOTFOUND);
2682 }
2683
/*
 * Decodes the TLP and returns the BDF of the handler, address and transaction
 * type if known.
 *
 * Types of TLP logs seen in RC, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Should not occur and result in UR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * Types of TLP logs seen in SW/Leaf, and what to extract:
 *
 * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR
 * Memory(PIO) - address, PF_PIO_ADDR
 * CFG - Destined BDF, address, PF_CFG_ADDR
 * Completion(DMA) - Requester BDF, PF_DMA_ADDR
 * Completion(PIO) - Requester BDF, PF_PIO_ADDR
 *
 * The adv_reg_p must be passed in separately for use with SPARC RPs.  A
 * SPARC RP could have multiple AER header logs which cannot be directly
 * accessed via the bus_p.
 *
 * On success the decoded target address/BDF/transaction type are stored in
 * adv_reg_p->pcie_ue_tgt_* and DDI_SUCCESS is returned; unknown TLP types
 * return DDI_FAILURE with the target fields cleared.
 */
int
pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p)
{
	pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
	pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
	uint64_t flt_addr = 0;
	uint32_t flt_trans_type = 0;

	/* Clear the outputs up front so failure paths leave them sane. */
	adv_reg_p->pcie_ue_tgt_addr = 0;
	adv_reg_p->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
	adv_reg_p->pcie_ue_tgt_trans = 0;

	my_bdf = bus_p->bus_bdf;
	switch (tlp_hdr->type) {
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		/* Grab the 32/64bit fault address */
		if (tlp_hdr->fmt & 0x1) {
			flt_addr = ((uint64_t)adv_reg_p->pcie_ue_hdr[2] << 32);
			flt_addr |= adv_reg_p->pcie_ue_hdr[3];
		} else {
			flt_addr = adv_reg_p->pcie_ue_hdr[2];
		}

		/* Requester ID lives in the upper half of header word 1. */
		tlp_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[1] >> 16);

		/*
		 * If the req bdf >= this.bdf, then it means the request is this
		 * device or came from a device below it.  Unless this device is
		 * a PCIe root port then it means is a DMA, otherwise PIO.
		 */
		if ((tlp_bdf >= my_bdf) && !PCIE_IS_ROOT(bus_p)) {
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else if (PCIE_IS_ROOT(bus_p) &&
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_PTLP, adv_reg_p) ||
		    (PF_FIRST_AER_ERR(PCIE_AER_UCE_CA, adv_reg_p)))) {
			/* Root sees a poisoned TLP/CA first: inbound DMA. */
			flt_trans_type = PF_ADDR_DMA;
			flt_bdf = tlp_bdf;
		} else {
			flt_trans_type = PF_ADDR_PIO;
			flt_bdf = PCIE_INVALID_BDF;
		}
		break;
	case PCIE_TLP_TYPE_CFG0:
	case PCIE_TLP_TYPE_CFG1:
		/* CFG: the destination BDF is in header word 2. */
		flt_addr = 0;
		flt_bdf = (pcie_req_id_t)(adv_reg_p->pcie_ue_hdr[2] >> 16);
		flt_trans_type = PF_ADDR_CFG;
		break;
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
	{
		pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)&adv_reg_p->pcie_ue_hdr[1];

		/* Use the larger of requester/completer ID as the fault BDF */
		flt_addr = 0;
		flt_bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
		    cpl_tlp->cid;

		/*
		 * If the requester ID is larger than the completer ID the
		 * transaction originated at or below this device, i.e. a DMA;
		 * otherwise it is treated as PIO/CFG.
		 */
		if (cpl_tlp->rid > cpl_tlp->cid) {
			flt_trans_type = PF_ADDR_DMA;
		} else {
			flt_trans_type = PF_ADDR_PIO | PF_ADDR_CFG;
		}
		break;
	}
	default:
		return (DDI_FAILURE);
	}

	adv_reg_p->pcie_ue_tgt_addr = flt_addr;
	adv_reg_p->pcie_ue_tgt_bdf = flt_bdf;
	adv_reg_p->pcie_ue_tgt_trans = flt_trans_type;

	return (DDI_SUCCESS);
}
2790
2791 #define PCIE_EREPORT DDI_IO_CLASS "." PCI_ERROR_SUBCLASS "." PCIEX_FABRIC
2792 static int
pf_ereport_setup(dev_info_t * dip,uint64_t ena,nvlist_t ** ereport,nvlist_t ** detector,errorq_elem_t ** eqep)2793 pf_ereport_setup(dev_info_t *dip, uint64_t ena, nvlist_t **ereport,
2794 nvlist_t **detector, errorq_elem_t **eqep)
2795 {
2796 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2797 char device_path[MAXPATHLEN];
2798 nv_alloc_t *nva;
2799
2800 *eqep = errorq_reserve(fmhdl->fh_errorq);
2801 if (*eqep == NULL) {
2802 atomic_inc_64(&fmhdl->fh_kstat.fek_erpt_dropped.value.ui64);
2803 return (DDI_FAILURE);
2804 }
2805
2806 *ereport = errorq_elem_nvl(fmhdl->fh_errorq, *eqep);
2807 nva = errorq_elem_nva(fmhdl->fh_errorq, *eqep);
2808
2809 ASSERT(*ereport);
2810 ASSERT(nva);
2811
2812 /*
2813 * Use the dev_path/devid for this device instance.
2814 */
2815 *detector = fm_nvlist_create(nva);
2816 if (dip == ddi_root_node()) {
2817 device_path[0] = '/';
2818 device_path[1] = '\0';
2819 } else {
2820 (void) ddi_pathname(dip, device_path);
2821 }
2822
2823 fm_fmri_dev_set(*detector, FM_DEV_SCHEME_VERSION, NULL,
2824 device_path, NULL, NULL);
2825
2826 if (ena == 0)
2827 ena = fm_ena_generate(0, FM_ENA_FMT1);
2828
2829 fm_ereport_set(*ereport, 0, PCIE_EREPORT, ena, *detector, NULL);
2830
2831 return (DDI_SUCCESS);
2832 }
2833
2834 /* ARGSUSED */
2835 static void
pf_ereport_post(dev_info_t * dip,nvlist_t ** ereport,nvlist_t ** detector,errorq_elem_t ** eqep)2836 pf_ereport_post(dev_info_t *dip, nvlist_t **ereport, nvlist_t **detector,
2837 errorq_elem_t **eqep)
2838 {
2839 struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
2840
2841 errorq_commit(fmhdl->fh_errorq, *eqep, ERRORQ_ASYNC);
2842 }
2843
2844 static void
pf_send_ereport(ddi_fm_error_t * derr,pf_impl_t * impl)2845 pf_send_ereport(ddi_fm_error_t *derr, pf_impl_t *impl)
2846 {
2847 nvlist_t *ereport;
2848 nvlist_t *detector;
2849 errorq_elem_t *eqep;
2850 pcie_bus_t *bus_p;
2851 pf_data_t *pfd_p;
2852 uint32_t total = impl->pf_total;
2853
2854 /*
2855 * Ereports need to be sent in a top down fashion. The fabric translator
2856 * expects the ereports from the Root first. This is needed to tell if
2857 * the system contains a PCIe complaint RC/RP.
2858 */
2859 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
2860 bus_p = PCIE_PFD2BUS(pfd_p);
2861 pfd_p->pe_valid = B_FALSE;
2862
2863 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED ||
2864 !DDI_FM_EREPORT_CAP(ddi_fm_capable(PCIE_PFD2DIP(pfd_p))))
2865 continue;
2866
2867 if (pf_ereport_setup(PCIE_BUS2DIP(bus_p), derr->fme_ena,
2868 &ereport, &detector, &eqep) != DDI_SUCCESS)
2869 continue;
2870
2871 if (PFD_IS_RC(pfd_p)) {
2872 fm_payload_set(ereport,
2873 "scan_bdf", DATA_TYPE_UINT16,
2874 PCIE_ROOT_FAULT(pfd_p)->scan_bdf,
2875 "scan_addr", DATA_TYPE_UINT64,
2876 PCIE_ROOT_FAULT(pfd_p)->scan_addr,
2877 "intr_src", DATA_TYPE_UINT16,
2878 PCIE_ROOT_EH_SRC(pfd_p)->intr_type,
2879 NULL);
2880 goto generic;
2881 }
2882
2883 /* Generic PCI device information */
2884 fm_payload_set(ereport,
2885 "bdf", DATA_TYPE_UINT16, bus_p->bus_bdf,
2886 "device_id", DATA_TYPE_UINT16,
2887 (bus_p->bus_dev_ven_id >> 16),
2888 "vendor_id", DATA_TYPE_UINT16,
2889 (bus_p->bus_dev_ven_id & 0xFFFF),
2890 "rev_id", DATA_TYPE_UINT8, bus_p->bus_rev_id,
2891 "dev_type", DATA_TYPE_UINT16, bus_p->bus_dev_type,
2892 "pcie_off", DATA_TYPE_UINT16, bus_p->bus_pcie_off,
2893 "pcix_off", DATA_TYPE_UINT16, bus_p->bus_pcix_off,
2894 "aer_off", DATA_TYPE_UINT16, bus_p->bus_aer_off,
2895 "ecc_ver", DATA_TYPE_UINT16, bus_p->bus_ecc_ver,
2896 NULL);
2897
2898 /* PCI registers */
2899 fm_payload_set(ereport,
2900 "pci_status", DATA_TYPE_UINT16,
2901 PCI_ERR_REG(pfd_p)->pci_err_status,
2902 "pci_command", DATA_TYPE_UINT16,
2903 PCI_ERR_REG(pfd_p)->pci_cfg_comm,
2904 NULL);
2905
2906 /* PCI bridge registers */
2907 if (PCIE_IS_BDG(bus_p)) {
2908 fm_payload_set(ereport,
2909 "pci_bdg_sec_status", DATA_TYPE_UINT16,
2910 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat,
2911 "pci_bdg_ctrl", DATA_TYPE_UINT16,
2912 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_ctrl,
2913 NULL);
2914 }
2915
2916 /* PCIx registers */
2917 if (PCIE_IS_PCIX(bus_p) && !PCIE_IS_BDG(bus_p)) {
2918 fm_payload_set(ereport,
2919 "pcix_status", DATA_TYPE_UINT32,
2920 PCIX_ERR_REG(pfd_p)->pcix_status,
2921 "pcix_command", DATA_TYPE_UINT16,
2922 PCIX_ERR_REG(pfd_p)->pcix_command,
2923 NULL);
2924 }
2925
2926 /* PCIx ECC Registers */
2927 if (PCIX_ECC_VERSION_CHECK(bus_p)) {
2928 pf_pcix_ecc_regs_t *ecc_bdg_reg;
2929 pf_pcix_ecc_regs_t *ecc_reg;
2930
2931 if (PCIE_IS_BDG(bus_p))
2932 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 0);
2933 ecc_reg = PCIX_ECC_REG(pfd_p);
2934 fm_payload_set(ereport,
2935 "pcix_ecc_control_0", DATA_TYPE_UINT16,
2936 PCIE_IS_BDG(bus_p) ?
2937 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16) :
2938 (ecc_reg->pcix_ecc_ctlstat >> 16),
2939 "pcix_ecc_status_0", DATA_TYPE_UINT16,
2940 PCIE_IS_BDG(bus_p) ?
2941 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF) :
2942 (ecc_reg->pcix_ecc_ctlstat & 0xFFFF),
2943 "pcix_ecc_fst_addr_0", DATA_TYPE_UINT32,
2944 PCIE_IS_BDG(bus_p) ?
2945 ecc_bdg_reg->pcix_ecc_fstaddr :
2946 ecc_reg->pcix_ecc_fstaddr,
2947 "pcix_ecc_sec_addr_0", DATA_TYPE_UINT32,
2948 PCIE_IS_BDG(bus_p) ?
2949 ecc_bdg_reg->pcix_ecc_secaddr :
2950 ecc_reg->pcix_ecc_secaddr,
2951 "pcix_ecc_attr_0", DATA_TYPE_UINT32,
2952 PCIE_IS_BDG(bus_p) ?
2953 ecc_bdg_reg->pcix_ecc_attr :
2954 ecc_reg->pcix_ecc_attr,
2955 NULL);
2956 }
2957
2958 /* PCIx ECC Bridge Registers */
2959 if (PCIX_ECC_VERSION_CHECK(bus_p) && PCIE_IS_BDG(bus_p)) {
2960 pf_pcix_ecc_regs_t *ecc_bdg_reg;
2961
2962 ecc_bdg_reg = PCIX_BDG_ECC_REG(pfd_p, 1);
2963 fm_payload_set(ereport,
2964 "pcix_ecc_control_1", DATA_TYPE_UINT16,
2965 (ecc_bdg_reg->pcix_ecc_ctlstat >> 16),
2966 "pcix_ecc_status_1", DATA_TYPE_UINT16,
2967 (ecc_bdg_reg->pcix_ecc_ctlstat & 0xFFFF),
2968 "pcix_ecc_fst_addr_1", DATA_TYPE_UINT32,
2969 ecc_bdg_reg->pcix_ecc_fstaddr,
2970 "pcix_ecc_sec_addr_1", DATA_TYPE_UINT32,
2971 ecc_bdg_reg->pcix_ecc_secaddr,
2972 "pcix_ecc_attr_1", DATA_TYPE_UINT32,
2973 ecc_bdg_reg->pcix_ecc_attr,
2974 NULL);
2975 }
2976
2977 /* PCIx Bridge */
2978 if (PCIE_IS_PCIX(bus_p) && PCIE_IS_BDG(bus_p)) {
2979 fm_payload_set(ereport,
2980 "pcix_bdg_status", DATA_TYPE_UINT32,
2981 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat,
2982 "pcix_bdg_sec_status", DATA_TYPE_UINT16,
2983 PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat,
2984 NULL);
2985 }
2986
2987 /* PCIe registers */
2988 if (PCIE_IS_PCIE(bus_p)) {
2989 fm_payload_set(ereport,
2990 "pcie_status", DATA_TYPE_UINT16,
2991 PCIE_ERR_REG(pfd_p)->pcie_err_status,
2992 "pcie_command", DATA_TYPE_UINT16,
2993 PCIE_ERR_REG(pfd_p)->pcie_err_ctl,
2994 "pcie_dev_cap", DATA_TYPE_UINT32,
2995 PCIE_ERR_REG(pfd_p)->pcie_dev_cap,
2996 NULL);
2997 }
2998
2999 /* PCIe AER registers */
3000 if (PCIE_HAS_AER(bus_p)) {
3001 fm_payload_set(ereport,
3002 "pcie_adv_ctl", DATA_TYPE_UINT32,
3003 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl,
3004 "pcie_ue_status", DATA_TYPE_UINT32,
3005 PCIE_ADV_REG(pfd_p)->pcie_ue_status,
3006 "pcie_ue_mask", DATA_TYPE_UINT32,
3007 PCIE_ADV_REG(pfd_p)->pcie_ue_mask,
3008 "pcie_ue_sev", DATA_TYPE_UINT32,
3009 PCIE_ADV_REG(pfd_p)->pcie_ue_sev,
3010 "pcie_ue_hdr0", DATA_TYPE_UINT32,
3011 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[0],
3012 "pcie_ue_hdr1", DATA_TYPE_UINT32,
3013 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[1],
3014 "pcie_ue_hdr2", DATA_TYPE_UINT32,
3015 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[2],
3016 "pcie_ue_hdr3", DATA_TYPE_UINT32,
3017 PCIE_ADV_REG(pfd_p)->pcie_ue_hdr[3],
3018 "pcie_ce_status", DATA_TYPE_UINT32,
3019 PCIE_ADV_REG(pfd_p)->pcie_ce_status,
3020 "pcie_ce_mask", DATA_TYPE_UINT32,
3021 PCIE_ADV_REG(pfd_p)->pcie_ce_mask,
3022 NULL);
3023 }
3024
3025 /* PCIe AER decoded header */
3026 if (HAS_AER_LOGS(pfd_p, PCIE_ADV_REG(pfd_p)->pcie_ue_status)) {
3027 fm_payload_set(ereport,
3028 "pcie_ue_tgt_trans", DATA_TYPE_UINT32,
3029 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans,
3030 "pcie_ue_tgt_addr", DATA_TYPE_UINT64,
3031 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr,
3032 "pcie_ue_tgt_bdf", DATA_TYPE_UINT16,
3033 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf,
3034 NULL);
			/* Clear these values as they are no longer valid */
3036 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
3037 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
3038 PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;
3039 }
3040
3041 /* PCIe BDG AER registers */
3042 if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_HAS_AER(bus_p)) {
3043 fm_payload_set(ereport,
3044 "pcie_sue_adv_ctl", DATA_TYPE_UINT32,
3045 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_ctl,
3046 "pcie_sue_status", DATA_TYPE_UINT32,
3047 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status,
3048 "pcie_sue_mask", DATA_TYPE_UINT32,
3049 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_mask,
3050 "pcie_sue_sev", DATA_TYPE_UINT32,
3051 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_sev,
3052 "pcie_sue_hdr0", DATA_TYPE_UINT32,
3053 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[0],
3054 "pcie_sue_hdr1", DATA_TYPE_UINT32,
3055 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[1],
3056 "pcie_sue_hdr2", DATA_TYPE_UINT32,
3057 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[2],
3058 "pcie_sue_hdr3", DATA_TYPE_UINT32,
3059 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_hdr[3],
3060 NULL);
3061 }
3062
3063 /* PCIe BDG AER decoded header */
3064 if (PCIE_IS_PCIE_BDG(bus_p) && HAS_SAER_LOGS(pfd_p,
3065 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_status)) {
3066 fm_payload_set(ereport,
3067 "pcie_sue_tgt_trans", DATA_TYPE_UINT32,
3068 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans,
3069 "pcie_sue_tgt_addr", DATA_TYPE_UINT64,
3070 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr,
3071 "pcie_sue_tgt_bdf", DATA_TYPE_UINT16,
3072 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf,
3073 NULL);
			/* Clear these values as they are no longer valid */
3075 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_trans = 0;
3076 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_addr = 0;
3077 PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
3078 PCIE_INVALID_BDF;
3079 }
3080
3081 /* PCIe RP registers */
3082 if (PCIE_IS_RP(bus_p)) {
3083 fm_payload_set(ereport,
3084 "pcie_rp_status", DATA_TYPE_UINT32,
3085 PCIE_RP_REG(pfd_p)->pcie_rp_status,
3086 "pcie_rp_control", DATA_TYPE_UINT16,
3087 PCIE_RP_REG(pfd_p)->pcie_rp_ctl,
3088 NULL);
3089 }
3090
3091 /* PCIe RP AER registers */
3092 if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
3093 fm_payload_set(ereport,
3094 "pcie_adv_rp_status", DATA_TYPE_UINT32,
3095 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_status,
3096 "pcie_adv_rp_command", DATA_TYPE_UINT32,
3097 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_err_cmd,
3098 "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16,
3099 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id,
3100 "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16,
3101 PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id,
3102 NULL);
3103 }
3104
3105 /*
3106 * Slot Status registers
3107 *
3108 * Since we only gather these for certain types of components,
3109 * only put these registers into the ereport if we have valid
3110 * data.
3111 */
3112 if (PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid) {
3113 fm_payload_set(ereport,
3114 "pcie_slot_cap", DATA_TYPE_UINT32,
3115 PCIE_SLOT_REG(pfd_p)->pcie_slot_cap,
3116 "pcie_slot_control", DATA_TYPE_UINT16,
3117 PCIE_SLOT_REG(pfd_p)->pcie_slot_control,
3118 "pcie_slot_status", DATA_TYPE_UINT16,
3119 PCIE_SLOT_REG(pfd_p)->pcie_slot_status,
3120 NULL);
3121 }
3122
3123 generic:
3124 /* IOV related information */
3125 if (!PCIE_BDG_IS_UNASSIGNED(PCIE_PFD2BUS(impl->pf_dq_head_p))) {
3126 fm_payload_set(ereport,
3127 "pcie_aff_flags", DATA_TYPE_UINT16,
3128 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags,
3129 "pcie_aff_bdf", DATA_TYPE_UINT16,
3130 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf,
3131 "orig_sev", DATA_TYPE_UINT32,
3132 pfd_p->pe_orig_severity_flags,
3133 NULL);
3134 }
3135
3136 /* Misc ereport information */
3137 fm_payload_set(ereport,
3138 "remainder", DATA_TYPE_UINT32, --total,
3139 "severity", DATA_TYPE_UINT32, pfd_p->pe_severity_flags,
3140 NULL);
3141
3142 pf_ereport_post(PCIE_BUS2DIP(bus_p), &ereport, &detector,
3143 &eqep);
3144 }
3145
3146 pf_dq_unlock_chain(impl);
3147 }
3148
3149 /*
 * pf_handler_enter must be called to serialize access to each device's
 * pf_data_t.
3151 * Once error handling is finished with the device call pf_handler_exit to allow
3152 * other threads to access it. The same thread may call pf_handler_enter
3153 * several times without any consequences.
3154 *
3155 * The "impl" variable is passed in during scan fabric to double check that
3156 * there is not a recursive algorithm and to ensure only one thread is doing a
3157 * fabric scan at all times.
3158 *
3159 * In some cases "impl" is not available, such as "child lookup" being called
3160 * from outside of scan fabric, just pass in NULL for this variable and this
3161 * extra check will be skipped.
3162 */
3163 static int
pf_handler_enter(dev_info_t * dip,pf_impl_t * impl)3164 pf_handler_enter(dev_info_t *dip, pf_impl_t *impl)
3165 {
3166 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3167
3168 ASSERT(pfd_p);
3169
3170 /*
3171 * Check to see if the lock has already been taken by this
3172 * thread. If so just return and don't take lock again.
3173 */
3174 if (!pfd_p->pe_lock || !impl) {
3175 i_ddi_fm_handler_enter(dip);
3176 pfd_p->pe_lock = B_TRUE;
3177 return (PF_SCAN_SUCCESS);
3178 }
3179
3180 /* Check to see that this dip is already in the "impl" error queue */
3181 for (pfd_p = impl->pf_dq_head_p; pfd_p; pfd_p = pfd_p->pe_next) {
3182 if (PCIE_PFD2DIP(pfd_p) == dip) {
3183 return (PF_SCAN_SUCCESS);
3184 }
3185 }
3186
3187 return (PF_SCAN_DEADLOCK);
3188 }
3189
3190 static void
pf_handler_exit(dev_info_t * dip)3191 pf_handler_exit(dev_info_t *dip)
3192 {
3193 pf_data_t *pfd_p = PCIE_DIP2PFD(dip);
3194
3195 ASSERT(pfd_p);
3196
3197 ASSERT(pfd_p->pe_lock == B_TRUE);
3198 i_ddi_fm_handler_exit(dip);
3199 pfd_p->pe_lock = B_FALSE;
3200 }
3201
3202 /*
3203 * This function calls the driver's callback function (if it's FMA hardened
3204 * and callback capable). This function relies on the current thread already
3205 * owning the driver's fmhdl lock.
3206 */
3207 static int
pf_fm_callback(dev_info_t * dip,ddi_fm_error_t * derr)3208 pf_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr)
3209 {
3210 int cb_sts = DDI_FM_OK;
3211
3212 if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) {
3213 dev_info_t *pdip = ddi_get_parent(dip);
3214 struct i_ddi_fmhdl *hdl = DEVI(pdip)->devi_fmhdl;
3215 struct i_ddi_fmtgt *tgt = hdl->fh_tgts;
3216 struct i_ddi_errhdl *errhdl;
3217 while (tgt != NULL) {
3218 if (dip == tgt->ft_dip) {
3219 errhdl = tgt->ft_errhdl;
3220 cb_sts = errhdl->eh_func(dip, derr,
3221 errhdl->eh_impl);
3222 break;
3223 }
3224 tgt = tgt->ft_next;
3225 }
3226 }
3227 return (cb_sts);
3228 }
3229
/*
 * Reset the saved error state in a pf_data_t so the structure can be
 * reused by a later fabric scan.  Severity flags and the captured
 * register snapshots relevant to this device's type (root, bridge, PCIe,
 * PCI-X) are cleared.  Fields that hold a BDF are reset to
 * PCIE_INVALID_BDF rather than zeroed.
 */
static void
pf_reset_pfd(pf_data_t *pfd_p)
{
	pcie_bus_t *bus_p = PCIE_PFD2BUS(pfd_p);

	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	/* pe_lock and pe_valid were reset in pf_send_ereport */

	PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;

	/* Root nodes also track the fault scan target and interrupt source */
	if (PCIE_IS_ROOT(bus_p)) {
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
		PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
		PCIE_ROOT_FAULT(pfd_p)->full_scan = B_FALSE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
		PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
	}

	if (PCIE_IS_BDG(bus_p)) {
		bzero(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
	}

	/* Generic PCI status/command snapshot, present for every device */
	PCI_ERR_REG(pfd_p)->pci_err_status = 0;
	PCI_ERR_REG(pfd_p)->pci_cfg_comm = 0;

	if (PCIE_IS_PCIE(bus_p)) {
		/* Root ports carry RP and advanced (AER) RP register sets */
		if (PCIE_IS_ROOT(bus_p)) {
			bzero(PCIE_RP_REG(pfd_p),
			    sizeof (pf_pcie_rp_err_regs_t));
			bzero(PCIE_ADV_RP_REG(pfd_p),
			    sizeof (pf_pcie_adv_rp_err_regs_t));
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
			    PCIE_INVALID_BDF;
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
			    PCIE_INVALID_BDF;
		} else if (PCIE_IS_PCIE_BDG(bus_p)) {
			bzero(PCIE_ADV_BDG_REG(pfd_p),
			    sizeof (pf_pcie_adv_bdg_err_regs_t));
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe-to-PCI-X bridges may additionally log ECC state */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		}

		/* AER registers and decoded UE target info */
		PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_sev = 0;
		PCIE_ADV_HDR(pfd_p, 0) = 0;
		PCIE_ADV_HDR(pfd_p, 1) = 0;
		PCIE_ADV_HDR(pfd_p, 2) = 0;
		PCIE_ADV_HDR(pfd_p, 3) = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ce_mask = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_trans = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_addr = 0;
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;

		PCIE_ERR_REG(pfd_p)->pcie_err_status = 0;
		PCIE_ERR_REG(pfd_p)->pcie_err_ctl = 0;
		PCIE_ERR_REG(pfd_p)->pcie_dev_cap = 0;

	} else if (PCIE_IS_PCIX(bus_p)) {
		/* Pure PCI-X device: bridge vs. leaf register layouts */
		if (PCIE_IS_BDG(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				bzero(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_sec_stat = 0;
			PCIX_BDG_ERR_REG(pfd_p)->pcix_bdg_stat = 0;
		} else {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				bzero(PCIX_ECC_REG(pfd_p),
				    sizeof (pf_pcix_ecc_regs_t));
			}
			PCIX_ERR_REG(pfd_p)->pcix_command = 0;
			PCIX_ERR_REG(pfd_p)->pcix_status = 0;
		}
	}

	/* Unlink from any error queue this pf_data_t was chained into */
	pfd_p->pe_prev = NULL;
	pfd_p->pe_next = NULL;
	pfd_p->pe_rber_fatal = B_FALSE;
}
3328
3329 pcie_bus_t *
pf_find_busp_by_bdf(pf_impl_t * impl,pcie_req_id_t bdf)3330 pf_find_busp_by_bdf(pf_impl_t *impl, pcie_req_id_t bdf)
3331 {
3332 pcie_bus_t *temp_bus_p;
3333 pf_data_t *temp_pfd_p;
3334
3335 for (temp_pfd_p = impl->pf_dq_head_p;
3336 temp_pfd_p;
3337 temp_pfd_p = temp_pfd_p->pe_next) {
3338 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3339
3340 if (bdf == temp_bus_p->bus_bdf) {
3341 return (temp_bus_p);
3342 }
3343 }
3344
3345 return (NULL);
3346 }
3347
3348 pcie_bus_t *
pf_find_busp_by_addr(pf_impl_t * impl,uint64_t addr)3349 pf_find_busp_by_addr(pf_impl_t *impl, uint64_t addr)
3350 {
3351 pcie_bus_t *temp_bus_p;
3352 pf_data_t *temp_pfd_p;
3353
3354 for (temp_pfd_p = impl->pf_dq_head_p;
3355 temp_pfd_p;
3356 temp_pfd_p = temp_pfd_p->pe_next) {
3357 temp_bus_p = PCIE_PFD2BUS(temp_pfd_p);
3358
3359 if (pf_in_assigned_addr(temp_bus_p, addr)) {
3360 return (temp_bus_p);
3361 }
3362 }
3363
3364 return (NULL);
3365 }
3366
3367 pcie_bus_t *
pf_find_busp_by_aer(pf_impl_t * impl,pf_data_t * pfd_p)3368 pf_find_busp_by_aer(pf_impl_t *impl, pf_data_t *pfd_p)
3369 {
3370 pf_pcie_adv_err_regs_t *reg_p = PCIE_ADV_REG(pfd_p);
3371 pcie_bus_t *temp_bus_p = NULL;
3372 pcie_req_id_t bdf;
3373 uint64_t addr;
3374 pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)reg_p->pcie_ue_hdr;
3375 uint32_t trans_type = reg_p->pcie_ue_tgt_trans;
3376
3377 if ((tlp_hdr->type == PCIE_TLP_TYPE_CPL) ||
3378 (tlp_hdr->type == PCIE_TLP_TYPE_CPLLK)) {
3379 pcie_cpl_t *cpl_tlp = (pcie_cpl_t *)®_p->pcie_ue_hdr[1];
3380
3381 bdf = (cpl_tlp->rid > cpl_tlp->cid) ? cpl_tlp->rid :
3382 cpl_tlp->cid;
3383 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3384 } else if (trans_type == PF_ADDR_PIO) {
3385 addr = reg_p->pcie_ue_tgt_addr;
3386 temp_bus_p = pf_find_busp_by_addr(impl, addr);
3387 } else {
3388 /* PF_ADDR_DMA type */
3389 bdf = reg_p->pcie_ue_tgt_bdf;
3390 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3391 }
3392
3393 return (temp_bus_p);
3394 }
3395
3396 pcie_bus_t *
pf_find_busp_by_saer(pf_impl_t * impl,pf_data_t * pfd_p)3397 pf_find_busp_by_saer(pf_impl_t *impl, pf_data_t *pfd_p)
3398 {
3399 pf_pcie_adv_bdg_err_regs_t *reg_p = PCIE_ADV_BDG_REG(pfd_p);
3400 pcie_bus_t *temp_bus_p = NULL;
3401 pcie_req_id_t bdf;
3402 uint64_t addr;
3403
3404 addr = reg_p->pcie_sue_tgt_addr;
3405 bdf = reg_p->pcie_sue_tgt_bdf;
3406
3407 if (addr != 0) {
3408 temp_bus_p = pf_find_busp_by_addr(impl, addr);
3409 } else if (PCIE_CHECK_VALID_BDF(bdf)) {
3410 temp_bus_p = pf_find_busp_by_bdf(impl, bdf);
3411 }
3412
3413 return (temp_bus_p);
3414 }
3415