1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * PX Fault Management Architecture
28 */
29 #include <sys/types.h>
30 #include <sys/sunndi.h>
31 #include <sys/sunddi.h>
32 #include <sys/fm/protocol.h>
33 #include <sys/fm/util.h>
34 #include <sys/fm/io/pci.h>
35 #include <sys/membar.h>
36 #include "px_obj.h"
37
/*
 * Defined outside this file.  Within this file it is used to mask the
 * parent_high cell of a "ranges" entry before building a 64-bit address.
 */
extern uint_t px_ranges_phi_mask;

/*
 * Groupings of PCIe AER uncorrectable error status bits.  Neither macro is
 * referenced in the part of the file reviewed here; judging by the names
 * they presumably separate errors that warrant a panic from those that do
 * not -- confirm against their users.
 */
#define PX_PCIE_PANIC_BITS \
	(PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \
	PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC)
#define PX_PCIE_NO_PANIC_BITS \
	(PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \
	PCIE_AER_UCE_UC | PCIE_AER_UCE_UR)

/*
 * Global panicking state variable used to control whether further error
 * handling should occur.  If the system is already panicking, or if PX itself
 * has recommended panicking the system, no further error handling should
 * occur, to prevent the system from hanging.
 */
boolean_t px_panicing = B_FALSE;

/* Poisoned-TLP analysis helper, defined later in this file. */
static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr,
    px_err_pcie_t *regs);
57
/*
 * px_pcie_log() dumps the captured PCIe error registers on DEBUG kernels.
 * On non-DEBUG kernels it is a function-like macro that expands to a no-op
 * expression, so call sites need no conditional compilation and their
 * arguments are never evaluated.
 */
#if defined(DEBUG)
static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs);
#else	/* DEBUG */
#define	px_pcie_log(dip, regs)	((void)0)
#endif	/* DEBUG */
63
64 /*
65 * Initialize px FMA support
66 */
67 int
px_fm_attach(px_t * px_p)68 px_fm_attach(px_t *px_p)
69 {
70 int i;
71 dev_info_t *dip = px_p->px_dip;
72 pcie_bus_t *bus_p;
73
74 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
75 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
76
77 /*
78 * check parents' capability
79 */
80 ddi_fm_init(dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
81
82 /*
83 * parents need to be ereport and error handling capable
84 */
85 ASSERT(px_p->px_fm_cap &&
86 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
87
88 /*
89 * Initialize lock to synchronize fabric error handling
90 */
91 mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER,
92 (void *)px_p->px_fm_ibc);
93
94 px_p->px_pfd_idx = 0;
95 for (i = 0; i < 5; i++)
96 pcie_rc_init_pfd(dip, &px_p->px_pfd_arr[i]);
97 PCIE_DIP2PFD(dip) = px_p->px_pfd_arr;
98
99 bus_p = PCIE_DIP2BUS(dip);
100 bus_p->bus_rp_bdf = px_p->px_bdf;
101 bus_p->bus_rp_dip = dip;
102
103 return (DDI_SUCCESS);
104 }
105
106 /*
107 * Deregister FMA
108 */
109 void
px_fm_detach(px_t * px_p)110 px_fm_detach(px_t *px_p)
111 {
112 int i;
113
114 mutex_destroy(&px_p->px_fm_mutex);
115 ddi_fm_fini(px_p->px_dip);
116 for (i = 0; i < 5; i++)
117 pcie_rc_fini_pfd(&px_p->px_pfd_arr[i]);
118 }
119
120 /*
121 * register error callback in parent
122 */
123 void
px_fm_cb_enable(px_t * px_p)124 px_fm_cb_enable(px_t *px_p)
125 {
126 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
127 }
128
129 void
px_fm_cb_disable(px_t * px_p)130 px_fm_cb_disable(px_t *px_p)
131 {
132 ddi_fm_handler_unregister(px_p->px_dip);
133 }
134
135 /*
136 * Function used to setup access functions depending on level of desired
137 * protection.
138 */
139 void
px_fm_acc_setup(ddi_map_req_t * mp,dev_info_t * rdip,pci_regspec_t * rp)140 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip, pci_regspec_t *rp)
141 {
142 uchar_t fflag;
143 ndi_err_t *errp;
144 ddi_acc_hdl_t *hp;
145 ddi_acc_impl_t *ap;
146
147 hp = mp->map_handlep;
148 ap = (ddi_acc_impl_t *)hp->ah_platform_private;
149 fflag = ap->ahi_common.ah_acc.devacc_attr_access;
150
151 if (mp->map_op == DDI_MO_MAP_LOCKED) {
152 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
153 switch (fflag) {
154 case DDI_FLAGERR_ACC:
155 ap->ahi_get8 = i_ddi_prot_get8;
156 ap->ahi_get16 = i_ddi_prot_get16;
157 ap->ahi_get32 = i_ddi_prot_get32;
158 ap->ahi_get64 = i_ddi_prot_get64;
159 ap->ahi_put8 = i_ddi_prot_put8;
160 ap->ahi_put16 = i_ddi_prot_put16;
161 ap->ahi_put32 = i_ddi_prot_put32;
162 ap->ahi_put64 = i_ddi_prot_put64;
163 ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
164 ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
165 ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
166 ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
167 ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
168 ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
169 ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
170 ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
171 impl_acc_err_init(hp);
172 errp = ((ddi_acc_impl_t *)hp)->ahi_err;
173 if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
174 PCI_ADDR_CONFIG)
175 errp->err_cf = px_err_cfg_hdl_check;
176 else
177 errp->err_cf = px_err_pio_hdl_check;
178 break;
179 case DDI_CAUTIOUS_ACC :
180 ap->ahi_get8 = i_ddi_caut_get8;
181 ap->ahi_get16 = i_ddi_caut_get16;
182 ap->ahi_get32 = i_ddi_caut_get32;
183 ap->ahi_get64 = i_ddi_caut_get64;
184 ap->ahi_put8 = i_ddi_caut_put8;
185 ap->ahi_put16 = i_ddi_caut_put16;
186 ap->ahi_put32 = i_ddi_caut_put32;
187 ap->ahi_put64 = i_ddi_caut_put64;
188 ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
189 ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
190 ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
191 ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
192 ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
193 ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
194 ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
195 ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
196 impl_acc_err_init(hp);
197 errp = ((ddi_acc_impl_t *)hp)->ahi_err;
198 if ((rp->pci_phys_hi & PCI_REG_ADDR_M) ==
199 PCI_ADDR_CONFIG)
200 errp->err_cf = px_err_cfg_hdl_check;
201 else
202 errp->err_cf = px_err_pio_hdl_check;
203 break;
204 default:
205 /* Illegal state, remove the handle from cache */
206 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
207 break;
208 }
209 } else if (mp->map_op == DDI_MO_UNMAP) {
210 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
211 }
212 }
213
214 /*
215 * Function used to initialize FMA for our children nodes. Called
216 * through pci busops when child node calls ddi_fm_init.
217 */
218 /*ARGSUSED*/
219 int
px_fm_init_child(dev_info_t * dip,dev_info_t * cdip,int cap,ddi_iblock_cookie_t * ibc_p)220 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
221 ddi_iblock_cookie_t *ibc_p)
222 {
223 px_t *px_p = DIP_TO_STATE(dip);
224
225 ASSERT(ibc_p != NULL);
226 *ibc_p = px_p->px_fm_ibc;
227
228 return (px_p->px_fm_cap);
229 }
230
231 /*
232 * lock access for exclusive PCIe access
233 */
234 void
px_bus_enter(dev_info_t * dip,ddi_acc_handle_t handle)235 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
236 {
237 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
238
239 /*
240 * Exclusive access has been used for cautious put/get,
241 * Both utilize i_ddi_ontrap which, on sparcv9, implements
242 * similar protection as what on_trap() does, and which calls
243 * membar #Sync to flush out all cpu deferred errors
244 * prior to get/put operation, so here we're not calling
245 * membar #Sync - a difference from what's in pci_bus_enter().
246 */
247 mutex_enter(&pec_p->pec_pokefault_mutex);
248 pec_p->pec_acc_hdl = handle;
249 }
250
251 /*
252 * unlock access for exclusive PCIe access
253 */
254 /* ARGSUSED */
255 void
px_bus_exit(dev_info_t * dip,ddi_acc_handle_t handle)256 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
257 {
258 px_t *px_p = DIP_TO_STATE(dip);
259 px_pec_t *pec_p = px_p->px_pec_p;
260
261 pec_p->pec_acc_hdl = NULL;
262 mutex_exit(&pec_p->pec_pokefault_mutex);
263 }
264
265 static uint64_t
px_in_addr_range(dev_info_t * dip,pci_ranges_t * ranges_p,uint64_t addr)266 px_in_addr_range(dev_info_t *dip, pci_ranges_t *ranges_p, uint64_t addr)
267 {
268 uint64_t addr_low, addr_high;
269
270 addr_low = (uint64_t)(ranges_p->parent_high & px_ranges_phi_mask) << 32;
271 addr_low |= (uint64_t)ranges_p->parent_low;
272 addr_high = addr_low + ((uint64_t)ranges_p->size_high << 32) +
273 (uint64_t)ranges_p->size_low;
274
275 DBG(DBG_ERR_INTR, dip, "Addr: 0x%llx high: 0x%llx low: 0x%llx\n",
276 addr, addr_high, addr_low);
277
278 if ((addr < addr_high) && (addr >= addr_low))
279 return (addr_low);
280
281 return (0);
282 }
283
284 /*
285 * PCI error callback which is registered with our parent to call
286 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
287 * and PCI BERR/TO/UE on IO Loads.
288 */
289 /*ARGSUSED*/
290 int
px_fm_callback(dev_info_t * dip,ddi_fm_error_t * derr,const void * impl_data)291 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
292 {
293 dev_info_t *pdip = ddi_get_parent(dip);
294 px_t *px_p = (px_t *)impl_data;
295 int i, acc_type = 0;
296 int lookup, rc_err, fab_err;
297 uint64_t addr, base_addr;
298 uint64_t fault_addr = (uint64_t)derr->fme_bus_specific;
299 pcie_req_id_t bdf = PCIE_INVALID_BDF;
300 pci_ranges_t *ranges_p;
301 int range_len;
302 pf_data_t *pfd_p;
303
304 /*
305 * If the current thread already owns the px_fm_mutex, then we
306 * have encountered an error while processing a previous
307 * error. Attempting to take the mutex again will cause the
308 * system to deadlock.
309 */
310 if (px_p->px_fm_mutex_owner == curthread)
311 return (DDI_FM_FATAL);
312
313 i_ddi_fm_handler_exit(pdip);
314
315 if (px_fm_enter(px_p) != DDI_SUCCESS) {
316 i_ddi_fm_handler_enter(pdip);
317 return (DDI_FM_FATAL);
318 }
319
320 /*
321 * Make sure this failed load came from this PCIe port. Check by
322 * matching the upper 32 bits of the address with the ranges property.
323 */
324 range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
325 i = 0;
326 for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
327 base_addr = px_in_addr_range(dip, ranges_p, fault_addr);
328 if (base_addr) {
329 switch (ranges_p->child_high & PCI_ADDR_MASK) {
330 case PCI_ADDR_CONFIG:
331 acc_type = PF_ADDR_CFG;
332 addr = 0;
333 bdf = (pcie_req_id_t)((fault_addr >> 12) &
334 0xFFFF);
335 break;
336 case PCI_ADDR_IO:
337 case PCI_ADDR_MEM64:
338 case PCI_ADDR_MEM32:
339 acc_type = PF_ADDR_PIO;
340 addr = fault_addr - base_addr;
341 bdf = PCIE_INVALID_BDF;
342 break;
343 }
344 break;
345 }
346 }
347
348 /* This address doesn't belong to this leaf, just return with OK */
349 if (!acc_type) {
350 px_fm_exit(px_p);
351 i_ddi_fm_handler_enter(pdip);
352 return (DDI_FM_OK);
353 }
354
355 rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL);
356 lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, (uint64_t)addr,
357 bdf);
358
359 pfd_p = px_rp_en_q(px_p, bdf, addr,
360 (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB));
361 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_DATA;
362
363 /* Update affected info, either addr or bdf is not NULL */
364 if (addr) {
365 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ADDR;
366 } else if (PCIE_CHECK_VALID_BDF(bdf)) {
367 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF;
368 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = bdf;
369 }
370
371 fab_err = px_scan_fabric(px_p, dip, derr);
372
373 px_fm_exit(px_p);
374 i_ddi_fm_handler_enter(pdip);
375
376 if (!px_die)
377 return (DDI_FM_OK);
378
379 if ((rc_err & (PX_PANIC | PX_PROTECTED)) ||
380 (fab_err & PF_ERR_FATAL_FLAGS) ||
381 (lookup == PF_HDL_NOTFOUND))
382 return (DDI_FM_FATAL);
383 else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_ERR_NO_ERROR))
384 return (DDI_FM_OK);
385
386 return (DDI_FM_NONFATAL);
387 }
388
389 /*
390 * px_err_fabric_intr:
391 * Interrupt handler for PCIE fabric block.
392 * o lock
393 * o create derr
394 * o px_err_cmn_intr(leaf, with jbc)
395 * o send ereport(fire fmri, derr, payload = BDF)
396 * o dispatch (leaf)
397 * o unlock
398 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
399 */
400 /* ARGSUSED */
401 uint_t
px_err_fabric_intr(px_t * px_p,msgcode_t msg_code,pcie_req_id_t rid)402 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid)
403 {
404 dev_info_t *rpdip = px_p->px_dip;
405 int rc_err, fab_err;
406 ddi_fm_error_t derr;
407 uint32_t rp_status;
408 uint16_t ce_source, ue_source;
409 pf_data_t *pfd_p;
410
411 if (px_fm_enter(px_p) != DDI_SUCCESS)
412 goto done;
413
414 /* Create the derr */
415 bzero(&derr, sizeof (ddi_fm_error_t));
416 derr.fme_version = DDI_FME_VERSION;
417 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
418 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
419
420 px_err_safeacc_check(px_p, &derr);
421
422 if (msg_code == PCIE_MSG_CODE_ERR_COR) {
423 rp_status = PCIE_AER_RE_STS_CE_RCVD;
424 ce_source = rid;
425 ue_source = 0;
426 } else {
427 rp_status = PCIE_AER_RE_STS_FE_NFE_RCVD;
428 ce_source = 0;
429 ue_source = rid;
430 if (msg_code == PCIE_MSG_CODE_ERR_NONFATAL)
431 rp_status |= PCIE_AER_RE_STS_NFE_MSGS_RCVD;
432 else {
433 rp_status |= PCIE_AER_RE_STS_FE_MSGS_RCVD;
434 rp_status |= PCIE_AER_RE_STS_FIRST_UC_FATAL;
435 }
436 }
437
438 if (derr.fme_flag == DDI_FM_ERR_UNEXPECTED) {
439 ddi_fm_ereport_post(rpdip, PCI_ERROR_SUBCLASS "." PCIEX_FABRIC,
440 derr.fme_ena,
441 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
442 FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE,
443 "pcie_adv_rp_status", DATA_TYPE_UINT32, rp_status,
444 "pcie_adv_rp_command", DATA_TYPE_UINT32, 0,
445 "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16, ce_source,
446 "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16, ue_source,
447 NULL);
448 }
449
450 /* Ensure that the rid of the fabric message will get scanned. */
451 pfd_p = px_rp_en_q(px_p, rid, 0, 0);
452 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_FABRIC;
453
454 rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE);
455
456 /* call rootport dispatch */
457 fab_err = px_scan_fabric(px_p, rpdip, &derr);
458
459 px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
460 px_fm_exit(px_p);
461 px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);
462
463 done:
464 return (DDI_INTR_CLAIMED);
465 }
466
467 /*
468 * px_scan_fabric:
469 *
470 * Check for drain state and if there is anything to scan.
471 *
472 * Note on pfd: Different interrupts will populate the pfd's differently. The
473 * px driver can have a total of 5 different error sources, so it has a queue of
474 * 5 pfds. Each valid PDF is linked together and passed to pf_scan_fabric.
475 *
476 * Each error handling will populate the following info in the pfd
477 *
478 * Root Fault Intr Src Affected BDF
479 * ----------------+---------------+------------
480 * Callback/CPU Trap Address/BDF |DATA |Lookup Addr
481 * Mondo 62/63 (sun4u) decode error |N/A |N/A
482 * EPKT (sun4v) decode epkt |INTERNAL |decode epkt
483 * Fabric Message fabric payload |FABRIC |NULL
484 * Peek/Poke Address/BDF |NULL |NULL
485 * ----------------+---------------+------------
486 */
487 int
px_scan_fabric(px_t * px_p,dev_info_t * rpdip,ddi_fm_error_t * derr)488 px_scan_fabric(px_t *px_p, dev_info_t *rpdip, ddi_fm_error_t *derr) {
489 int fab_err = 0;
490
491 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
492
493 if (!px_lib_is_in_drain_state(px_p) && px_p->px_pfd_idx) {
494 fab_err = pf_scan_fabric(rpdip, derr, px_p->px_pfd_arr);
495 }
496
497 return (fab_err);
498 }
499
500 /*
501 * px_err_safeacc_check:
502 * Check to see if a peek/poke and cautious access is currently being
503 * done on a particular leaf.
504 *
505 * Safe access reads induced fire errors will be handled by cpu trap handler
506 * which will call px_fm_callback() which calls this function. In that
507 * case, the derr fields will be set by trap handler with the correct values.
508 *
509 * Safe access writes induced errors will be handled by px interrupt
510 * handlers, this function will fill in the derr fields.
511 *
512 * If a cpu trap does occur, it will quiesce all other interrupts allowing
513 * the cpu trap error handling to finish before Fire receives an interrupt.
514 *
515 * If fire does indeed have an error when a cpu trap occurs as a result of
516 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
517 * In which case derr will be initialized as "UNEXPECTED" by the interrupt
518 * handler and this function will need to find if this error occured in the
519 * middle of a safe access operation.
520 *
521 * @param px_p leaf in which to check access
522 * @param derr fm err data structure to be updated
523 */
524 void
px_err_safeacc_check(px_t * px_p,ddi_fm_error_t * derr)525 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
526 {
527 px_pec_t *pec_p = px_p->px_pec_p;
528 int acctype = pec_p->pec_safeacc_type;
529
530 ASSERT(MUTEX_HELD(&px_p->px_fm_mutex));
531
532 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
533 return;
534 }
535
536 /* safe access checking */
537 switch (acctype) {
538 case DDI_FM_ERR_EXPECTED:
539 /*
540 * cautious access protection, protected from all err.
541 */
542 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
543 DDI_FME_VERSION);
544 derr->fme_flag = acctype;
545 derr->fme_acc_handle = pec_p->pec_acc_hdl;
546 break;
547 case DDI_FM_ERR_POKE:
548 /*
549 * ddi_poke protection, check nexus and children for
550 * expected errors.
551 */
552 membar_sync();
553 derr->fme_flag = acctype;
554 break;
555 case DDI_FM_ERR_PEEK:
556 derr->fme_flag = acctype;
557 break;
558 }
559 }
560
561 /*
562 * Suggest panic if any EQ (except CE q) has overflown.
563 */
564 int
px_err_check_eq(dev_info_t * dip)565 px_err_check_eq(dev_info_t *dip)
566 {
567 px_t *px_p = DIP_TO_STATE(dip);
568 px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
569 px_pec_t *pec_p = px_p->px_pec_p;
570 msiqid_t eq_no = msiq_state_p->msiq_1st_msiq_id;
571 pci_msiq_state_t msiq_state;
572 int i;
573
574 for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
575 if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */
576 continue;
577 if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) !=
578 DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR)
579 return (PX_PANIC);
580 }
581 return (PX_NO_PANIC);
582 }
583
584 /* ARGSUSED */
585 int
px_err_check_pcie(dev_info_t * dip,ddi_fm_error_t * derr,px_err_pcie_t * regs,pf_intr_type_t intr_type)586 px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs,
587 pf_intr_type_t intr_type)
588 {
589 px_t *px_p = DIP_TO_STATE(dip);
590 pf_data_t *pfd_p = px_get_pfd(px_p);
591 int i;
592 pf_pcie_adv_err_regs_t *adv_reg = PCIE_ADV_REG(pfd_p);
593
594 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = intr_type;
595
596 /*
597 * set RC s_status in PCI term to coordinate with downstream fabric
598 * errors ananlysis.
599 */
600 if (regs->primary_ue & PCIE_AER_UCE_UR)
601 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_MAST_AB;
602 if (regs->primary_ue & PCIE_AER_UCE_CA)
603 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_TARG_AB;
604 if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC))
605 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_PERROR;
606
607 if (!regs->primary_ue)
608 goto done;
609
610 adv_reg->pcie_ce_status = regs->ce_reg;
611 adv_reg->pcie_ue_status = regs->ue_reg | regs->primary_ue;
612 PCIE_ADV_HDR(pfd_p, 0) = regs->rx_hdr1;
613 PCIE_ADV_HDR(pfd_p, 1) = regs->rx_hdr2;
614 PCIE_ADV_HDR(pfd_p, 2) = regs->rx_hdr3;
615 PCIE_ADV_HDR(pfd_p, 3) = regs->rx_hdr4;
616 for (i = regs->primary_ue; i != 1; i = i >> 1)
617 adv_reg->pcie_adv_ctl++;
618
619 if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) {
620 if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS)
621 PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
622 adv_reg->pcie_ue_tgt_bdf;
623 } else if (regs->primary_ue & PCIE_AER_UCE_PTLP) {
624 if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS) {
625 PCIE_ROOT_FAULT(pfd_p)->scan_bdf =
626 adv_reg->pcie_ue_tgt_bdf;
627 if (adv_reg->pcie_ue_tgt_trans ==
628 PF_ADDR_PIO)
629 PCIE_ROOT_FAULT(pfd_p)->scan_addr =
630 adv_reg->pcie_ue_tgt_addr;
631 }
632
633 /*
634 * Normally for Poisoned Completion TLPs we can look at the
635 * transmit log header for the original request and the original
636 * address, however this doesn't seem to be working. HW BUG.
637 */
638 }
639
640 done:
641 px_pcie_log(dip, regs);
642
643 /* Return No Error here and let the pcie misc module analyse it */
644 return (PX_NO_ERROR);
645 }
646
#if defined(DEBUG)
/*
 * DEBUG-only helper: print the captured CE/UE status registers and the
 * transmit and receive TLP header words through DBG().
 */
static void
px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs)
{
	DBG(DBG_ERR_INTR, dip,
	    "A PCIe RC error has occured\n"
	    "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n"
	    "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n",
	    regs->ce_reg,
	    regs->ue_reg,
	    regs->primary_ue,
	    regs->tx_hdr1, regs->tx_hdr2,
	    regs->tx_hdr3, regs->tx_hdr4,
	    regs->rx_hdr1, regs->rx_hdr2,
	    regs->rx_hdr3, regs->rx_hdr4);
}
#endif	/* DEBUG */
660
661 /*
662 * look through poisoned TLP cases and suggest panic/no panic depend on
663 * handle lookup.
664 */
665 static int
px_pcie_ptlp(dev_info_t * dip,ddi_fm_error_t * derr,px_err_pcie_t * regs)666 px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
667 {
668 pf_pcie_adv_err_regs_t adv_reg;
669 pcie_req_id_t bdf;
670 uint64_t addr;
671 uint32_t trans_type;
672 int tlp_sts, tlp_cmd;
673 int lookup = PF_HDL_NOTFOUND;
674
675 if (regs->primary_ue != PCIE_AER_UCE_PTLP)
676 return (PX_PANIC);
677
678 if (!regs->rx_hdr1)
679 goto done;
680
681 adv_reg.pcie_ue_hdr[0] = regs->rx_hdr1;
682 adv_reg.pcie_ue_hdr[1] = regs->rx_hdr2;
683 adv_reg.pcie_ue_hdr[2] = regs->rx_hdr3;
684 adv_reg.pcie_ue_hdr[3] = regs->rx_hdr4;
685
686 tlp_sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
687 tlp_cmd = ((pcie_tlp_hdr_t *)(adv_reg.pcie_ue_hdr))->type;
688
689 if (tlp_sts == DDI_FAILURE)
690 goto done;
691
692 bdf = adv_reg.pcie_ue_tgt_bdf;
693 addr = adv_reg.pcie_ue_tgt_addr;
694 trans_type = adv_reg.pcie_ue_tgt_trans;
695
696 switch (tlp_cmd) {
697 case PCIE_TLP_TYPE_CPL:
698 case PCIE_TLP_TYPE_CPLLK:
699 /*
700 * Usually a PTLP is a CPL with data. Grab the completer BDF
701 * from the RX TLP, and the original address from the TX TLP.
702 */
703 if (regs->tx_hdr1) {
704 adv_reg.pcie_ue_hdr[0] = regs->tx_hdr1;
705 adv_reg.pcie_ue_hdr[1] = regs->tx_hdr2;
706 adv_reg.pcie_ue_hdr[2] = regs->tx_hdr3;
707 adv_reg.pcie_ue_hdr[3] = regs->tx_hdr4;
708
709 lookup = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
710 if (lookup != DDI_SUCCESS)
711 break;
712 addr = adv_reg.pcie_ue_tgt_addr;
713 trans_type = adv_reg.pcie_ue_tgt_trans;
714 } /* FALLTHRU */
715 case PCIE_TLP_TYPE_IO:
716 case PCIE_TLP_TYPE_MEM:
717 case PCIE_TLP_TYPE_MEMLK:
718 lookup = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr,
719 bdf);
720 break;
721 default:
722 lookup = PF_HDL_NOTFOUND;
723 }
724 done:
725 return (lookup == PF_HDL_FOUND ? PX_NO_PANIC : PX_PANIC);
726 }
727
728 /*
729 * px_get_pdf automatically allocates a RC pf_data_t and returns a pointer to
730 * it. This function should be used when an error requires a fabric scan.
731 */
732 pf_data_t *
px_get_pfd(px_t * px_p)733 px_get_pfd(px_t *px_p) {
734 int idx = px_p->px_pfd_idx++;
735 pf_data_t *pfd_p = &px_p->px_pfd_arr[idx];
736
737 /* Clear Old Data */
738 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
739 PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0;
740 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE;
741 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL;
742 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0;
743 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
744 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = 0;
745 PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0;
746 PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0;
747 PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0;
748
749 pfd_p->pe_next = NULL;
750
751 if (idx > 0) {
752 px_p->px_pfd_arr[idx - 1].pe_next = pfd_p;
753 pfd_p->pe_prev = &px_p->px_pfd_arr[idx - 1];
754 } else {
755 pfd_p->pe_prev = NULL;
756 }
757
758 pfd_p->pe_severity_flags = 0;
759 pfd_p->pe_severity_mask = 0;
760 pfd_p->pe_orig_severity_flags = 0;
761 pfd_p->pe_valid = B_TRUE;
762
763 return (pfd_p);
764 }
765
766 /*
767 * This function appends a pf_data structure to the error q which is used later
768 * during PCIe fabric scan. It signifies:
769 * o errs rcvd in RC, that may have been propagated to/from the fabric
770 * o the fabric scan code should scan the device path of fault bdf/addr
771 *
772 * scan_bdf: The bdf that caused the fault, which may have error bits set.
773 * scan_addr: The PIO addr that caused the fault, such as failed PIO, but not
774 * failed DMAs.
775 * s_status: Secondary Status equivalent to why the fault occured.
776 * (ie S-TA/MA, R-TA)
777 * Either the scan bdf or addr may be NULL, but not both.
778 */
779 pf_data_t *
px_rp_en_q(px_t * px_p,pcie_req_id_t scan_bdf,uint32_t scan_addr,uint16_t s_status)780 px_rp_en_q(px_t *px_p, pcie_req_id_t scan_bdf, uint32_t scan_addr,
781 uint16_t s_status)
782 {
783 pf_data_t *pfd_p;
784
785 if (!PCIE_CHECK_VALID_BDF(scan_bdf) && !scan_addr)
786 return (NULL);
787
788 pfd_p = px_get_pfd(px_p);
789
790 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = scan_bdf;
791 PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)scan_addr;
792 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
793
794 return (pfd_p);
795 }
796
797
798 /*
799 * Find and Mark CFG Handles as failed associated with the given BDF. We should
800 * always know the BDF for CFG accesses, since it is encoded in the address of
801 * the TLP. Since there can be multiple cfg handles, mark them all as failed.
802 */
803 /* ARGSUSED */
804 int
px_err_cfg_hdl_check(dev_info_t * dip,const void * handle,const void * arg1,const void * arg2)805 px_err_cfg_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
806 const void *arg2)
807 {
808 int status = DDI_FM_FATAL;
809 uint32_t addr = *(uint32_t *)arg1;
810 uint16_t bdf = *(uint16_t *)arg2;
811 pcie_bus_t *bus_p;
812
813 DBG(DBG_ERR_INTR, dip, "Check CFG Hdl: dip 0x%p addr 0x%x bdf=0x%x\n",
814 dip, addr, bdf);
815
816 bus_p = PCIE_DIP2BUS(dip);
817
818 /*
819 * Because CFG and IO Acc Handlers are on the same cache list and both
820 * types of hdls gets called for both types of errors. For this checker
821 * only mark the device as "Non-Fatal" if the addr == NULL and bdf !=
822 * NULL.
823 */
824 status = (!addr && (PCIE_CHECK_VALID_BDF(bdf) &&
825 (bus_p->bus_bdf == bdf))) ? DDI_FM_NONFATAL : DDI_FM_FATAL;
826
827 return (status);
828 }
829
830 /*
831 * Find and Mark all ACC Handles associated with a give address and BDF as
832 * failed. If the BDF != NULL, then check to see if the device has a ACC Handle
833 * associated with ADDR. If the handle is not found, mark all the handles as
834 * failed. If the BDF == NULL, mark the handle as failed if it is associated
835 * with ADDR.
836 */
837 int
px_err_pio_hdl_check(dev_info_t * dip,const void * handle,const void * arg1,const void * arg2)838 px_err_pio_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
839 const void *arg2)
840 {
841 dev_info_t *px_dip;
842 px_t *px_p;
843 pci_ranges_t *ranges_p;
844 int range_len;
845 ddi_acc_handle_t ap = (ddi_acc_handle_t)handle;
846 ddi_acc_hdl_t *hp = impl_acc_hdl_get(ap);
847 int i, status = DDI_FM_FATAL;
848 uint64_t fault_addr = *(uint64_t *)arg1;
849 uint16_t bdf = *(uint16_t *)arg2;
850 uint64_t base_addr, range_addr;
851 uint_t size;
852
853 /*
854 * Find the correct px dip. On system with a real Root Port, it's the
855 * node above the root port. On systems without a real Root Port the px
856 * dip is the bus_rp_dip.
857 */
858 px_dip = PCIE_DIP2BUS(dip)->bus_rp_dip;
859
860 if (!PCIE_IS_RC(PCIE_DIP2BUS(px_dip)))
861 px_dip = ddi_get_parent(px_dip);
862
863 ASSERT(PCIE_IS_RC(PCIE_DIP2BUS(px_dip)));
864 px_p = INST_TO_STATE(ddi_get_instance(px_dip));
865
866 DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
867 dip, fault_addr, bdf);
868
869 /* Normalize the base addr to the addr and strip off the HB info. */
870 base_addr = (hp->ah_pfn << MMU_PAGESHIFT) + hp->ah_offset;
871 range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
872 i = 0;
873 for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
874 range_addr = px_in_addr_range(dip, ranges_p, base_addr);
875 if (range_addr) {
876 switch (ranges_p->child_high & PCI_ADDR_MASK) {
877 case PCI_ADDR_IO:
878 case PCI_ADDR_MEM64:
879 case PCI_ADDR_MEM32:
880 base_addr = base_addr - range_addr;
881 break;
882 }
883 break;
884 }
885 }
886
887 /*
888 * Mark the handle as failed if the ADDR is mapped, or if we
889 * know the BDF and ADDR == 0.
890 */
891 size = hp->ah_len;
892 if (((fault_addr >= base_addr) && (fault_addr < (base_addr + size))) ||
893 ((fault_addr == 0) && (PCIE_CHECK_VALID_BDF(bdf) &&
894 (bdf == PCIE_DIP2BUS(dip)->bus_bdf))))
895 status = DDI_FM_NONFATAL;
896
897 return (status);
898 }
899
900 /*
901 * Find and Mark all DNA Handles associated with a give address and BDF as
902 * failed. If the BDF != NULL, then check to see if the device has a DMA Handle
903 * associated with ADDR. If the handle is not found, mark all the handles as
904 * failed. If the BDF == NULL, mark the handle as failed if it is associated
905 * with ADDR.
906 */
907 int
px_err_dma_hdl_check(dev_info_t * dip,const void * handle,const void * arg1,const void * arg2)908 px_err_dma_hdl_check(dev_info_t *dip, const void *handle, const void *arg1,
909 const void *arg2)
910 {
911 ddi_dma_impl_t *pcie_dp;
912 int status = DDI_FM_FATAL;
913 uint32_t addr = *(uint32_t *)arg1;
914 uint16_t bdf = *(uint16_t *)arg2;
915 uint32_t base_addr;
916 uint_t size;
917
918 DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n",
919 dip, addr, bdf);
920
921 pcie_dp = (ddi_dma_impl_t *)handle;
922 base_addr = (uint32_t)pcie_dp->dmai_mapping;
923 size = pcie_dp->dmai_size;
924
925 /*
926 * Mark the handle as failed if the ADDR is mapped, or if we
927 * know the BDF and ADDR == 0.
928 */
929 if (((addr >= base_addr) && (addr < (base_addr + size))) ||
930 ((addr == 0) && PCIE_CHECK_VALID_BDF(bdf)))
931 status = DDI_FM_NONFATAL;
932
933 return (status);
934 }
935
936 int
px_fm_enter(px_t * px_p)937 px_fm_enter(px_t *px_p) {
938 if (px_panicing || (px_p->px_fm_mutex_owner == curthread))
939 return (DDI_FAILURE);
940
941 mutex_enter(&px_p->px_fm_mutex);
942 /*
943 * In rare cases when trap occurs and in the middle of scanning the
944 * fabric, a PIO will fail in the scan fabric. The CPU error handling
945 * code will correctly panic the system, while a mondo for the failed
946 * PIO may also show up. Normally the mondo will try to grab the mutex
947 * and wait until the callback finishes. But in this rare case,
948 * mutex_enter actually suceeds also continues to scan the fabric.
949 *
950 * This code below is designed specifically to check for this case. If
951 * we successfully grab the px_fm_mutex, the px_fm_mutex_owner better be
952 * NULL. If it isn't that means we are in the rare corner case. Return
953 * DDI_FAILURE, this should prevent PX from doing anymore error
954 * handling.
955 */
956 if (px_p->px_fm_mutex_owner) {
957 return (DDI_FAILURE);
958 }
959
960 px_p->px_fm_mutex_owner = curthread;
961
962 if (px_panicing) {
963 px_fm_exit(px_p);
964 return (DDI_FAILURE);
965 }
966
967 /* Signal the PCIe error handling module error handling is starting */
968 pf_eh_enter(PCIE_DIP2BUS(px_p->px_dip));
969
970 return (DDI_SUCCESS);
971 }
972
973 static void
px_guest_panic(px_t * px_p)974 px_guest_panic(px_t *px_p)
975 {
976 pf_data_t *root_pfd_p = PCIE_DIP2PFD(px_p->px_dip);
977 pf_data_t *pfd_p;
978 pcie_bus_t *bus_p, *root_bus_p;
979 pcie_req_id_list_t *rl;
980
981 /*
982 * check if all devices under the root device are unassigned.
983 * this function should quickly return in non-IOV environment.
984 */
985 root_bus_p = PCIE_PFD2BUS(root_pfd_p);
986 if (PCIE_BDG_IS_UNASSIGNED(root_bus_p))
987 return;
988
989 for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) {
990 bus_p = PCIE_PFD2BUS(pfd_p);
991
992 /* assume all affected devs were in the error Q */
993 if (!PCIE_BUS2DOM(bus_p)->nfma_panic)
994 continue;
995
996 if (PCIE_IS_BDG(bus_p)) {
997 rl = PCIE_BDF_LIST_GET(bus_p);
998 while (rl) {
999 px_panic_domain(px_p, rl->bdf);
1000 rl = rl->next;
1001 }
1002 } else {
1003 px_panic_domain(px_p, bus_p->bus_bdf);
1004 }
1005 /* clear panic flag */
1006 PCIE_BUS2DOM(bus_p)->nfma_panic = B_FALSE;
1007 }
1008 }
1009
1010 void
px_fm_exit(px_t * px_p)1011 px_fm_exit(px_t *px_p) {
1012 px_p->px_fm_mutex_owner = NULL;
1013 if (px_p->px_pfd_idx == 0) {
1014 mutex_exit(&px_p->px_fm_mutex);
1015 return;
1016 }
1017 /* panic the affected domains that are non-fma-capable */
1018 px_guest_panic(px_p);
1019 /* Signal the PCIe error handling module error handling is ending */
1020 pf_eh_exit(PCIE_DIP2BUS(px_p->px_dip));
1021 px_p->px_pfd_idx = 0;
1022 mutex_exit(&px_p->px_fm_mutex);
1023 }
1024
1025 /*
1026 * Panic if the err tunable is set and that we are not already in the middle
1027 * of panic'ing.
1028 *
1029 * rc_err = Error severity of PX specific errors
1030 * msg = Where the error was detected
1031 * fabric_err = Error severity of PCIe Fabric errors
1032 * isTest = Test if error severity causes panic
1033 */
1034 #define MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1)
1035 void
px_err_panic(int rc_err,int msg,int fabric_err,boolean_t isTest)1036 px_err_panic(int rc_err, int msg, int fabric_err, boolean_t isTest)
1037 {
1038 char fm_msg[96] = "";
1039 int ferr = PX_NO_ERROR;
1040
1041 if (panicstr) {
1042 px_panicing = B_TRUE;
1043 return;
1044 }
1045
1046 if (!(rc_err & px_die))
1047 goto fabric;
1048 if (msg & PX_RC)
1049 (void) strncat(fm_msg, px_panic_rc_msg, MSZ);
1050 if (msg & PX_RP)
1051 (void) strncat(fm_msg, px_panic_rp_msg, MSZ);
1052 if (msg & PX_HB)
1053 (void) strncat(fm_msg, px_panic_hb_msg, MSZ);
1054
1055 fabric:
1056 if (fabric_err & PF_ERR_FATAL_FLAGS)
1057 ferr = PX_PANIC;
1058 else if (fabric_err & ~(PF_ERR_FATAL_FLAGS | PF_ERR_NO_ERROR))
1059 ferr = PX_NO_PANIC;
1060
1061 if (ferr & px_die) {
1062 if (strlen(fm_msg)) {
1063 (void) strncat(fm_msg, " and", MSZ);
1064 }
1065 (void) strncat(fm_msg, px_panic_fab_msg, MSZ);
1066 }
1067
1068 if (strlen(fm_msg)) {
1069 px_panicing = B_TRUE;
1070 if (!isTest)
1071 fm_panic("Fatal error has occured in:%s.(0x%x)(0x%x)",
1072 fm_msg, rc_err, fabric_err);
1073 }
1074 }
1075