1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * sun4v Fire Error Handling
27 */
28
29 #include <sys/types.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/sunndi.h>
33 #include <sys/fm/protocol.h>
34 #include <sys/fm/util.h>
35 #include <sys/membar.h>
36 #include "px_obj.h"
37 #include "px_err.h"
38
/*
 * Forward declarations for the file-local helpers.
 */

/* Interrupt entry point helpers and top-level epkt severity evaluation */
static void px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p,
    px_rc_err_t *epkt);
static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
static int  px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);

/* Debug logging and ereport generation for a received epkt */
static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, char *msg);
static void px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
    boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
    boolean_t is_valid_epkt);

/*
 * Per-block severity routines dispatched from px_err_epkt_severity().
 * NOTE(review): only px_pcie_epkt_severity() is defined in this file; the
 * others are presumably provided by the generated px_err_gen.c included
 * below -- confirm.
 */
static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_port_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);

/*
 * Condition-specific handlers and epkt fix-ups defined in this file.
 * NOTE(review): the *_handle_* routines are not called from the code
 * visible here; presumably they are referenced by the generated code --
 * confirm.
 */
static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static int px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt, pf_data_t *pfd_p);
static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr,
    px_rc_err_t *epkt);
68
69 /* Include the code generated sun4v epkt checking code */
70 #include "px_err_gen.c"
71
/*
 * This variable indicates if we have a hypervisor that could potentially send
 * incorrect epkts. We always set this to TRUE for now until we find a way to
 * tell if this HV bug has been fixed.
 *
 * When set, px_err_epkt_severity() runs every received epkt through
 * px_fix_legacy_epkt() before evaluating its severity.
 */
boolean_t px_legacy_epkt = B_TRUE;
78
79 /*
80 * px_err_cb_intr:
81 * Interrupt handler for the Host Bus Block.
82 */
83 uint_t
px_err_cb_intr(caddr_t arg)84 px_err_cb_intr(caddr_t arg)
85 {
86 px_fault_t *fault_p = (px_fault_t *)arg;
87 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload;
88
89 if (epkt != NULL) {
90 return (px_err_intr(fault_p, epkt));
91 }
92
93 return (DDI_INTR_UNCLAIMED);
94 }
95
96 /*
97 * px_err_dmc_pec_intr:
98 * Interrupt handler for the DMC/PEC block.
99 */
100 uint_t
px_err_dmc_pec_intr(caddr_t arg)101 px_err_dmc_pec_intr(caddr_t arg)
102 {
103 px_fault_t *fault_p = (px_fault_t *)arg;
104 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload;
105
106 if (epkt != NULL) {
107 return (px_err_intr(fault_p, epkt));
108 }
109
110 return (DDI_INTR_UNCLAIMED);
111 }
112
/*
 * px_err_cmn_intr:
 * Common function called by trap, mondo and fabric intr.
 * This function is more meaningful in sun4u implementation.  Kept
 * to mirror sun4u call stack.
 *
 * In this implementation the function only calls px_err_safeacc_check()
 * and then reports that no error was found; 'caller' and 'block' are
 * accepted but not used.
 *
 * @param px_p		leaf in which to check access
 * @param derr		fm err data structure to be updated
 * @param caller	PX_TRAP_CALL | PX_INTR_CALL (ignored)
 * @param block		error block selector, e.g. PX_FM_BLOCK_ALL (ignored)
 * @return err		always PX_NO_ERROR
 */
/* ARGSUSED */
int
px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block)
{
	px_err_safeacc_check(px_p, derr);
	return (PX_NO_ERROR);
}
136
137 /*
138 * fills RC specific fault data
139 */
140 static void
px_err_fill_pfd(dev_info_t * dip,pf_data_t * pfd_p,px_rc_err_t * epkt)141 px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) {
142 pf_pcie_adv_err_regs_t adv_reg;
143 pcie_req_id_t fault_bdf = PCIE_INVALID_BDF;
144 uint64_t fault_addr = 0;
145 uint16_t s_status = 0;
146 px_pec_err_t *pec_p;
147 uint32_t dir;
148
149 /* Add an PCIE PF_DATA Entry */
150 switch (epkt->rc_descr.block) {
151 case BLOCK_MMU:
152 /* Only PIO Fault Addresses are valid, this is DMA */
153 s_status = PCI_STAT_S_TARG_AB;
154 fault_addr = NULL;
155
156 if (epkt->rc_descr.H) {
157 fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16);
158 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags =
159 PF_AFFECTED_BDF;
160 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf =
161 fault_bdf;
162 }
163 break;
164 case BLOCK_PCIE:
165 pec_p = (px_pec_err_t *)epkt;
166 dir = pec_p->pec_descr.dir;
167
168 /* translate RC UR/CA to legacy secondary errors */
169 if ((dir == DIR_READ || dir == DIR_WRITE) &&
170 pec_p->pec_descr.U) {
171 if (pec_p->ue_reg_status & PCIE_AER_UCE_UR)
172 s_status |= PCI_STAT_R_MAST_AB;
173 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
174 s_status |= PCI_STAT_R_TARG_AB;
175 }
176
177 if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP)
178 s_status |= PCI_STAT_PERROR;
179
180 if (pec_p->ue_reg_status & PCIE_AER_UCE_CA)
181 s_status |= PCI_STAT_S_TARG_AB;
182
183 if (pec_p->pec_descr.H) {
184 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >>32);
185 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]);
186 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >>32);
187 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]);
188
189 if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) ==
190 DDI_SUCCESS) {
191 fault_bdf = adv_reg.pcie_ue_tgt_bdf;
192 fault_addr = adv_reg.pcie_ue_tgt_addr;
193 /*
194 * affected BDF is to be filled in by
195 * px_scan_fabric
196 */
197 }
198 }
199 break;
200 case BLOCK_HOSTBUS:
201 case BLOCK_INTR:
202 case BLOCK_PORT:
203 /*
204 * If the affected device information is available then we
205 * add the affected_bdf to the pfd, so the affected device
206 * will be scanned and added to the error q. This will then
207 * go through the pciev_eh code path and forgive the error
208 * as needed.
209 */
210 if (PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags ==
211 PF_AFFECTED_BDF)
212 fault_bdf = PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf;
213
214 break;
215 default:
216 break;
217 }
218
219 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = fault_bdf;
220 PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)fault_addr;
221 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
222 }
223
224 /*
225 * Convert error severity from PX internal values to PCIe Fabric values. Most
226 * are self explanitory, except PX_PROTECTED. PX_PROTECTED will never be
227 * returned as is if forgivable.
228 */
229 static int
px_err_to_fab_sev(int * rc_err)230 px_err_to_fab_sev(int *rc_err) {
231 int fab_err = 0;
232
233 if (*rc_err & px_die) {
234 /*
235 * Let fabric scan decide the final severity of the error.
236 * This is needed incase IOV code needs to forgive the error.
237 */
238 *rc_err = PX_FABRIC_SCAN;
239 fab_err |= PF_ERR_PANIC;
240 }
241
242 if (*rc_err & (PX_EXPECTED | PX_NO_PANIC))
243 fab_err |= PF_ERR_NO_PANIC;
244
245 if (*rc_err & PX_NO_ERROR)
246 fab_err |= PF_ERR_NO_ERROR;
247
248 return (fab_err);
249 }
250
251 /*
252 * px_err_intr:
253 * Interrupt handler for the JBC/DMC/PEC block.
254 * o lock
255 * o create derr
256 * o check safe access
257 * o px_err_check_severity(epkt)
258 * o pcie_scan_fabric
259 * o Idle intr state
260 * o unlock
261 * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
262 */
263 static uint_t
px_err_intr(px_fault_t * fault_p,px_rc_err_t * epkt)264 px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
265 {
266 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip);
267 dev_info_t *rpdip = px_p->px_dip;
268 int rc_err, tmp_rc_err, fab_err, msg;
269 ddi_fm_error_t derr;
270 pf_data_t *pfd_p;
271
272 if (px_fm_enter(px_p) != DDI_SUCCESS)
273 goto done;
274
275 pfd_p = px_get_pfd(px_p);
276 PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_INTERNAL;
277 PCIE_ROOT_EH_SRC(pfd_p)->intr_data = epkt;
278
279 /* Create the derr */
280 bzero(&derr, sizeof (ddi_fm_error_t));
281 derr.fme_version = DDI_FME_VERSION;
282 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
283 derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
284
285 /* Basically check for safe access */
286 (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL);
287
288 /* Check the severity of this error */
289 rc_err = px_err_epkt_severity(px_p, &derr, epkt, pfd_p);
290
291 /* Pass the 'rc_err' severity to the fabric scan code. */
292 tmp_rc_err = rc_err;
293 pfd_p->pe_severity_flags = px_err_to_fab_sev(&rc_err);
294
295 /* Scan the fabric */
296 if (!(fab_err = px_scan_fabric(px_p, rpdip, &derr))) {
297 /*
298 * Fabric scan didn't occur because of some error condition
299 * such as Root Port being in drain state, so reset rc_err.
300 */
301 rc_err = tmp_rc_err;
302 }
303
304 /* Set the intr state to idle for the leaf that received the mondo */
305 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
306 INTR_IDLE_STATE) != DDI_SUCCESS) {
307 px_fm_exit(px_p);
308 return (DDI_INTR_UNCLAIMED);
309 }
310
311 switch (epkt->rc_descr.block) {
312 case BLOCK_MMU: /* FALLTHROUGH */
313 case BLOCK_INTR:
314 msg = PX_RC;
315 break;
316 case BLOCK_PCIE:
317 msg = PX_RP;
318 break;
319 case BLOCK_HOSTBUS: /* FALLTHROUGH */
320 default:
321 msg = PX_HB;
322 break;
323 }
324
325 px_err_panic(rc_err, msg, fab_err, B_TRUE);
326 px_fm_exit(px_p);
327 px_err_panic(rc_err, msg, fab_err, B_FALSE);
328
329 done:
330 return (DDI_INTR_CLAIMED);
331 }
332
333 /*
334 * px_err_epkt_severity:
335 * Check the severity of the fire error based the epkt received
336 *
337 * @param px_p leaf in which to take the snap shot.
338 * @param derr fm err in which the ereport is to be based on
339 * @param epkt epkt recevied from HV
340 */
341 static int
px_err_epkt_severity(px_t * px_p,ddi_fm_error_t * derr,px_rc_err_t * epkt,pf_data_t * pfd_p)342 px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
343 pf_data_t *pfd_p)
344 {
345 px_pec_t *pec_p = px_p->px_pec_p;
346 dev_info_t *dip = px_p->px_dip;
347 boolean_t is_safeacc = B_FALSE;
348 boolean_t is_block_pci = B_FALSE;
349 boolean_t is_valid_epkt = B_FALSE;
350 int err = 0;
351
352 /* Cautious access error handling */
353 switch (derr->fme_flag) {
354 case DDI_FM_ERR_EXPECTED:
355 /*
356 * For ddi_caut_put treat all events as nonfatal. Here
357 * we have the handle and can call ndi_fm_acc_err_set().
358 */
359 derr->fme_status = DDI_FM_NONFATAL;
360 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
361 is_safeacc = B_TRUE;
362 break;
363 case DDI_FM_ERR_PEEK:
364 case DDI_FM_ERR_POKE:
365 /*
366 * For ddi_peek/poke treat all events as nonfatal.
367 */
368 is_safeacc = B_TRUE;
369 break;
370 default:
371 is_safeacc = B_FALSE;
372 }
373
374 /*
375 * Older hypervisors in some cases send epkts with incorrect fields.
376 * We have to handle these "special" epkts correctly.
377 */
378 if (px_legacy_epkt)
379 px_fix_legacy_epkt(dip, derr, epkt);
380
381 /*
382 * The affected device by default is set to 'SELF'. The 'block'
383 * specific error handling below will update this as needed.
384 */
385 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
386
387 switch (epkt->rc_descr.block) {
388 case BLOCK_HOSTBUS:
389 err = px_cb_epkt_severity(dip, derr, epkt, pfd_p);
390 break;
391 case BLOCK_MMU:
392 err = px_mmu_epkt_severity(dip, derr, epkt, pfd_p);
393 break;
394 case BLOCK_INTR:
395 err = px_intr_epkt_severity(dip, derr, epkt, pfd_p);
396 break;
397 case BLOCK_PORT:
398 err = px_port_epkt_severity(dip, derr, epkt, pfd_p);
399 break;
400 case BLOCK_PCIE:
401 is_block_pci = B_TRUE;
402 err = px_pcie_epkt_severity(dip, derr, epkt, pfd_p);
403 break;
404 default:
405 err = 0;
406 }
407
408 px_err_fill_pfd(dip, pfd_p, epkt);
409
410 if ((err & PX_HW_RESET) || (err & PX_PANIC)) {
411 if (px_log & PX_PANIC)
412 px_err_log_handle(dip, epkt, is_block_pci, "PANIC");
413 is_valid_epkt = B_TRUE;
414 } else if (err & PX_PROTECTED) {
415 if (px_log & PX_PROTECTED)
416 px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED");
417 is_valid_epkt = B_TRUE;
418 } else if (err & PX_NO_PANIC) {
419 if (px_log & PX_NO_PANIC)
420 px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC");
421 is_valid_epkt = B_TRUE;
422 } else if (err & PX_NO_ERROR) {
423 if (px_log & PX_NO_ERROR)
424 px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR");
425 is_valid_epkt = B_TRUE;
426 } else if (err == 0) {
427 px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED");
428 is_valid_epkt = B_FALSE;
429
430 /* Panic on a unrecognized epkt */
431 err = PX_PANIC;
432 }
433
434 px_err_send_epkt_erpt(dip, epkt, is_block_pci, err, derr,
435 is_valid_epkt);
436
437 /* Readjust the severity as a result of safe access */
438 if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED))
439 err = PX_NO_PANIC;
440
441 return (err);
442 }
443
444 static void
px_err_send_epkt_erpt(dev_info_t * dip,px_rc_err_t * epkt,boolean_t is_block_pci,int err,ddi_fm_error_t * derr,boolean_t is_valid_epkt)445 px_err_send_epkt_erpt(dev_info_t *dip, px_rc_err_t *epkt,
446 boolean_t is_block_pci, int err, ddi_fm_error_t *derr,
447 boolean_t is_valid_epkt)
448 {
449 char buf[FM_MAX_CLASS], descr_buf[1024];
450
451 /* send ereport for debug purposes */
452 (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG);
453
454 if (is_block_pci) {
455 px_pec_err_t *pec = (px_pec_err_t *)epkt;
456 (void) snprintf(descr_buf, sizeof (descr_buf),
457 "%s Epkt contents:\n"
458 "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n"
459 "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n"
460 "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n"
461 "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n"
462 "HDR1: 0x%lx, HDR2: 0x%lx\n"
463 "Err Src Reg: 0x%x, Root Err Status: 0x%x\n"
464 "Err Severity: 0x%x\n",
465 is_valid_epkt ? "Valid" : "Invalid",
466 pec->pec_descr.block, pec->pec_descr.dir,
467 pec->pec_descr.Z, pec->pec_descr.S,
468 pec->pec_descr.R, pec->pec_descr.I,
469 pec->pec_descr.H, pec->pec_descr.C,
470 pec->pec_descr.U, pec->pec_descr.E,
471 pec->pec_descr.P, pec->pci_err_status,
472 pec->pcie_err_status, pec->ce_reg_status,
473 pec->ue_reg_status, pec->hdr[0],
474 pec->hdr[1], pec->err_src_reg,
475 pec->root_err_status, err);
476
477 ddi_fm_ereport_post(dip, buf, derr->fme_ena,
478 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
479 EPKT_SYSINO, DATA_TYPE_UINT64,
480 is_valid_epkt ? pec->sysino : 0,
481 EPKT_EHDL, DATA_TYPE_UINT64,
482 is_valid_epkt ? pec->ehdl : 0,
483 EPKT_STICK, DATA_TYPE_UINT64,
484 is_valid_epkt ? pec->stick : 0,
485 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)pec)[3],
486 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)pec)[4],
487 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)pec)[5],
488 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)pec)[6],
489 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)pec)[7],
490 EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf);
491 } else {
492 (void) snprintf(descr_buf, sizeof (descr_buf),
493 "%s Epkt contents:\n"
494 "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
495 "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n"
496 "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n"
497 "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n"
498 "Err Severity: 0x%x\n",
499 is_valid_epkt ? "Valid" : "Invalid",
500 epkt->rc_descr.block, epkt->rc_descr.op,
501 epkt->rc_descr.phase, epkt->rc_descr.cond,
502 epkt->rc_descr.dir, epkt->rc_descr.STOP,
503 epkt->rc_descr.H, epkt->rc_descr.R,
504 epkt->rc_descr.D, epkt->rc_descr.M,
505 epkt->rc_descr.S, epkt->size, epkt->addr,
506 epkt->hdr[0], epkt->hdr[1], epkt->reserved,
507 err);
508
509 ddi_fm_ereport_post(dip, buf, derr->fme_ena,
510 DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0,
511 EPKT_SYSINO, DATA_TYPE_UINT64,
512 is_valid_epkt ? epkt->sysino : 0,
513 EPKT_EHDL, DATA_TYPE_UINT64,
514 is_valid_epkt ? epkt->ehdl : 0,
515 EPKT_STICK, DATA_TYPE_UINT64,
516 is_valid_epkt ? epkt->stick : 0,
517 EPKT_DW0, DATA_TYPE_UINT64, ((uint64_t *)epkt)[3],
518 EPKT_DW1, DATA_TYPE_UINT64, ((uint64_t *)epkt)[4],
519 EPKT_DW2, DATA_TYPE_UINT64, ((uint64_t *)epkt)[5],
520 EPKT_DW3, DATA_TYPE_UINT64, ((uint64_t *)epkt)[6],
521 EPKT_DW4, DATA_TYPE_UINT64, ((uint64_t *)epkt)[7],
522 EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf);
523 }
524 }
525
526 static void
px_err_log_handle(dev_info_t * dip,px_rc_err_t * epkt,boolean_t is_block_pci,char * msg)527 px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci,
528 char *msg)
529 {
530 if (is_block_pci) {
531 px_pec_err_t *pec = (px_pec_err_t *)epkt;
532 DBG(DBG_ERR_INTR, dip,
533 "A PCIe root port error has occured with a severity"
534 " \"%s\"\n"
535 "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n"
536 "\tH=%d, C=%d, U=%d, E=%d, P=%d\n"
537 "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n"
538 "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n"
539 "\terr_src: 0x%x, root_err: 0x%x\n",
540 msg, pec->pec_descr.block, pec->pec_descr.dir,
541 pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R,
542 pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C,
543 pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P,
544 pec->pci_err_status, pec->pcie_err_status,
545 pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0],
546 pec->hdr[1], pec->err_src_reg, pec->root_err_status);
547 } else {
548 DBG(DBG_ERR_INTR, dip,
549 "A PCIe root complex error has occured with a severity"
550 " \"%s\"\n"
551 "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n"
552 "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n"
553 "\tS=%d, Size: 0x%x, Addr: 0x%p\n"
554 "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n",
555 msg, epkt->rc_descr.block, epkt->rc_descr.op,
556 epkt->rc_descr.phase, epkt->rc_descr.cond,
557 epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H,
558 epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M,
559 epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0],
560 epkt->hdr[1], epkt->reserved);
561 }
562 }
563
564 /* ARGSUSED */
565 static void
px_fix_legacy_epkt(dev_info_t * dip,ddi_fm_error_t * derr,px_rc_err_t * epkt)566 px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
567 {
568 /*
569 * We don't have a default case for any of the below switch statements
570 * since we are ok with the code falling through.
571 */
572 switch (epkt->rc_descr.block) {
573 case BLOCK_HOSTBUS:
574 switch (epkt->rc_descr.op) {
575 case OP_DMA:
576 switch (epkt->rc_descr.phase) {
577 case PH_UNKNOWN:
578 switch (epkt->rc_descr.cond) {
579 case CND_UNKNOWN:
580 switch (epkt->rc_descr.dir) {
581 case DIR_RESERVED:
582 epkt->rc_descr.dir = DIR_READ;
583 break;
584 } /* DIR */
585 } /* CND */
586 } /* PH */
587 } /* OP */
588 break;
589 case BLOCK_MMU:
590 switch (epkt->rc_descr.op) {
591 case OP_XLAT:
592 switch (epkt->rc_descr.phase) {
593 case PH_DATA:
594 switch (epkt->rc_descr.cond) {
595 case CND_PROT:
596 switch (epkt->rc_descr.dir) {
597 case DIR_UNKNOWN:
598 epkt->rc_descr.dir = DIR_WRITE;
599 break;
600 } /* DIR */
601 } /* CND */
602 break;
603 case PH_IRR:
604 switch (epkt->rc_descr.cond) {
605 case CND_RESERVED:
606 switch (epkt->rc_descr.dir) {
607 case DIR_IRR:
608 epkt->rc_descr.phase = PH_ADDR;
609 epkt->rc_descr.cond = CND_IRR;
610 } /* DIR */
611 } /* CND */
612 } /* PH */
613 } /* OP */
614 break;
615 case BLOCK_INTR:
616 switch (epkt->rc_descr.op) {
617 case OP_MSIQ:
618 switch (epkt->rc_descr.phase) {
619 case PH_UNKNOWN:
620 switch (epkt->rc_descr.cond) {
621 case CND_ILL:
622 switch (epkt->rc_descr.dir) {
623 case DIR_RESERVED:
624 epkt->rc_descr.dir = DIR_IRR;
625 break;
626 } /* DIR */
627 break;
628 case CND_IRR:
629 switch (epkt->rc_descr.dir) {
630 case DIR_IRR:
631 epkt->rc_descr.cond = CND_OV;
632 break;
633 } /* DIR */
634 } /* CND */
635 } /* PH */
636 break;
637 case OP_RESERVED:
638 switch (epkt->rc_descr.phase) {
639 case PH_UNKNOWN:
640 switch (epkt->rc_descr.cond) {
641 case CND_ILL:
642 switch (epkt->rc_descr.dir) {
643 case DIR_IRR:
644 epkt->rc_descr.op = OP_MSI32;
645 epkt->rc_descr.phase = PH_DATA;
646 break;
647 } /* DIR */
648 } /* CND */
649 break;
650 case PH_DATA:
651 switch (epkt->rc_descr.cond) {
652 case CND_INT:
653 switch (epkt->rc_descr.dir) {
654 case DIR_UNKNOWN:
655 epkt->rc_descr.op = OP_MSI32;
656 break;
657 } /* DIR */
658 } /* CND */
659 } /* PH */
660 } /* OP */
661 } /* BLOCK */
662 }
663
664 /* ARGSUSED */
665 static int
px_intr_handle_errors(dev_info_t * dip,ddi_fm_error_t * derr,px_rc_err_t * epkt,pf_data_t * pfd_p)666 px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
667 pf_data_t *pfd_p)
668 {
669 return (px_err_check_eq(dip));
670 }
671
672 /* ARGSUSED */
673 static int
px_port_handle_errors(dev_info_t * dip,ddi_fm_error_t * derr,px_rc_err_t * epkt,pf_data_t * pfd_p)674 px_port_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
675 pf_data_t *pfd_p)
676 {
677 pf_pcie_adv_err_regs_t adv_reg;
678 uint16_t s_status;
679 int sts = PX_PANIC;
680
681 /*
682 * Check for failed non-posted writes, which are errors that are not
683 * defined in the PCIe spec. If not return panic.
684 */
685 if (!((epkt->rc_descr.op == OP_PIO) &&
686 (epkt->rc_descr.phase == PH_IRR))) {
687 sts = (PX_PANIC);
688 goto done;
689 }
690
691 /*
692 * Gather the error logs, if they do not exist just return with no panic
693 * and let the fabric message take care of the error.
694 */
695 if (!epkt->rc_descr.H) {
696 sts = (PX_NO_PANIC);
697 goto done;
698 }
699
700 adv_reg.pcie_ue_hdr[0] = (uint32_t)(epkt->hdr[0] >> 32);
701 adv_reg.pcie_ue_hdr[1] = (uint32_t)(epkt->hdr[0]);
702 adv_reg.pcie_ue_hdr[2] = (uint32_t)(epkt->hdr[1] >> 32);
703 adv_reg.pcie_ue_hdr[3] = (uint32_t)(epkt->hdr[1]);
704
705 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
706
707 if (epkt->rc_descr.M)
708 adv_reg.pcie_ue_tgt_addr = epkt->addr;
709
710 if (!((sts == DDI_SUCCESS) || (epkt->rc_descr.M))) {
711 /* Let the fabric message take care of error */
712 sts = PX_NO_PANIC;
713 goto done;
714 }
715
716 /* See if the failed transaction belonged to a hardened driver */
717 if (pf_hdl_lookup(dip, derr->fme_ena,
718 adv_reg.pcie_ue_tgt_trans, adv_reg.pcie_ue_tgt_addr,
719 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
720 sts = (PX_NO_PANIC);
721 else
722 sts = (PX_PANIC);
723
724 /* Add pfd to cause a fabric scan */
725 switch (epkt->rc_descr.cond) {
726 case CND_RCA:
727 s_status = PCI_STAT_R_TARG_AB;
728 break;
729 case CND_RUR:
730 s_status = PCI_STAT_R_MAST_AB;
731 break;
732 }
733 PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg.pcie_ue_tgt_bdf;
734 PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg.pcie_ue_tgt_addr;
735 PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status;
736
737 PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF;
738 PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = adv_reg.pcie_ue_tgt_bdf;
739
740 done:
741 return (sts);
742 }
743
744 /* ARGSUSED */
745 static int
px_pcie_epkt_severity(dev_info_t * dip,ddi_fm_error_t * derr,px_rc_err_t * epkt,pf_data_t * pfd_p)746 px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt,
747 pf_data_t *pfd_p)
748 {
749 px_pec_err_t *pec_p = (px_pec_err_t *)epkt;
750 px_err_pcie_t *pcie = (px_err_pcie_t *)epkt;
751 pf_pcie_adv_err_regs_t adv_reg;
752 int sts;
753 uint32_t temp;
754
755 /*
756 * Check for failed PIO Read/Writes, which are errors that are not
757 * defined in the PCIe spec.
758 */
759
760 temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA;
761 if (((pec_p->pec_descr.dir == DIR_READ) ||
762 (pec_p->pec_descr.dir == DIR_WRITE)) &&
763 pec_p->pec_descr.U && (pec_p->ue_reg_status & temp)) {
764
765 adv_reg.pcie_ue_hdr[0] = (uint32_t)(pec_p->hdr[0] >> 32);
766 adv_reg.pcie_ue_hdr[1] = (uint32_t)(pec_p->hdr[0]);
767 adv_reg.pcie_ue_hdr[2] = (uint32_t)(pec_p->hdr[1] >> 32);
768 adv_reg.pcie_ue_hdr[3] = (uint32_t)(pec_p->hdr[1]);
769
770 sts = pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg);
771
772 if (sts == DDI_SUCCESS &&
773 pf_hdl_lookup(dip, derr->fme_ena,
774 adv_reg.pcie_ue_tgt_trans,
775 adv_reg.pcie_ue_tgt_addr,
776 adv_reg.pcie_ue_tgt_bdf) == PF_HDL_FOUND)
777 return (PX_NO_PANIC);
778 else
779 return (PX_PANIC);
780 }
781
782 if (!pec_p->pec_descr.C)
783 pec_p->ce_reg_status = 0;
784 if (!pec_p->pec_descr.U)
785 pec_p->ue_reg_status = 0;
786 if (!pec_p->pec_descr.H)
787 pec_p->hdr[0] = 0;
788 if (!pec_p->pec_descr.I)
789 pec_p->hdr[1] = 0;
790
791 /*
792 * According to the PCIe spec, there is a first error pointer. If there
793 * are header logs recorded and there are more than one error, the log
794 * will belong to the error that the first error pointer points to.
795 *
796 * The regs.primary_ue expects a bit number, go through the ue register
797 * and find the first error that occured. Because the sun4v epkt spec
798 * does not define this value, the algorithm below gives the lower bit
799 * priority.
800 */
801 temp = pcie->ue_reg;
802 if (temp) {
803 int x;
804 for (x = 0; !(temp & 0x1); x++) {
805 temp = temp >> 1;
806 }
807 pcie->primary_ue = 1 << x;
808 } else {
809 pcie->primary_ue = 0;
810 }
811
812 /* Sun4v doesn't log the TX hdr except for CTOs */
813 if (pcie->primary_ue == PCIE_AER_UCE_TO) {
814 pcie->tx_hdr1 = pcie->rx_hdr1;
815 pcie->tx_hdr2 = pcie->rx_hdr2;
816 pcie->tx_hdr3 = pcie->rx_hdr3;
817 pcie->tx_hdr4 = pcie->rx_hdr4;
818 pcie->rx_hdr1 = 0;
819 pcie->rx_hdr2 = 0;
820 pcie->rx_hdr3 = 0;
821 pcie->rx_hdr4 = 0;
822 } else {
823 pcie->tx_hdr1 = 0;
824 pcie->tx_hdr2 = 0;
825 pcie->tx_hdr3 = 0;
826 pcie->tx_hdr4 = 0;
827 }
828
829 return (px_err_check_pcie(dip, derr, pcie, PF_INTR_TYPE_INTERNAL));
830 }
831
832 static int
px_mmu_handle_lookup(dev_info_t * dip,ddi_fm_error_t * derr,px_rc_err_t * epkt)833 px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt)
834 {
835 uint64_t addr = (uint64_t)epkt->addr;
836 pcie_req_id_t bdf = PCIE_INVALID_BDF;
837
838 if (epkt->rc_descr.H) {
839 bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF);
840 }
841
842 return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr,
843 bdf));
844 }
845