xref: /titanic_52/usr/src/uts/sun4v/io/px/px_err.c (revision 82d33c01b078ed404a986a369750cdb4743773fb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sun4v Fire Error Handling
30  */
31 
32 #include <sys/types.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/membar.h>
38 #include "px_obj.h"
39 #include "px_err.h"
40 
41 static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
42 static int  px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
43     px_rc_err_t *epkt, int caller);
44 
45 static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
46     px_rc_err_t *epkt, int caller);
47 static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
48     px_rc_err_t *epkt, int caller);
49 static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
50     px_rc_err_t *epkt, int caller);
51 
52 /*
53  * px_err_cb_intr:
54  * Interrupt handler for the Host Bus Block.
55  */
56 uint_t
57 px_err_cb_intr(caddr_t arg)
58 {
59 	px_fault_t	*fault_p = (px_fault_t *)arg;
60 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
61 
62 	if (epkt != NULL) {
63 		return (px_err_common_intr(fault_p, epkt));
64 	}
65 
66 	return (DDI_INTR_UNCLAIMED);
67 }
68 
69 /*
70  * px_err_dmc_pec_intr:
71  * Interrupt handler for the DMC/PEC block.
72  */
73 uint_t
74 px_err_dmc_pec_intr(caddr_t arg)
75 {
76 	px_fault_t	*fault_p = (px_fault_t *)arg;
77 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
78 
79 	if (epkt != NULL) {
80 		return (px_err_common_intr(fault_p, epkt));
81 	}
82 
83 	return (DDI_INTR_UNCLAIMED);
84 }
85 
86 /*
87  * px_err_handle:
88  * Common function called by trap, mondo and fabric intr.
89  * This function is more meaningful in sun4u implementation.  Kept
90  * to mirror sun4u call stack.
91  * o check for safe access
92  *
93  * @param px_p		leaf in which to check access
94  * @param derr		fm err data structure to be updated
95  * @param caller	PX_TRAP_CALL | PX_INTR_CALL
96  * @param chkjbc	whether to handle hostbus registers (ignored)
97  * @return err		PX_OK | PX_NONFATAL |
98  *                      PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL
99  */
100 /* ARGSUSED */
101 int
102 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller,
103     boolean_t chkxbc)
104 {
105 	/* check for safe access */
106 	px_err_safeacc_check(px_p, derr);
107 
108 	return (DDI_FM_OK);
109 }
110 
111 /*
112  * px_err_common_intr:
113  * Interrupt handler for the JBC/DMC/PEC block.
114  * o lock
115  * o create derr
116  * o check safe access
117  * o px_err_check_severiy(epkt)
118  * o dispatch
119  * o Idle intr state
120  * o unlock
121  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
122  */
123 static uint_t
124 px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
125 {
126 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
127 	dev_info_t	*rpdip = px_p->px_dip;
128 	int		err, ret;
129 	ddi_fm_error_t	derr;
130 
131 	mutex_enter(&px_p->px_fm_mutex);
132 
133 	/* Create the derr */
134 	bzero(&derr, sizeof (ddi_fm_error_t));
135 	derr.fme_version = DDI_FME_VERSION;
136 	derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
137 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
138 
139 	/* Basically check for safe access */
140 	(void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE);
141 
142 	/* Check the severity of this error */
143 	err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL);
144 
145 	/* check for error severity */
146 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
147 
148 	/* Set the intr state to idle for the leaf that received the mondo */
149 	if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
150 		INTR_IDLE_STATE) != DDI_SUCCESS) {
151 		mutex_exit(&px_p->px_fm_mutex);
152 		return (DDI_INTR_UNCLAIMED);
153 	}
154 
155 	mutex_exit(&px_p->px_fm_mutex);
156 
157 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL))
158 		PX_FM_PANIC("Fatal System Bus Error has occurred\n");
159 
160 	return (DDI_INTR_CLAIMED);
161 }
162 
163 /*
164  * px_err_check_severity:
165  * Check the severity of the fire error based the epkt received
166  *
167  * @param px_p		leaf in which to take the snap shot.
168  * @param derr		fm err in which the ereport is to be based on
169  * @param epkt		epkt recevied from HV
170  */
171 static int
172 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
173     int caller)
174 {
175 	px_pec_t 	*pec_p = px_p->px_pec_p;
176 	dev_info_t	*dip = px_p->px_dip;
177 	int		err = 0;
178 
179 	/* Cautious access error handling  */
180 	if (derr->fme_flag == DDI_FM_ERR_EXPECTED) {
181 		if (caller == PX_TRAP_CALL) {
182 			/*
183 			 * for ddi_caut_get treat all events as nonfatal
184 			 * The trampoline will set err_ena = 0,
185 			 * err_status = NONFATAL.
186 			 */
187 			derr->fme_status = DDI_FM_NONFATAL;
188 		} else {
189 			/*
190 			 * For ddi_caut_put treat all events as nonfatal. Here
191 			 * we have the handle and can call ndi_fm_acc_err_set().
192 			 */
193 			derr->fme_status = DDI_FM_NONFATAL;
194 			ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
195 		}
196 	}
197 
198 	switch (epkt->rc_descr.block) {
199 	case BLOCK_HOSTBUS:
200 		err = px_cb_check_errors(dip, derr, epkt, caller);
201 		break;
202 	case BLOCK_MMU:
203 		err = px_mmu_check_errors(dip, derr, epkt, caller);
204 		break;
205 	case BLOCK_INTR:
206 		err = PX_NONFATAL;
207 		break;
208 	case BLOCK_PCIE:
209 		err = px_pcie_check_errors(dip, derr, epkt, caller);
210 		break;
211 	default:
212 		err = PX_ERR_UNKNOWN;
213 	}
214 
215 	return (err);
216 }
217 
218 /* ARGSUSED */
219 static int
220 px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
221     px_rc_err_t *epkt, int caller)
222 {
223 	int		fme_flag = derr->fme_flag;
224 	boolean_t	is_safeacc;
225 	int		ret,  err = 0;
226 
227 	is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) ||
228 	    (fme_flag == DDI_FM_ERR_PEEK) ||
229 	    (fme_flag == DDI_FM_ERR_POKE);
230 
231 	/* block/op/phase/cond/dir/flag... */
232 	switch (epkt->rc_descr.op) {
233 	case OP_PIO:
234 		err = PX_NONFATAL;
235 		/* check handle if affected memory address is captured */
236 		if (epkt->rc_descr.M != 0) {
237 			ret = px_handle_lookup(dip, ACC_HANDLE,
238 			    derr->fme_ena, (void *)epkt->addr);
239 		}
240 		if (ret == DDI_FM_FATAL)
241 			err |= PX_FATAL_GOS;
242 		break;
243 
244 	case OP_DMA:
245 		switch (epkt->rc_descr.phase) {
246 		case PH_ADDR:
247 			err = PX_FATAL_GOS;
248 			break;
249 		case PH_DATA:
250 			if (epkt->rc_descr.cond == CND_UE) {
251 				err = PX_FATAL_GOS;
252 				break;
253 			}
254 
255 			err = PX_NONFATAL;
256 			if (epkt->rc_descr.M == 1) {
257 				ret = px_handle_lookup(dip, DMA_HANDLE,
258 				    derr->fme_ena, (void *)epkt->addr);
259 				if (ret == DDI_FM_FATAL)
260 					err |= PX_FATAL_GOS;
261 			}
262 			break;
263 		default:
264 			DBG(DBG_ERR_INTR, dip, "Unexpected epkt");
265 			err = PX_FATAL_GOS;
266 			break;
267 		}
268 		break;
269 	case OP_UNKNOWN:
270 		err = PX_NONFATAL;
271 		if ((epkt->rc_descr.cond == CND_UNMAP) ||
272 		    (epkt->rc_descr.cond == CND_UE) ||
273 		    (epkt->rc_descr.cond == CND_INT) ||
274 		    (epkt->rc_descr.cond == CND_ILL))
275 			err |= PX_FATAL_GOS;
276 
277 		if (epkt->rc_descr.M == 1) {
278 			int	ret1, ret2;
279 
280 			ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena,
281 			    (void *)epkt->addr);
282 			ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena,
283 			    (void *)epkt->addr);
284 
285 			if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL)
286 				err |= PX_FATAL_GOS;
287 		}
288 		break;
289 
290 	case OP_RESERVED:
291 	default:
292 		DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error.");
293 		err = PX_FATAL_GOS;
294 		break;
295 	}
296 
297 	/*
298 	 * For protected safe access, consider PX_FATAL_GOS as the only
299 	 * exception for px to take immediate panic, else, treat errors
300 	 * as nonfatal.
301 	 */
302 	if (is_safeacc) {
303 		if (err & PX_FATAL_GOS)
304 			err = PX_FATAL_GOS;
305 		else
306 			err = PX_NONFATAL;
307 	}
308 
309 	return (err);
310 }
311 
312 /* ARGSUSED */
313 static int
314 px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
315     px_rc_err_t *epkt, int caller)
316 {
317 	int		ret, err = 0;
318 
319 	switch (epkt->rc_descr.op) {
320 	case OP_BYPASS:	/* nonfatal */
321 	case OP_XLAT:	/* nonfatal, stuck-fatal, fatal-reset */
322 	case OP_TBW:	/* nonfatal, stuck-fatal */
323 		err = PX_NONFATAL;
324 		break;
325 	default:
326 		err = PX_ERR_UNKNOWN;
327 		break;
328 	}
329 
330 	if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) {
331 		ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena,
332 		    (void *)epkt->addr);
333 		if (ret == DDI_FM_FATAL)
334 			err |= PX_FATAL_GOS;
335 		else
336 			err |= PX_NONFATAL;
337 	} else
338 		err |= PX_NONFATAL;
339 
340 	return (err);
341 }
342 
343 /* ARGSUSED */
344 static int
345 px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
346     px_rc_err_t *epkt, int caller)
347 {
348 	int		ret = PX_NONFATAL;
349 	px_pec_err_t	*pec = (px_pec_err_t *)epkt;
350 
351 	switch (pec->pec_descr.dir) {
352 	case DIR_INGRESS:
353 	case DIR_EGRESS:
354 	case DIR_LINK:
355 		ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status,
356 		    px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos);
357 		ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status,
358 		    px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos);
359 		break;
360 	default:
361 		ret = PX_ERR_UNKNOWN;
362 		break;
363 	}
364 
365 	return (ret);
366 }
367