xref: /titanic_41/usr/src/uts/sun4v/io/px/px_err.c (revision 749f21d359d8fbd020c974a1a5227316221bfc9c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v Fire Error Handling
31  */
32 
33 #include <sys/types.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/fm/protocol.h>
37 #include <sys/fm/util.h>
38 #include <sys/membar.h>
39 #include "px_obj.h"
40 #include "px_err.h"
41 
42 static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt);
43 static int  px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr,
44     px_rc_err_t *epkt, int caller);
45 
46 static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
47     px_rc_err_t *epkt, int caller);
48 static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
49     px_rc_err_t *epkt, int caller);
50 static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
51     px_rc_err_t *epkt, int caller);
52 
53 /*
54  * px_err_cb_intr:
55  * Interrupt handler for the Host Bus Block.
56  */
57 uint_t
58 px_err_cb_intr(caddr_t arg)
59 {
60 	px_fault_t	*fault_p = (px_fault_t *)arg;
61 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
62 
63 	if (epkt != NULL) {
64 		return (px_err_common_intr(fault_p, epkt));
65 	}
66 
67 	return (DDI_INTR_UNCLAIMED);
68 }
69 
70 /*
71  * px_err_dmc_pec_intr:
72  * Interrupt handler for the DMC/PEC block.
73  */
74 uint_t
75 px_err_dmc_pec_intr(caddr_t arg)
76 {
77 	px_fault_t	*fault_p = (px_fault_t *)arg;
78 	px_rc_err_t	*epkt = (px_rc_err_t *)fault_p->px_intr_payload;
79 
80 	if (epkt != NULL) {
81 		return (px_err_common_intr(fault_p, epkt));
82 	}
83 
84 	return (DDI_INTR_UNCLAIMED);
85 }
86 
87 /*
88  * px_err_handle:
89  * Common function called by trap, mondo and fabric intr.
90  * This function is more meaningful in sun4u implementation.  Kept
91  * to mirror sun4u call stack.
92  * o check for safe access
93  *
94  * @param px_p		leaf in which to check access
95  * @param derr		fm err data structure to be updated
96  * @param caller	PX_TRAP_CALL | PX_INTR_CALL
97  * @param chkjbc	whether to handle hostbus registers (ignored)
98  * @return err		PX_OK | PX_NONFATAL |
99  *                      PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL
100  */
101 /* ARGSUSED */
102 int
103 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller,
104     boolean_t chkxbc)
105 {
106 	/* check for safe access */
107 	px_err_safeacc_check(px_p, derr);
108 
109 	return (DDI_FM_OK);
110 }
111 
112 /*
113  * px_err_common_intr:
114  * Interrupt handler for the JBC/DMC/PEC block.
115  * o lock
116  * o create derr
117  * o check safe access
118  * o px_err_check_severiy(epkt)
119  * o dispatch
120  * o Idle intr state
121  * o unlock
122  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
123  */
124 static uint_t
125 px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt)
126 {
127 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
128 	dev_info_t	*rpdip = px_p->px_dip;
129 	px_cb_t		*cb_p = px_p->px_cb_p;
130 	int		err, ret;
131 	ddi_fm_error_t	derr;
132 
133 	mutex_enter(&cb_p->xbc_fm_mutex);
134 
135 	/* Create the derr */
136 	bzero(&derr, sizeof (ddi_fm_error_t));
137 	derr.fme_version = DDI_FME_VERSION;
138 	derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1);
139 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
140 
141 	/* Basically check for safe access */
142 	(void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE);
143 
144 	/* Check the severity of this error */
145 	err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL);
146 
147 	/* check for error severity */
148 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
149 
150 	/* Set the intr state to idle for the leaf that received the mondo */
151 	if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino,
152 		INTR_IDLE_STATE) != DDI_SUCCESS) {
153 		mutex_exit(&cb_p->xbc_fm_mutex);
154 		return (DDI_INTR_UNCLAIMED);
155 	}
156 
157 	mutex_exit(&cb_p->xbc_fm_mutex);
158 
159 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL))
160 		PX_FM_PANIC("Fatal System Bus Error has occurred\n");
161 
162 	return (DDI_INTR_CLAIMED);
163 }
164 
165 /*
166  * px_err_check_severity:
167  * Check the severity of the fire error based the epkt received
168  *
169  * @param px_p		leaf in which to take the snap shot.
170  * @param derr		fm err in which the ereport is to be based on
171  * @param epkt		epkt recevied from HV
172  */
173 static int
174 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt,
175     int caller)
176 {
177 	px_pec_t 	*pec_p = px_p->px_pec_p;
178 	dev_info_t	*dip = px_p->px_dip;
179 	int		err = 0;
180 
181 	/* Cautious access error handling  */
182 	if (derr->fme_flag == DDI_FM_ERR_EXPECTED) {
183 		if (caller == PX_TRAP_CALL) {
184 			/*
185 			 * for ddi_caut_get treat all events as nonfatal
186 			 * The trampoline will set err_ena = 0,
187 			 * err_status = NONFATAL.
188 			 */
189 			derr->fme_status = DDI_FM_NONFATAL;
190 		} else {
191 			/*
192 			 * For ddi_caut_put treat all events as nonfatal. Here
193 			 * we have the handle and can call ndi_fm_acc_err_set().
194 			 */
195 			derr->fme_status = DDI_FM_NONFATAL;
196 			ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr);
197 		}
198 	}
199 
200 	switch (epkt->rc_descr.block) {
201 	case BLOCK_HOSTBUS:
202 		err = px_cb_check_errors(dip, derr, epkt, caller);
203 		break;
204 	case BLOCK_MMU:
205 		err = px_mmu_check_errors(dip, derr, epkt, caller);
206 		break;
207 	case BLOCK_INTR:
208 		err = PX_NONFATAL;
209 		break;
210 	case BLOCK_PCIE:
211 		err = px_pcie_check_errors(dip, derr, epkt, caller);
212 		break;
213 	default:
214 		err = PX_ERR_UNKNOWN;
215 	}
216 
217 	return (err);
218 }
219 
220 /* ARGSUSED */
221 static int
222 px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
223     px_rc_err_t *epkt, int caller)
224 {
225 	int		fme_flag = derr->fme_flag;
226 	boolean_t	is_safeacc;
227 	int		ret,  err = 0;
228 
229 	is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) ||
230 	    (fme_flag == DDI_FM_ERR_PEEK) ||
231 	    (fme_flag == DDI_FM_ERR_POKE);
232 
233 	/* block/op/phase/cond/dir/flag... */
234 	switch (epkt->rc_descr.op) {
235 	case OP_PIO:
236 		err = PX_NONFATAL;
237 		/* check handle if affected memory address is captured */
238 		if (epkt->rc_descr.M != 0) {
239 			ret = px_handle_lookup(dip, ACC_HANDLE,
240 			    derr->fme_ena, (void *)epkt->addr);
241 		}
242 		if (ret == DDI_FM_FATAL)
243 			err |= PX_FATAL_GOS;
244 		break;
245 
246 	case OP_DMA:
247 		switch (epkt->rc_descr.phase) {
248 		case PH_ADDR:
249 			err = PX_FATAL_GOS;
250 			break;
251 		case PH_DATA:
252 			if (epkt->rc_descr.cond == CND_UE) {
253 				err = PX_FATAL_GOS;
254 				break;
255 			}
256 
257 			err = PX_NONFATAL;
258 			if (epkt->rc_descr.M == 1) {
259 				ret = px_handle_lookup(dip, DMA_HANDLE,
260 				    derr->fme_ena, (void *)epkt->addr);
261 				if (ret == DDI_FM_FATAL)
262 					err |= PX_FATAL_GOS;
263 			}
264 			break;
265 		default:
266 			DBG(DBG_ERR_INTR, dip, "Unexpected epkt");
267 			err = PX_FATAL_GOS;
268 			break;
269 		}
270 		break;
271 	case OP_UNKNOWN:
272 		err = PX_NONFATAL;
273 		if (epkt->rc_descr.M == 1) {
274 			int	ret1, ret2;
275 
276 			ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena,
277 			    (void *)epkt->addr);
278 			ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena,
279 			    (void *)epkt->addr);
280 
281 			if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL)
282 				err |= PX_FATAL_GOS;
283 		}
284 		break;
285 
286 	case OP_RESERVED:
287 	default:
288 		DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error.");
289 		err = PX_FATAL_GOS;
290 		break;
291 	}
292 
293 	/*
294 	 * For protected safe access, consider PX_FATAL_GOS as the only
295 	 * exception for px to take immediate panic, else, treat errors
296 	 * as nonfatal.
297 	 */
298 	if (is_safeacc) {
299 		if (err & PX_FATAL_GOS)
300 			err = PX_FATAL_GOS;
301 		else
302 			err = PX_NONFATAL;
303 	}
304 
305 	return (err);
306 }
307 
308 /* ARGSUSED */
309 static int
310 px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
311     px_rc_err_t *epkt, int caller)
312 {
313 	int		ret, err = 0;
314 
315 	switch (epkt->rc_descr.op) {
316 	case OP_BYPASS:	/* nonfatal */
317 	case OP_XLAT:	/* nonfatal, stuck-fatal, fatal-reset */
318 	case OP_TBW:	/* nonfatal, stuck-fatal */
319 		err = PX_NONFATAL;
320 		break;
321 	default:
322 		err = PX_ERR_UNKNOWN;
323 		break;
324 	}
325 
326 	if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) {
327 		ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena,
328 		    (void *)epkt->addr);
329 		if (ret == DDI_FM_FATAL)
330 			err |= PX_FATAL_GOS;
331 		else
332 			err |= PX_NONFATAL;
333 	} else
334 		err |= PX_NONFATAL;
335 
336 	return (err);
337 }
338 
339 /* ARGSUSED */
340 static int
341 px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr,
342     px_rc_err_t *epkt, int caller)
343 {
344 	int		ret = PX_NONFATAL;
345 	px_pec_err_t	*pec = (px_pec_err_t *)epkt;
346 
347 	switch (pec->pec_descr.dir) {
348 	case DIR_INGRESS:
349 	case DIR_EGRESS:
350 	case DIR_LINK:
351 		ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status,
352 		    px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos);
353 		ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status,
354 		    px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos);
355 		break;
356 	default:
357 		ret = PX_ERR_UNKNOWN;
358 		break;
359 	}
360 
361 	return (ret);
362 }
363