xref: /titanic_41/usr/src/uts/sun4/io/px/px_fm.c (revision 66e1f4391ea0d382201658130a560296881a014b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX Fault Management Architecture
31  */
32 #include <sys/types.h>
33 #include <sys/sunndi.h>
34 #include <sys/sunddi.h>
35 #include <sys/fm/protocol.h>
36 #include <sys/fm/util.h>
37 #include <sys/membar.h>
38 #include "px_obj.h"
39 
40 /*
41  * Initialize px FMA support
42  */
43 int
44 px_fm_attach(px_t *px_p)
45 {
46 	px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
47 		DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
48 
49 	/*
50 	 * check parents' capability
51 	 */
52 	ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc);
53 
54 	/*
55 	 * parents need to be ereport and error handling capable
56 	 */
57 	ASSERT(px_p->px_fm_cap &&
58 	    (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE));
59 
60 	/*
61 	 * register error callback in parent
62 	 */
63 	ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p);
64 
65 	return (DDI_SUCCESS);
66 }
67 
68 /*
69  * Deregister FMA
70  */
71 void
72 px_fm_detach(px_t *px_p)
73 {
74 	ddi_fm_handler_unregister(px_p->px_dip);
75 	ddi_fm_fini(px_p->px_dip);
76 }
77 
78 /*
79  * Function used to setup access functions depending on level of desired
80  * protection.
81  */
82 void
83 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip)
84 {
85 	uchar_t fflag;
86 	ddi_acc_hdl_t *hp;
87 	ddi_acc_impl_t *ap;
88 
89 	hp = mp->map_handlep;
90 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
91 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
92 
93 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
94 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
95 		switch (fflag) {
96 		case DDI_FLAGERR_ACC:
97 			ap->ahi_get8 = i_ddi_prot_get8;
98 			ap->ahi_get16 = i_ddi_prot_get16;
99 			ap->ahi_get32 = i_ddi_prot_get32;
100 			ap->ahi_get64 = i_ddi_prot_get64;
101 			ap->ahi_put8 = i_ddi_prot_put8;
102 			ap->ahi_put16 = i_ddi_prot_put16;
103 			ap->ahi_put32 = i_ddi_prot_put32;
104 			ap->ahi_put64 = i_ddi_prot_put64;
105 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
106 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
107 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
108 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
109 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
110 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
111 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
112 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
113 			break;
114 		case DDI_CAUTIOUS_ACC :
115 			ap->ahi_get8 = i_ddi_caut_get8;
116 			ap->ahi_get16 = i_ddi_caut_get16;
117 			ap->ahi_get32 = i_ddi_caut_get32;
118 			ap->ahi_get64 = i_ddi_caut_get64;
119 			ap->ahi_put8 = i_ddi_caut_put8;
120 			ap->ahi_put16 = i_ddi_caut_put16;
121 			ap->ahi_put32 = i_ddi_caut_put32;
122 			ap->ahi_put64 = i_ddi_caut_put64;
123 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
124 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
125 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
126 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
127 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
128 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
129 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
130 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
131 			break;
132 		default:
133 			break;
134 		}
135 	} else if (mp->map_op == DDI_MO_UNMAP) {
136 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
137 	}
138 }
139 
140 /*
141  * Function called after a dma fault occurred to find out whether the
142  * fault address is associated with a driver that is able to handle faults
143  * and recover from faults. The driver has to set DDI_DMA_FLAGERR and
144  * cache dma handles in order to make this checking effective to help
145  * recovery from dma faults.
146  */
147 /* ARGSUSED */
148 static int
149 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr,
150     const void *not_used)
151 {
152 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
153 	pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
154 	pfn_t comp_pfn;
155 	int page;
156 
157 	/*
158 	 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not
159 	 * been effectively initialized during attach.
160 	 */
161 	ASSERT(mp);
162 
163 	for (page = 0; page < mp->dmai_ndvmapages; page++) {
164 		comp_pfn = PX_GET_MP_PFN(mp, page);
165 		if (fault_pfn == comp_pfn)
166 			return (DDI_FM_NONFATAL);
167 	}
168 
169 	return (DDI_FM_UNKNOWN);
170 }
171 
172 /*
173  * Function used to check if a given access handle owns the failing address.
174  * Called by ndi_fmc_error, when we detect a PIO error.
175  */
176 /* ARGSUSED */
177 static int
178 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr,
179     const void *not_used)
180 {
181 	pfn_t pfn, fault_pfn;
182 	ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle);
183 
184 	/*
185 	 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not
186 	 * been effectively initialized during attach.
187 	 */
188 	ASSERT(hp);
189 
190 	pfn = hp->ah_pfn;
191 	fault_pfn = mmu_btop(*(uint64_t *)comp_addr);
192 	if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum))
193 		return (DDI_FM_NONFATAL);
194 
195 	return (DDI_FM_UNKNOWN);
196 }
197 
198 /*
199  * Function used by PCI error handlers to check if captured address is stored
200  * in the DMA or ACC handle caches.
201  */
202 int
203 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar)
204 {
205 	uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap;
206 	int (*f)() = type == DMA_HANDLE ?
207 	    (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) :
208 	    (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL);
209 
210 	return (f ? ndi_fmc_error(dip, NULL, type, f, fme_ena, afar) :
211 	    DDI_FM_UNKNOWN);
212 }
213 
214 /*
215  * Function used to initialize FMA for our children nodes. Called
216  * through pci busops when child node calls ddi_fm_init.
217  */
218 /*ARGSUSED*/
219 int
220 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
221     ddi_iblock_cookie_t *ibc_p)
222 {
223 	px_t *px_p = DIP_TO_STATE(dip);
224 
225 	ASSERT(ibc_p != NULL);
226 	*ibc_p = px_p->px_fm_ibc;
227 
228 	return (px_p->px_fm_cap);
229 }
230 
231 /*
232  * lock access for exclusive PCIe access
233  */
234 void
235 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
236 {
237 	px_pec_t	*pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p;
238 
239 	/*
240 	 * Exclusive access has been used for cautious put/get,
241 	 * Both utilize i_ddi_ontrap which, on sparcv9, implements
242 	 * similar protection as what on_trap() does, and which calls
243 	 * membar  #Sync to flush out all cpu deferred errors
244 	 * prior to get/put operation, so here we're not calling
245 	 * membar  #Sync - a difference from what's in pci_bus_enter().
246 	 */
247 	mutex_enter(&pec_p->pec_pokefault_mutex);
248 	pec_p->pec_acc_hdl = handle;
249 }
250 
251 /*
252  * unlock access for exclusive PCIe access
253  */
254 /* ARGSUSED */
255 void
256 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
257 {
258 	px_t		*px_p = DIP_TO_STATE(dip);
259 	px_pec_t	*pec_p = px_p->px_pec_p;
260 
261 	pec_p->pec_acc_hdl = NULL;
262 	mutex_exit(&pec_p->pec_pokefault_mutex);
263 }
264 
265 
266 /*
267  * PCI error callback which is registered with our parent to call
268  * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors
269  * and PCI BERR/TO/UE
270  *
271  * Dispatch on all known leaves of this fire device because we cannot tell
272  * which side the error came from.
273  */
274 /*ARGSUSED*/
275 int
276 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data)
277 {
278 	px_t	*px_p = (px_t *)impl_data;
279 	px_cb_t	*cb_p = px_p->px_cb_p;
280 	int	err = PX_OK;
281 	int	fatal = 0;
282 	int	nonfatal = 0;
283 	int	unknown = 0;
284 	int	ret = DDI_FM_OK;
285 	int	i;
286 
287 	mutex_enter(&cb_p->xbc_fm_mutex);
288 
289 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
290 		px_p = cb_p->xbc_px_list[i];
291 		if (px_p != NULL)
292 			err |= px_err_handle(px_p, derr, PX_TRAP_CALL,
293 			    (i == 0));
294 	}
295 
296 	for (i = 0; i < PX_CB_MAX_LEAF; i++) {
297 		px_p = cb_p->xbc_px_list[i];
298 		if (px_p != NULL) {
299 			ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr);
300 			switch (ret) {
301 			case DDI_FM_FATAL:
302 				fatal++;
303 				break;
304 			case DDI_FM_NONFATAL:
305 				nonfatal++;
306 				break;
307 			case DDI_FM_UNKNOWN:
308 				unknown++;
309 				break;
310 			default:
311 				break;
312 			}
313 		}
314 	}
315 	mutex_exit(&cb_p->xbc_fm_mutex);
316 
317 	ret = (fatal != 0) ? DDI_FM_FATAL :
318 	    ((nonfatal != 0) ? DDI_FM_NONFATAL :
319 	    (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK)));
320 
321 	/* fire fatal error overrides device error */
322 	if (err & (PX_FATAL_GOS | PX_FATAL_SW))
323 		ret = DDI_FM_FATAL;
324 	/* if fire encounts no error, then take whatever device error */
325 	else if ((err != PX_OK) && (ret != DDI_FM_FATAL))
326 		ret = DDI_FM_NONFATAL;
327 
328 	return (ret);
329 }
330 
331 /*
332  * px_err_dmc_pec_intr:
333  * Interrupt handler for the DMC/PEC block.
334  * o lock
335  * o create derr
336  * o px_err_handle(leaf, with jbc)
337  * o send ereport(fire fmri, derr, payload = BDF)
338  * o dispatch (leaf)
339  * o unlock
340  * o handle error: fatal? fm_panic() : return INTR_CLAIMED)
341  */
342 /* ARGSUSED */
343 uint_t
344 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code,
345     pcie_req_id_t rid)
346 {
347 	dev_info_t	*rpdip = px_p->px_dip;
348 	px_cb_t		*cb_p = px_p->px_cb_p;
349 	int		err = PX_OK, ret;
350 	ddi_fm_error_t	derr;
351 
352 	mutex_enter(&cb_p->xbc_fm_mutex);
353 
354 	/* Create the derr */
355 	bzero(&derr, sizeof (ddi_fm_error_t));
356 	derr.fme_version = DDI_FME_VERSION;
357 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
358 	derr.fme_flag = DDI_FM_ERR_UNEXPECTED;
359 
360 	/* send ereport/handle/clear fire registers */
361 	err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE);
362 
363 	/* Check all child devices for errors */
364 	ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr);
365 
366 	mutex_exit(&cb_p->xbc_fm_mutex);
367 
368 	/*
369 	 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset,
370 	 * therefore it does not cause panic.
371 	 */
372 	if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL))
373 		fm_panic("Fatal PCIe Fabric Error has occurred\n");
374 
375 	return (DDI_INTR_CLAIMED);
376 }
377 
378 /*
379  * px_err_safeacc_check:
380  * Check to see if a peek/poke and cautious access is currently being
381  * done on a particular leaf.
382  *
383  * Safe access reads induced fire errors will be handled by cpu trap handler
384  * which will call px_fm_callback() which calls this function. In that
385  * case, the derr fields will be set by trap handler with the correct values.
386  *
387  * Safe access writes induced errors will be handled by px interrupt
388  * handlers, this function will fill in the derr fields.
389  *
390  * If a cpu trap does occur, it will quiesce all other interrupts allowing
391  * the cpu trap error handling to finish before Fire receives an interrupt.
392  *
393  * If fire does indeed have an error when a cpu trap occurs as a result of
394  * a safe access, a trap followed by a Mondo/Fabric interrupt will occur.
395  * In which case derr will be initialized as "UNEXPECTED" by the interrupt
396  * handler and this function will need to find if this error occured in the
397  * middle of a safe access operation.
398  *
399  * @param px_p		leaf in which to check access
400  * @param derr		fm err data structure to be updated
401  */
402 void
403 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr)
404 {
405 	px_pec_t 	*pec_p = px_p->px_pec_p;
406 	px_cb_t		*cb_p = px_p->px_cb_p;
407 	int		acctype = pec_p->pec_safeacc_type;
408 
409 	ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex));
410 
411 	if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) {
412 		return;
413 	}
414 
415 	/* safe access checking */
416 	switch (acctype) {
417 	case DDI_FM_ERR_EXPECTED:
418 		/*
419 		 * cautious access protection, protected from all err.
420 		 */
421 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
422 		ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr,
423 		    DDI_FME_VERSION);
424 		derr->fme_flag = acctype;
425 		derr->fme_acc_handle = pec_p->pec_acc_hdl;
426 		break;
427 	case DDI_FM_ERR_POKE:
428 		/*
429 		 * ddi_poke protection, check nexus and children for
430 		 * expected errors.
431 		 */
432 		ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex));
433 		membar_sync();
434 		derr->fme_flag = acctype;
435 		break;
436 	case DDI_FM_ERR_PEEK:
437 		derr->fme_flag = acctype;
438 		break;
439 	}
440 }
441