xref: /illumos-gate/usr/src/uts/sun4u/io/pci/pci_fm.c (revision 63f91fbc3c024870d86dc3332a4a0080fb29bc40)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2019 Peter Tribble.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/sunddi.h>
32 #include <sys/sunndi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/async.h>
35 #include <sys/membar.h>
36 #include <sys/spl.h>
37 #include <sys/iommu.h>
38 #include <sys/pci/pci_obj.h>
39 #include <sys/fm/util.h>
40 #include <sys/fm/io/pci.h>
41 #include <sys/fm/io/ddi.h>
42 #include <sys/fm/io/sun4upci.h>
43 #include <sys/fm/protocol.h>
44 #include <sys/intr.h>
45 
46 /*LINTLIBRARY*/
47 
48 /*
49  * The routines below are generic sun4u PCI interfaces to support
50  * Fault Management.
51  *
52  * pci_dma_check, pci_acc_check, pci_handle_lookup are functions used
53  * to associate a captured PCI address to a particular dma/acc handle.
54  *
55  * pci_fm_acc_setup, pci_fm_init_child, pci_fm_create,
56  * pci_fm_destroy are constructors/destructors used to setup and teardown
57  * necessary resources.
58  *
59  * pci_bus_enter, pci_bus_exit are registered via busops and are used to
60  * provide exclusive access to the PCI bus.
61  *
62  * pci_err_callback is the registered callback for PCI which is called
63  * by the CPU code when it detects a UE/TO/BERR.
64  *
65  * pbm_ereport_post is used by the PBM code to generically report all
66  * PBM errors.
67  *
68  */
69 
70 /*
71  * Function used to setup access functions depending on level of desired
72  * protection.
73  */
74 void
75 pci_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip)
76 {
77 	uchar_t fflag;
78 	ddi_acc_hdl_t *hp;
79 	ddi_acc_impl_t *ap;
80 
81 	hp = mp->map_handlep;
82 	ap = (ddi_acc_impl_t *)hp->ah_platform_private;
83 	fflag = ap->ahi_common.ah_acc.devacc_attr_access;
84 
85 	if (mp->map_op == DDI_MO_MAP_LOCKED) {
86 		ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL);
87 		switch (fflag) {
88 		case DDI_FLAGERR_ACC:
89 			ap->ahi_get8 = i_ddi_prot_get8;
90 			ap->ahi_get16 = i_ddi_prot_get16;
91 			ap->ahi_get32 = i_ddi_prot_get32;
92 			ap->ahi_get64 = i_ddi_prot_get64;
93 			ap->ahi_put8 = i_ddi_prot_put8;
94 			ap->ahi_put16 = i_ddi_prot_put16;
95 			ap->ahi_put32 = i_ddi_prot_put32;
96 			ap->ahi_put64 = i_ddi_prot_put64;
97 			ap->ahi_rep_get8 = i_ddi_prot_rep_get8;
98 			ap->ahi_rep_get16 = i_ddi_prot_rep_get16;
99 			ap->ahi_rep_get32 = i_ddi_prot_rep_get32;
100 			ap->ahi_rep_get64 = i_ddi_prot_rep_get64;
101 			ap->ahi_rep_put8 = i_ddi_prot_rep_put8;
102 			ap->ahi_rep_put16 = i_ddi_prot_rep_put16;
103 			ap->ahi_rep_put32 = i_ddi_prot_rep_put32;
104 			ap->ahi_rep_put64 = i_ddi_prot_rep_put64;
105 			break;
106 		case DDI_CAUTIOUS_ACC :
107 			ap->ahi_get8 = i_ddi_caut_get8;
108 			ap->ahi_get16 = i_ddi_caut_get16;
109 			ap->ahi_get32 = i_ddi_caut_get32;
110 			ap->ahi_get64 = i_ddi_caut_get64;
111 			ap->ahi_put8 = i_ddi_caut_put8;
112 			ap->ahi_put16 = i_ddi_caut_put16;
113 			ap->ahi_put32 = i_ddi_caut_put32;
114 			ap->ahi_put64 = i_ddi_caut_put64;
115 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
116 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
117 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
118 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
119 			ap->ahi_rep_put8 = i_ddi_caut_rep_put8;
120 			ap->ahi_rep_put16 = i_ddi_caut_rep_put16;
121 			ap->ahi_rep_put32 = i_ddi_caut_rep_put32;
122 			ap->ahi_rep_put64 = i_ddi_caut_rep_put64;
123 			break;
124 		default:
125 			break;
126 		}
127 	} else if (mp->map_op == DDI_MO_UNMAP) {
128 		ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp);
129 	}
130 }
131 
132 /*
133  * Function used to initialize FMA for our children nodes. Called
134  * through pci busops when child node calls ddi_fm_init.
135  */
136 /* ARGSUSED */
137 int
138 pci_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
139     ddi_iblock_cookie_t *ibc)
140 {
141 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
142 
143 	ASSERT(ibc != NULL);
144 	*ibc = pci_p->pci_fm_ibc;
145 
146 	return (pci_p->pci_fm_cap);
147 }
148 
149 /*
150  * Lock accesses to the pci bus, to be able to protect against bus errors.
151  */
152 void
153 pci_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle)
154 {
155 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
156 	pbm_t *pbm_p = pci_p->pci_pbm_p;
157 
158 	membar_sync();
159 
160 	mutex_enter(&pbm_p->pbm_pokefault_mutex);
161 	pbm_p->pbm_excl_handle = handle;
162 }
163 
164 /*
165  * Unlock access to bus and clear errors before exiting.
166  */
167 /* ARGSUSED */
168 void
169 pci_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle)
170 {
171 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
172 	pbm_t *pbm_p = pci_p->pci_pbm_p;
173 	ddi_fm_error_t derr;
174 
175 	ASSERT(MUTEX_HELD(&pbm_p->pbm_pokefault_mutex));
176 
177 	membar_sync();
178 
179 	mutex_enter(&pci_p->pci_common_p->pci_fm_mutex);
180 	ddi_fm_acc_err_get(pbm_p->pbm_excl_handle, &derr, DDI_FME_VERSION);
181 
182 	if (derr.fme_status == DDI_FM_OK) {
183 		if (pci_check_error(pci_p) != 0) {
184 			(void) pci_pbm_err_handler(pci_p->pci_dip, &derr,
185 			    (const void *)pci_p, PCI_BUS_EXIT_CALL);
186 		}
187 	}
188 	mutex_exit(&pci_p->pci_common_p->pci_fm_mutex);
189 
190 	pbm_p->pbm_excl_handle = NULL;
191 	mutex_exit(&pbm_p->pbm_pokefault_mutex);
192 }
193 
194 /*
195  * PCI error callback which is registered with our parent to call
196  * for PCI logging when the CPU traps due to BERR/TO/UE.
197  */
198 int
199 pci_err_callback(dev_info_t *dip, ddi_fm_error_t *derr,
200     const void *impl_data)
201 {
202 	pci_t *pci_p = (pci_t *)impl_data;
203 	pci_common_t *cmn_p = pci_p->pci_common_p;
204 	ecc_t *ecc_p = cmn_p->pci_common_ecc_p;
205 	ecc_errstate_t ecc_err;
206 	int fatal = 0;
207 	int nonfatal = 0;
208 	int unknown = 0;
209 	int ret = DDI_FM_OK;
210 
211 	bzero(&ecc_err, sizeof (ecc_err));
212 	mutex_enter(&cmn_p->pci_fm_mutex);
213 	/*
214 	 * Check and log ecc and pbm errors
215 	 */
216 	ecc_err.ecc_ii_p = ecc_p->ecc_ue;
217 	ecc_err.ecc_ena = derr->fme_ena;
218 	ecc_err.ecc_caller = PCI_TRAP_CALL;
219 
220 	if ((ret = ecc_err_handler(&ecc_err)) == DDI_FM_FATAL)
221 		fatal++;
222 	else if (ret == DDI_FM_NONFATAL)
223 		nonfatal++;
224 	else if (ret == DDI_FM_UNKNOWN)
225 		unknown++;
226 
227 	if (pci_check_error(pci_p) != 0) {
228 		int err = pci_pbm_err_handler(pci_p->pci_dip, derr,
229 		    (const void *)pci_p, PCI_TRAP_CALL);
230 		if (err == DDI_FM_FATAL)
231 			fatal++;
232 		else if (err == DDI_FM_NONFATAL)
233 			nonfatal++;
234 		else if (err == DDI_FM_UNKNOWN)
235 			unknown++;
236 	}
237 
238 	mutex_exit(&cmn_p->pci_fm_mutex);
239 
240 	if (fatal)
241 		return (DDI_FM_FATAL);
242 	else if (nonfatal)
243 		return (DDI_FM_NONFATAL);
244 	else if (unknown)
245 		return (DDI_FM_UNKNOWN);
246 	else
247 		return (DDI_FM_OK);
248 }
249 
250 void
251 pci_fm_create(pci_t *pci_p)
252 {
253 	pci_common_t *cmn_p = pci_p->pci_common_p;
254 
255 	/*
256 	 * PCI detected ECC errorq, to schedule async handling
257 	 * of ECC errors and logging.
258 	 * The errorq is created here but destroyed when _fini is called
259 	 * for the pci module.
260 	 */
261 	if (pci_ecc_queue == NULL) {
262 		pci_ecc_queue = errorq_create("pci_ecc_queue",
263 		    (errorq_func_t)ecc_err_drain,
264 		    (void *)pci_p->pci_ecc_p,
265 		    ECC_MAX_ERRS, sizeof (ecc_errstate_t),
266 		    PIL_2, ERRORQ_VITAL);
267 		if (pci_ecc_queue == NULL)
268 			panic("failed to create required system error queue");
269 	}
270 
271 	/*
272 	 * Initialize pci_target_queue for FMA handling of pci errors.
273 	 */
274 	pci_targetq_init();
275 
276 	/*
277 	 * Initialize FMA support
278 	 */
279 	pci_p->pci_fm_cap = DDI_FM_EREPORT_CAPABLE |
280 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE |
281 	    DDI_FM_ERRCB_CAPABLE;
282 	/*
283 	 * Call parent to get it's capablity
284 	 */
285 	ddi_fm_init(pci_p->pci_dip, &pci_p->pci_fm_cap,
286 	    &pci_p->pci_fm_ibc);
287 	/*
288 	 * Need to be ereport and error handler cabable
289 	 */
290 	ASSERT((pci_p->pci_fm_cap & DDI_FM_ERRCB_CAPABLE) &&
291 	    (pci_p->pci_fm_cap & DDI_FM_EREPORT_CAPABLE));
292 	/*
293 	 * Initialize error handling mutex.
294 	 */
295 	if (cmn_p->pci_common_refcnt == 0) {
296 		mutex_init(&cmn_p->pci_fm_mutex, NULL, MUTEX_DRIVER,
297 		    (void *)pci_p->pci_fm_ibc);
298 	}
299 
300 	/*
301 	 * Register error callback with our parent.
302 	 */
303 	ddi_fm_handler_register(pci_p->pci_dip, pci_err_callback, pci_p);
304 
305 }
306 
307 void
308 pci_fm_destroy(pci_t *pci_p)
309 {
310 	pci_common_t *cmn_p = pci_p->pci_common_p;
311 
312 	/* schizo non-shared objects */
313 	ddi_fm_handler_unregister(pci_p->pci_dip);
314 	ddi_fm_fini(pci_p->pci_dip);
315 
316 	if (cmn_p->pci_common_refcnt != 0)
317 		return;
318 
319 	mutex_destroy(&cmn_p->pci_fm_mutex);
320 }
321 
322 /*
323  * Function used to post PCI block module specific ereports.
324  */
325 void
326 pbm_ereport_post(dev_info_t *dip, uint64_t ena, pbm_errstate_t *pbm_err)
327 {
328 	char buf[FM_MAX_CLASS];
329 
330 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
331 	    pbm_err->pbm_bridge_type, pbm_err->pbm_err_class);
332 
333 	ena = ena ? ena : fm_ena_generate(0, FM_ENA_FMT1);
334 
335 	ddi_fm_ereport_post(dip, buf, ena, DDI_NOSLEEP,
336 	    FM_VERSION, DATA_TYPE_UINT8, 0,
337 	    PCI_CONFIG_STATUS, DATA_TYPE_UINT16, pbm_err->pbm_pci.pci_cfg_stat,
338 	    PCI_CONFIG_COMMAND, DATA_TYPE_UINT16, pbm_err->pbm_pci.pci_cfg_comm,
339 	    PCI_PBM_CSR, DATA_TYPE_UINT64, pbm_err->pbm_ctl_stat,
340 	    PCI_PBM_AFSR, DATA_TYPE_UINT64, pbm_err->pbm_afsr,
341 	    PCI_PBM_AFAR, DATA_TYPE_UINT64, pbm_err->pbm_afar,
342 	    PCI_PBM_SLOT, DATA_TYPE_UINT64, pbm_err->pbm_err_sl,
343 	    PCI_PBM_VALOG, DATA_TYPE_UINT64, pbm_err->pbm_va_log,
344 	    NULL);
345 }
346