xref: /illumos-gate/usr/src/uts/common/io/devfm.c (revision 2aa8db5932a99c01d32f2aea7dbbf15b4898169b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2023 Oxide Computer Company
28  */
29 
30 #include <sys/stat.h>
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/cred.h>
34 #include <sys/policy.h>
35 #include <sys/file.h>
36 #include <sys/errno.h>
37 #include <sys/modctl.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/conf.h>
41 #include <sys/debug.h>
42 #include <sys/systeminfo.h>
43 
44 #include <sys/fm/protocol.h>
45 #include <sys/devfm.h>
46 
47 extern int fm_get_paddr(nvlist_t *, uint64_t *);
48 #if defined(__x86)
49 extern int fm_ioctl_physcpu_info(int, nvlist_t *, nvlist_t **);
50 extern int fm_ioctl_cpu_retire(int, nvlist_t *, nvlist_t **);
51 extern int fm_ioctl_gentopo_legacy(int, nvlist_t *, nvlist_t **);
52 extern int fm_ioctl_pci_data(int, nvlist_t *, nvlist_t **);
53 #endif /* __x86 */
54 extern int fm_ioctl_cache_info(int, nvlist_t *, nvlist_t **);
55 
56 static int fm_ioctl_versions(int, nvlist_t *, nvlist_t **);
57 static int fm_ioctl_page_retire(int, nvlist_t *, nvlist_t **);
58 
/*
 * The driver's capabilities are strictly versioned, allowing userland patching
 * without a reboot.  Userland should start with an FM_VERSIONS ioctl to
 * query the versions of the kernel interfaces; it is then entirely userland's
 * responsibility to prepare arguments etc. to match the current kernel.
 * The version of FM_VERSIONS itself is FM_DRV_VERSION.
 */
/*
 * One entry of the interface version table: the name of a versioned
 * interface and the version number the kernel currently implements.
 * The name only ever points at constant strings, so it is const-qualified.
 */
typedef struct fm_version {
	const char	*interface;	/* interface name */
	uint32_t	version;	/* interface version */
} fm_vers_t;
70 
71 typedef struct fm_subroutine {
72 	int		cmd;		/* ioctl cmd */
73 	boolean_t	priv;		/* require privilege */
74 	char		*version;	/* version name */
75 	int		(*func)(int, nvlist_t *, nvlist_t **);	/* handler */
76 } fm_subr_t;
77 
78 static const fm_vers_t fm_versions[] = {
79 	{ FM_VERSIONS_VERSION, FM_DRV_VERSION },
80 	{ FM_PAGE_OP_VERSION, 1 },
81 	{ FM_CPU_OP_VERSION, 1 },
82 	{ FM_CPU_INFO_VERSION, 1 },
83 	{ FM_TOPO_LEGACY_VERSION, 1 },
84 	{ FM_CACHE_INFO_VERSION, 1 },
85 	{ FM_CPU_PCI_VERSION, 1 },
86 	{ NULL, 0 }
87 };
88 
89 static const fm_subr_t fm_subrs[] = {
90 	{ FM_IOC_VERSIONS, B_FALSE, FM_VERSIONS_VERSION, fm_ioctl_versions },
91 	{ FM_IOC_PAGE_RETIRE, B_TRUE, FM_PAGE_OP_VERSION,
92 	    fm_ioctl_page_retire },
93 	{ FM_IOC_PAGE_STATUS, B_FALSE, FM_PAGE_OP_VERSION,
94 	    fm_ioctl_page_retire },
95 	{ FM_IOC_PAGE_UNRETIRE, B_TRUE, FM_PAGE_OP_VERSION,
96 	    fm_ioctl_page_retire },
97 #if defined(__x86)
98 	{ FM_IOC_PHYSCPU_INFO, B_FALSE, FM_CPU_INFO_VERSION,
99 	    fm_ioctl_physcpu_info },
100 	{ FM_IOC_CPU_RETIRE, B_TRUE, FM_CPU_OP_VERSION,
101 	    fm_ioctl_cpu_retire },
102 	{ FM_IOC_CPU_STATUS, B_FALSE, FM_CPU_OP_VERSION,
103 	    fm_ioctl_cpu_retire },
104 	{ FM_IOC_CPU_UNRETIRE, B_TRUE, FM_CPU_OP_VERSION,
105 	    fm_ioctl_cpu_retire },
106 	{ FM_IOC_GENTOPO_LEGACY, B_FALSE, FM_TOPO_LEGACY_VERSION,
107 	    fm_ioctl_gentopo_legacy },
108 	{ FM_IOC_PCI_DATA, B_TRUE, FM_CPU_PCI_VERSION,
109 	    fm_ioctl_pci_data },
110 #endif	/* __x86 */
111 	{ FM_IOC_CACHE_INFO, B_FALSE, FM_CACHE_INFO_VERSION,
112 	    fm_ioctl_cache_info },
113 	{ -1, B_FALSE, NULL, NULL },
114 };
115 
116 static dev_info_t *fm_dip;
117 static boolean_t is_i86xpv;
118 static nvlist_t *fm_vers_nvl;
119 
120 static int
121 fm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
122 {
123 	switch (cmd) {
124 	case DDI_ATTACH:
125 		if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
126 		    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) {
127 			ddi_remove_minor_node(dip, NULL);
128 			return (DDI_FAILURE);
129 		}
130 		fm_dip = dip;
131 		is_i86xpv = (strcmp(platform, "i86xpv") == 0);
132 		break;
133 	case DDI_RESUME:
134 		break;
135 	default:
136 		return (DDI_FAILURE);
137 	}
138 	return (DDI_SUCCESS);
139 }
140 
141 static int
142 fm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
143 {
144 	int ret = DDI_SUCCESS;
145 
146 	switch (cmd) {
147 	case DDI_DETACH:
148 		ddi_remove_minor_node(dip, NULL);
149 		fm_dip = NULL;
150 		break;
151 	default:
152 		ret = DDI_FAILURE;
153 	}
154 	return (ret);
155 }
156 
157 /*ARGSUSED*/
158 static int
159 fm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
160 {
161 	int error;
162 
163 	switch (infocmd) {
164 	case DDI_INFO_DEVT2DEVINFO:
165 		*result = fm_dip;
166 		error = DDI_SUCCESS;
167 		break;
168 	case DDI_INFO_DEVT2INSTANCE:
169 		*result = NULL;
170 		error = DDI_SUCCESS;
171 		break;
172 	default:
173 		error = DDI_FAILURE;
174 	}
175 	return (error);
176 }
177 
178 /*ARGSUSED1*/
179 static int
180 fm_open(dev_t *devp, int flag, int typ, struct cred *cred)
181 {
182 	if (typ != OTYP_CHR)
183 		return (EINVAL);
184 	if (getminor(*devp) != 0)
185 		return (ENXIO);
186 
187 	return (0);
188 }
189 
190 /*ARGSUSED*/
191 static int
192 fm_ioctl_versions(int cmd, nvlist_t *invl, nvlist_t **onvlp)
193 {
194 	nvlist_t *nvl;
195 	int err;
196 
197 	if ((err = nvlist_dup(fm_vers_nvl, &nvl, KM_SLEEP)) == 0)
198 		*onvlp = nvl;
199 
200 	return (err);
201 }
202 
203 /*
204  * Given a mem-scheme FMRI for a page, execute the given page retire
205  * command on it.
206  */
207 /*ARGSUSED*/
208 static int
209 fm_ioctl_page_retire(int cmd, nvlist_t *invl, nvlist_t **onvlp)
210 {
211 	uint64_t pa;
212 	nvlist_t *fmri;
213 	int err;
214 
215 	if (is_i86xpv)
216 		return (ENOTSUP);
217 
218 	if ((err = nvlist_lookup_nvlist(invl, FM_PAGE_RETIRE_FMRI, &fmri))
219 	    != 0)
220 		return (err);
221 
222 	if ((err = fm_get_paddr(fmri, &pa)) != 0)
223 		return (err);
224 
225 	switch (cmd) {
226 	case FM_IOC_PAGE_STATUS:
227 		return (page_retire_check(pa, NULL));
228 
229 	case FM_IOC_PAGE_RETIRE:
230 		return (page_retire(pa, PR_FMA));
231 
232 	case FM_IOC_PAGE_UNRETIRE:
233 		return (page_unretire(pa));
234 	}
235 
236 	return (ENOTTY);
237 }
238 
239 /*ARGSUSED*/
240 static int
241 fm_ioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
242 {
243 	char *buf;
244 	int err;
245 	uint_t model;
246 	const fm_subr_t *subr;
247 	uint32_t vers;
248 	fm_ioc_data_t fid;
249 	nvlist_t *invl = NULL, *onvl = NULL;
250 #ifdef _MULTI_DATAMODEL
251 	fm_ioc_data32_t fid32;
252 #endif
253 
254 	if (getminor(dev) != 0)
255 		return (ENXIO);
256 
257 	for (subr = fm_subrs; subr->cmd != cmd; subr++)
258 		if (subr->cmd == -1)
259 			return (ENOTTY);
260 
261 	if (subr->priv && (flag & FWRITE) == 0 &&
262 	    secpolicy_sys_config(CRED(), 0) != 0)
263 		return (EPERM);
264 
265 	model = ddi_model_convert_from(flag & FMODELS);
266 
267 	switch (model) {
268 #ifdef _MULTI_DATAMODEL
269 	case DDI_MODEL_ILP32:
270 		if (ddi_copyin((void *)data, &fid32,
271 		    sizeof (fm_ioc_data32_t), flag) != 0)
272 			return (EFAULT);
273 		fid.fid_version = fid32.fid_version;
274 		fid.fid_insz = fid32.fid_insz;
275 		fid.fid_inbuf = (caddr_t)(uintptr_t)fid32.fid_inbuf;
276 		fid.fid_outsz = fid32.fid_outsz;
277 		fid.fid_outbuf = (caddr_t)(uintptr_t)fid32.fid_outbuf;
278 		break;
279 #endif /* _MULTI_DATAMODEL */
280 	case DDI_MODEL_NONE:
281 	default:
282 		if (ddi_copyin((void *)data, &fid, sizeof (fm_ioc_data_t),
283 		    flag) != 0)
284 			return (EFAULT);
285 	}
286 
287 	if (nvlist_lookup_uint32(fm_vers_nvl, subr->version, &vers) != 0 ||
288 	    fid.fid_version != vers)
289 		return (ENOTSUP);
290 
291 	if (fid.fid_insz > FM_IOC_MAXBUFSZ)
292 		return (ENAMETOOLONG);
293 	if (fid.fid_outsz > FM_IOC_OUT_MAXBUFSZ)
294 		return (EINVAL);
295 
296 	/*
297 	 * Copy in and unpack the input nvlist.
298 	 */
299 	if (fid.fid_insz != 0 && fid.fid_inbuf != (caddr_t)0) {
300 		buf = kmem_alloc(fid.fid_insz, KM_SLEEP);
301 		if (ddi_copyin(fid.fid_inbuf, buf, fid.fid_insz, flag) != 0) {
302 			kmem_free(buf, fid.fid_insz);
303 			return (EFAULT);
304 		}
305 		err = nvlist_unpack(buf, fid.fid_insz, &invl, KM_SLEEP);
306 		kmem_free(buf, fid.fid_insz);
307 		if (err != 0)
308 			return (err);
309 	}
310 
311 	err = subr->func(cmd, invl, &onvl);
312 
313 	nvlist_free(invl);
314 
315 	if (err != 0) {
316 		nvlist_free(onvl);
317 		return (err);
318 	}
319 
320 	/*
321 	 * If the output nvlist contains any data, pack it and copyout.
322 	 */
323 	if (onvl != NULL) {
324 		size_t sz;
325 
326 		if ((err = nvlist_size(onvl, &sz, NV_ENCODE_NATIVE)) != 0) {
327 			nvlist_free(onvl);
328 			return (err);
329 		}
330 		if (sz > fid.fid_outsz) {
331 			nvlist_free(onvl);
332 			return (ENAMETOOLONG);
333 		}
334 
335 		buf = kmem_alloc(sz, KM_SLEEP);
336 		if ((err = nvlist_pack(onvl, &buf, &sz, NV_ENCODE_NATIVE,
337 		    KM_SLEEP)) != 0) {
338 			kmem_free(buf, sz);
339 			nvlist_free(onvl);
340 			return (err);
341 		}
342 		nvlist_free(onvl);
343 		if (ddi_copyout(buf, fid.fid_outbuf, sz, flag) != 0) {
344 			kmem_free(buf, sz);
345 			return (EFAULT);
346 		}
347 		kmem_free(buf, sz);
348 		fid.fid_outsz = sz;
349 
350 		switch (model) {
351 #ifdef _MULTI_DATAMODEL
352 		case DDI_MODEL_ILP32:
353 			fid32.fid_outsz = (size32_t)fid.fid_outsz;
354 			if (ddi_copyout(&fid32, (void *)data,
355 			    sizeof (fm_ioc_data32_t), flag) != 0)
356 				return (EFAULT);
357 			break;
358 #endif /* _MULTI_DATAMODEL */
359 		case DDI_MODEL_NONE:
360 		default:
361 			if (ddi_copyout(&fid, (void *)data,
362 			    sizeof (fm_ioc_data_t), flag) != 0)
363 				return (EFAULT);
364 		}
365 	}
366 
367 	return (err);
368 }
369 
370 static struct cb_ops fm_cb_ops = {
371 	fm_open,		/* open */
372 	nulldev,		/* close */
373 	nodev,			/* strategy */
374 	nodev,			/* print */
375 	nodev,			/* dump */
376 	nodev,			/* read */
377 	nodev,			/* write */
378 	fm_ioctl,		/* ioctl */
379 	nodev,			/* devmap */
380 	nodev,			/* mmap */
381 	nodev,			/* segmap */
382 	nochpoll,		/* poll */
383 	ddi_prop_op,		/* prop_op */
384 	NULL,			/* streamtab  */
385 	D_NEW | D_MP | D_64BIT | D_U64BIT
386 };
387 
388 static struct dev_ops fm_ops = {
389 	DEVO_REV,		/* devo_rev, */
390 	0,			/* refcnt  */
391 	fm_info,		/* get_dev_info */
392 	nulldev,		/* identify */
393 	nulldev,		/* probe */
394 	fm_attach,		/* attach */
395 	fm_detach,		/* detach */
396 	nodev,			/* reset */
397 	&fm_cb_ops,		/* driver operations */
398 	(struct bus_ops *)0	/* bus operations */
399 };
400 
401 static struct modldrv modldrv = {
402 	&mod_driverops, "fault management driver", &fm_ops,
403 };
404 
405 struct modlinkage devfm_modlinkage = {
406 	MODREV_1, &modldrv, NULL
407 };
408 
409 int
410 _init(void)
411 {
412 	const fm_vers_t *p;
413 	int ret;
414 
415 
416 	if ((ret = mod_install(&devfm_modlinkage)) == 0) {
417 		(void) nvlist_alloc(&fm_vers_nvl, NV_UNIQUE_NAME, KM_SLEEP);
418 		for (p = fm_versions; p->interface != NULL; p++)
419 			(void) nvlist_add_uint32(fm_vers_nvl, p->interface,
420 			    p->version);
421 	}
422 
423 	return (ret);
424 }
425 
426 int
427 _info(struct modinfo *modinfop)
428 {
429 	return (mod_info(&devfm_modlinkage, modinfop));
430 }
431 
432 int
433 _fini(void)
434 {
435 	int ret;
436 
437 	if ((ret = mod_remove(&devfm_modlinkage)) == 0) {
438 		nvlist_free(fm_vers_nvl);
439 	}
440 
441 	return (ret);
442 }
443