xref: /titanic_44/usr/src/uts/i86xpv/io/privcmd.c (revision 59d2da88ef75ee90d89de8d98edf0521bea61f8d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/file.h>
29 #include <sys/errno.h>
30 #include <sys/open.h>
31 #include <sys/cred.h>
32 #include <sys/conf.h>
33 #include <sys/stat.h>
34 #include <sys/modctl.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/vmsystm.h>
38 #include <sys/sdt.h>
39 #include <sys/hypervisor.h>
40 #include <sys/xen_errno.h>
41 #include <sys/policy.h>
42 
43 #include <vm/hat_i86.h>
44 #include <vm/hat_pte.h>
45 #include <vm/seg_mf.h>
46 
47 #include <xen/sys/privcmd.h>
48 #include <sys/privcmd_impl.h>
49 
50 static dev_info_t *privcmd_devi;
51 
52 /*ARGSUSED*/
53 static int
54 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
55 {
56 	switch (cmd) {
57 	case DDI_INFO_DEVT2DEVINFO:
58 	case DDI_INFO_DEVT2INSTANCE:
59 		break;
60 	default:
61 		return (DDI_FAILURE);
62 	}
63 
64 	switch (getminor((dev_t)arg)) {
65 	case PRIVCMD_MINOR:
66 		break;
67 	default:
68 		return (DDI_FAILURE);
69 	}
70 
71 	if (cmd == DDI_INFO_DEVT2INSTANCE)
72 		*result = 0;
73 	else
74 		*result = privcmd_devi;
75 	return (DDI_SUCCESS);
76 }
77 
78 static int
79 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
80 {
81 	if (cmd != DDI_ATTACH)
82 		return (DDI_FAILURE);
83 
84 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
85 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
86 		return (DDI_FAILURE);
87 
88 	privcmd_devi = devi;
89 	ddi_report_dev(devi);
90 	return (DDI_SUCCESS);
91 }
92 
93 static int
94 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
95 {
96 	if (cmd != DDI_DETACH)
97 		return (DDI_FAILURE);
98 	ddi_remove_minor_node(devi, NULL);
99 	privcmd_devi = NULL;
100 	return (DDI_SUCCESS);
101 }
102 
103 /*ARGSUSED1*/
104 static int
105 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
106 {
107 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
108 }
109 
110 /*
111  * Map a contiguous set of machine frames in a foreign domain.
112  * Used in the following way:
113  *
114  *	privcmd_mmap_t p;
115  *	privcmd_mmap_entry_t e;
116  *
117  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
118  *	p.num = number of privcmd_mmap_entry_t's
119  *	p.dom = domid;
120  *	p.entry = &e;
121  *	e.va = addr;
122  *	e.mfn = mfn;
123  *	e.npages = btopr(size);
124  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
125  */
126 /*ARGSUSED2*/
127 int
128 do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
129 {
130 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
131 	privcmd_mmap_entry_t *umme;
132 	struct as *as = curproc->p_as;
133 	struct seg *seg;
134 	int i, error = 0;
135 
136 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
137 		return (EFAULT);
138 
139 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
140 	    privcmd_mmap_entry_t *, mmc->entry);
141 
142 	if (mmc->dom == DOMID_SELF) {
143 		error = ENOTSUP;	/* Too paranoid? */
144 		goto done;
145 	}
146 
147 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
148 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
149 		caddr_t addr;
150 
151 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
152 			error = EFAULT;
153 			break;
154 		}
155 
156 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
157 		    ulong_t, mme->npages);
158 
159 		if (mme->mfn == MFN_INVALID) {
160 			error = EINVAL;
161 			break;
162 		}
163 
164 		addr = (caddr_t)mme->va;
165 
166 		/*
167 		 * Find the segment we want to mess with, then add
168 		 * the mfn range to the segment.
169 		 */
170 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
171 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
172 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
173 			error = EINVAL;
174 		else
175 			error = segmf_add_mfns(seg, addr,
176 			    mme->mfn, mme->npages, mmc->dom);
177 		AS_LOCK_EXIT(as, &as->a_lock);
178 
179 		if (error != 0)
180 			break;
181 	}
182 
183 done:
184 	DTRACE_XPV1(mmap__end, int, error);
185 
186 	return (error);
187 }
188 
189 /*
190  * Set up the address range to map to an array of mfns in
191  * a foreign domain.  Used in the following way:
192  *
193  *	privcmd_mmap_batch_t p;
194  *
195  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
196  *	p.num = number of pages
197  *	p.dom = domid
198  *	p.addr = addr;
199  *	p.arr = array of mfns, indexed 0 .. p.num - 1
200  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
201  */
202 /*ARGSUSED2*/
203 static int
204 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
205 {
206 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
207 	struct as *as = curproc->p_as;
208 	struct seg *seg;
209 	int i, error = 0;
210 	caddr_t addr;
211 	ulong_t *ulp;
212 
213 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
214 		return (EFAULT);
215 
216 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
217 	    caddr_t, mmb->addr);
218 
219 	addr = (caddr_t)mmb->addr;
220 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
221 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
222 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
223 		error = EINVAL;
224 		goto done;
225 	}
226 
227 	for (i = 0, ulp = mmb->arr;
228 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
229 		mfn_t mfn;
230 
231 		if (fulword(ulp, &mfn) != 0) {
232 			error = EFAULT;
233 			break;
234 		}
235 
236 		if (mfn == MFN_INVALID) {
237 			/*
238 			 * This mfn is invalid and should not be added to
239 			 * segmf, as we'd only cause an immediate EFAULT when
240 			 * we tried to fault it in.
241 			 */
242 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
243 			continue;
244 		}
245 
246 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
247 			continue;
248 
249 		/*
250 		 * Tell the process that this MFN could not be mapped, so it
251 		 * won't later try to access it.
252 		 */
253 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
254 		if (sulword(ulp, mfn) != 0) {
255 			error = EFAULT;
256 			break;
257 		}
258 	}
259 
260 done:
261 	AS_LOCK_EXIT(as, &as->a_lock);
262 
263 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
264 	    mmb->addr);
265 
266 	return (error);
267 }
268 
269 /*ARGSUSED*/
270 static int
271 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
272 {
273 	if (secpolicy_xvm_control(cr))
274 		return (EPERM);
275 
276 	/*
277 	 * Everything is a -native- data type.
278 	 */
279 	if ((mode & FMODELS) != FNATIVE)
280 		return (EOVERFLOW);
281 
282 	switch (cmd) {
283 	case IOCTL_PRIVCMD_HYPERCALL:
284 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
285 	case IOCTL_PRIVCMD_MMAP:
286 		if (DOMAIN_IS_PRIVILEGED(xen_info))
287 			return (do_privcmd_mmap((void *)arg, mode, cr));
288 		break;
289 	case IOCTL_PRIVCMD_MMAPBATCH:
290 		if (DOMAIN_IS_PRIVILEGED(xen_info))
291 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
292 		break;
293 	default:
294 		break;
295 	}
296 	return (EINVAL);
297 }
298 
299 /*
300  * The real magic happens in the segmf segment driver.
301  */
302 /*ARGSUSED8*/
303 static int
304 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
305     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
306 {
307 	struct segmf_crargs a;
308 	int error;
309 
310 	if (secpolicy_xvm_control(cr))
311 		return (EPERM);
312 
313 	as_rangelock(as);
314 	if ((flags & MAP_FIXED) == 0) {
315 		map_addr(addrp, len, (offset_t)off, 0, flags);
316 		if (*addrp == NULL) {
317 			error = ENOMEM;
318 			goto rangeunlock;
319 		}
320 	} else {
321 		/*
322 		 * User specified address
323 		 */
324 		(void) as_unmap(as, *addrp, len);
325 	}
326 
327 	/*
328 	 * The mapping *must* be MAP_SHARED at offset 0.
329 	 *
330 	 * (Foreign pages are treated like device memory; the
331 	 * ioctl interface allows the backing objects to be
332 	 * arbitrarily redefined to point at any machine frame.)
333 	 */
334 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
335 		error = EINVAL;
336 		goto rangeunlock;
337 	}
338 
339 	a.dev = dev;
340 	a.prot = (uchar_t)prot;
341 	a.maxprot = (uchar_t)maxprot;
342 	error = as_map(as, *addrp, len, segmf_create, &a);
343 
344 rangeunlock:
345 	as_rangeunlock(as);
346 	return (error);
347 }
348 
349 static struct cb_ops privcmd_cb_ops = {
350 	privcmd_open,
351 	nulldev,	/* close */
352 	nodev,		/* strategy */
353 	nodev,		/* print */
354 	nodev,		/* dump */
355 	nodev,		/* read */
356 	nodev,		/* write */
357 	privcmd_ioctl,
358 	nodev,		/* devmap */
359 	nodev,		/* mmap */
360 	privcmd_segmap,
361 	nochpoll,	/* poll */
362 	ddi_prop_op,
363 	NULL,
364 	D_64BIT | D_NEW | D_MP
365 };
366 
367 static struct dev_ops privcmd_dv_ops = {
368 	DEVO_REV,
369 	0,
370 	privcmd_getinfo,
371 	nulldev,		/* identify */
372 	nulldev,		/* probe */
373 	privcmd_attach,
374 	privcmd_detach,
375 	nodev,			/* reset */
376 	&privcmd_cb_ops,
377 	0,			/* struct bus_ops */
378 	NULL,			/* power */
379 	ddi_quiesce_not_needed,		/* quiesce */
380 };
381 
382 static struct modldrv modldrv = {
383 	&mod_driverops,
384 	"privcmd driver",
385 	&privcmd_dv_ops
386 };
387 
388 static struct modlinkage modl = {
389 	MODREV_1,
390 	&modldrv
391 };
392 
393 int
394 _init(void)
395 {
396 	return (mod_install(&modl));
397 }
398 
399 int
400 _fini(void)
401 {
402 	return (mod_remove(&modl));
403 }
404 
405 int
406 _info(struct modinfo *modinfo)
407 {
408 	return (mod_info(&modl, modinfo));
409 }
410