xref: /titanic_44/usr/src/uts/i86xpv/io/privcmd.c (revision 193974072f41a843678abf5f61979c748687e66b)
1843e1988Sjohnlev /*
2843e1988Sjohnlev  * CDDL HEADER START
3843e1988Sjohnlev  *
4843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7843e1988Sjohnlev  *
8843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev  * See the License for the specific language governing permissions
11843e1988Sjohnlev  * and limitations under the License.
12843e1988Sjohnlev  *
13843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev  *
19843e1988Sjohnlev  * CDDL HEADER END
20843e1988Sjohnlev  */
21843e1988Sjohnlev 
22843e1988Sjohnlev /*
23a576ab5bSrab  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24843e1988Sjohnlev  * Use is subject to license terms.
25843e1988Sjohnlev  */
26843e1988Sjohnlev 
27843e1988Sjohnlev #include <sys/types.h>
28843e1988Sjohnlev #include <sys/file.h>
29843e1988Sjohnlev #include <sys/errno.h>
30843e1988Sjohnlev #include <sys/open.h>
31843e1988Sjohnlev #include <sys/cred.h>
32843e1988Sjohnlev #include <sys/conf.h>
33843e1988Sjohnlev #include <sys/stat.h>
34843e1988Sjohnlev #include <sys/modctl.h>
35843e1988Sjohnlev #include <sys/ddi.h>
36843e1988Sjohnlev #include <sys/sunddi.h>
37843e1988Sjohnlev #include <sys/vmsystm.h>
38843e1988Sjohnlev #include <sys/sdt.h>
39843e1988Sjohnlev #include <sys/hypervisor.h>
40843e1988Sjohnlev #include <sys/xen_errno.h>
41b26a64aeSjohnlev #include <sys/policy.h>
42843e1988Sjohnlev 
43843e1988Sjohnlev #include <vm/hat_i86.h>
44843e1988Sjohnlev #include <vm/hat_pte.h>
45843e1988Sjohnlev #include <vm/seg_mf.h>
46843e1988Sjohnlev 
47843e1988Sjohnlev #include <xen/sys/privcmd.h>
48843e1988Sjohnlev #include <sys/privcmd_impl.h>
49843e1988Sjohnlev 
/*
 * dev_info node of the privcmd instance.  Recorded by privcmd_attach(),
 * reset to NULL by privcmd_detach(), and handed back to the framework by
 * privcmd_getinfo() for DDI_INFO_DEVT2DEVINFO requests.
 */
static dev_info_t *privcmd_devi;
51843e1988Sjohnlev 
52843e1988Sjohnlev /*ARGSUSED*/
53843e1988Sjohnlev static int
54843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
55843e1988Sjohnlev {
56843e1988Sjohnlev 	switch (cmd) {
57843e1988Sjohnlev 	case DDI_INFO_DEVT2DEVINFO:
58843e1988Sjohnlev 	case DDI_INFO_DEVT2INSTANCE:
59843e1988Sjohnlev 		break;
60843e1988Sjohnlev 	default:
61843e1988Sjohnlev 		return (DDI_FAILURE);
62843e1988Sjohnlev 	}
63843e1988Sjohnlev 
64843e1988Sjohnlev 	switch (getminor((dev_t)arg)) {
65843e1988Sjohnlev 	case PRIVCMD_MINOR:
66843e1988Sjohnlev 		break;
67843e1988Sjohnlev 	default:
68843e1988Sjohnlev 		return (DDI_FAILURE);
69843e1988Sjohnlev 	}
70843e1988Sjohnlev 
71843e1988Sjohnlev 	if (cmd == DDI_INFO_DEVT2INSTANCE)
72843e1988Sjohnlev 		*result = 0;
73843e1988Sjohnlev 	else
74843e1988Sjohnlev 		*result = privcmd_devi;
75843e1988Sjohnlev 	return (DDI_SUCCESS);
76843e1988Sjohnlev }
77843e1988Sjohnlev 
78843e1988Sjohnlev static int
79843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
80843e1988Sjohnlev {
81843e1988Sjohnlev 	if (cmd != DDI_ATTACH)
82843e1988Sjohnlev 		return (DDI_FAILURE);
83843e1988Sjohnlev 
84843e1988Sjohnlev 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
85843e1988Sjohnlev 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
86843e1988Sjohnlev 		return (DDI_FAILURE);
87843e1988Sjohnlev 
88843e1988Sjohnlev 	privcmd_devi = devi;
89843e1988Sjohnlev 	ddi_report_dev(devi);
90843e1988Sjohnlev 	return (DDI_SUCCESS);
91843e1988Sjohnlev }
92843e1988Sjohnlev 
93843e1988Sjohnlev static int
94843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
95843e1988Sjohnlev {
96843e1988Sjohnlev 	if (cmd != DDI_DETACH)
97843e1988Sjohnlev 		return (DDI_FAILURE);
98843e1988Sjohnlev 	ddi_remove_minor_node(devi, NULL);
99843e1988Sjohnlev 	privcmd_devi = NULL;
100843e1988Sjohnlev 	return (DDI_SUCCESS);
101843e1988Sjohnlev }
102843e1988Sjohnlev 
103843e1988Sjohnlev /*ARGSUSED1*/
104843e1988Sjohnlev static int
105843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
106843e1988Sjohnlev {
107843e1988Sjohnlev 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
108843e1988Sjohnlev }
109843e1988Sjohnlev 
110843e1988Sjohnlev /*
111843e1988Sjohnlev  * Map a contiguous set of machine frames in a foreign domain.
112843e1988Sjohnlev  * Used in the following way:
113843e1988Sjohnlev  *
114843e1988Sjohnlev  *	privcmd_mmap_t p;
115843e1988Sjohnlev  *	privcmd_mmap_entry_t e;
116843e1988Sjohnlev  *
117843e1988Sjohnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
118843e1988Sjohnlev  *	p.num = number of privcmd_mmap_entry_t's
119843e1988Sjohnlev  *	p.dom = domid;
120843e1988Sjohnlev  *	p.entry = &e;
121843e1988Sjohnlev  *	e.va = addr;
122843e1988Sjohnlev  *	e.mfn = mfn;
123843e1988Sjohnlev  *	e.npages = btopr(size);
124843e1988Sjohnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
125843e1988Sjohnlev  */
126843e1988Sjohnlev /*ARGSUSED2*/
127843e1988Sjohnlev int
128843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
129843e1988Sjohnlev {
130843e1988Sjohnlev 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
131843e1988Sjohnlev 	privcmd_mmap_entry_t *umme;
132843e1988Sjohnlev 	struct as *as = curproc->p_as;
133843e1988Sjohnlev 	struct seg *seg;
134843e1988Sjohnlev 	int i, error = 0;
135843e1988Sjohnlev 
136843e1988Sjohnlev 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
137843e1988Sjohnlev 		return (EFAULT);
138843e1988Sjohnlev 
139843e1988Sjohnlev 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
140843e1988Sjohnlev 	    privcmd_mmap_entry_t *, mmc->entry);
141843e1988Sjohnlev 
142843e1988Sjohnlev 	if (mmc->dom == DOMID_SELF) {
143843e1988Sjohnlev 		error = ENOTSUP;	/* Too paranoid? */
144843e1988Sjohnlev 		goto done;
145843e1988Sjohnlev 	}
146843e1988Sjohnlev 
147843e1988Sjohnlev 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
148843e1988Sjohnlev 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
149843e1988Sjohnlev 		caddr_t addr;
150843e1988Sjohnlev 
151843e1988Sjohnlev 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
152843e1988Sjohnlev 			error = EFAULT;
153843e1988Sjohnlev 			break;
154843e1988Sjohnlev 		}
155843e1988Sjohnlev 
156843e1988Sjohnlev 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
157843e1988Sjohnlev 		    ulong_t, mme->npages);
158843e1988Sjohnlev 
159843e1988Sjohnlev 		if (mme->mfn == MFN_INVALID) {
160843e1988Sjohnlev 			error = EINVAL;
161843e1988Sjohnlev 			break;
162843e1988Sjohnlev 		}
163843e1988Sjohnlev 
164843e1988Sjohnlev 		addr = (caddr_t)mme->va;
165843e1988Sjohnlev 
166843e1988Sjohnlev 		/*
167843e1988Sjohnlev 		 * Find the segment we want to mess with, then add
168843e1988Sjohnlev 		 * the mfn range to the segment.
169843e1988Sjohnlev 		 */
170843e1988Sjohnlev 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
171843e1988Sjohnlev 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
172843e1988Sjohnlev 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
173843e1988Sjohnlev 			error = EINVAL;
174843e1988Sjohnlev 		else
175843e1988Sjohnlev 			error = segmf_add_mfns(seg, addr,
176843e1988Sjohnlev 			    mme->mfn, mme->npages, mmc->dom);
177843e1988Sjohnlev 		AS_LOCK_EXIT(as, &as->a_lock);
178843e1988Sjohnlev 
179843e1988Sjohnlev 		if (error != 0)
180843e1988Sjohnlev 			break;
181843e1988Sjohnlev 	}
182843e1988Sjohnlev 
183843e1988Sjohnlev done:
184843e1988Sjohnlev 	DTRACE_XPV1(mmap__end, int, error);
185843e1988Sjohnlev 
186843e1988Sjohnlev 	return (error);
187843e1988Sjohnlev }
188843e1988Sjohnlev 
189843e1988Sjohnlev /*
190843e1988Sjohnlev  * Set up the address range to map to an array of mfns in
191843e1988Sjohnlev  * a foreign domain.  Used in the following way:
192843e1988Sjohnlev  *
193843e1988Sjohnlev  *	privcmd_mmap_batch_t p;
194843e1988Sjohnlev  *
195843e1988Sjohnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
196843e1988Sjohnlev  *	p.num = number of pages
197843e1988Sjohnlev  *	p.dom = domid
198843e1988Sjohnlev  *	p.addr = addr;
199843e1988Sjohnlev  *	p.arr = array of mfns, indexed 0 .. p.num - 1
200843e1988Sjohnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
201843e1988Sjohnlev  */
202843e1988Sjohnlev /*ARGSUSED2*/
203843e1988Sjohnlev static int
204843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
205843e1988Sjohnlev {
206843e1988Sjohnlev 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
207843e1988Sjohnlev 	struct as *as = curproc->p_as;
208843e1988Sjohnlev 	struct seg *seg;
209843e1988Sjohnlev 	int i, error = 0;
210843e1988Sjohnlev 	caddr_t addr;
211843e1988Sjohnlev 	ulong_t *ulp;
212843e1988Sjohnlev 
213843e1988Sjohnlev 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
214843e1988Sjohnlev 		return (EFAULT);
215843e1988Sjohnlev 
216843e1988Sjohnlev 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
217843e1988Sjohnlev 	    caddr_t, mmb->addr);
218843e1988Sjohnlev 
219843e1988Sjohnlev 	addr = (caddr_t)mmb->addr;
220843e1988Sjohnlev 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
221843e1988Sjohnlev 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
222843e1988Sjohnlev 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
223843e1988Sjohnlev 		error = EINVAL;
224843e1988Sjohnlev 		goto done;
225843e1988Sjohnlev 	}
226843e1988Sjohnlev 
227843e1988Sjohnlev 	for (i = 0, ulp = mmb->arr;
228843e1988Sjohnlev 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
229843e1988Sjohnlev 		mfn_t mfn;
230843e1988Sjohnlev 
231843e1988Sjohnlev 		if (fulword(ulp, &mfn) != 0) {
232843e1988Sjohnlev 			error = EFAULT;
233843e1988Sjohnlev 			break;
234843e1988Sjohnlev 		}
235843e1988Sjohnlev 
236843e1988Sjohnlev 		if (mfn == MFN_INVALID) {
237a576ab5bSrab 			/*
238a576ab5bSrab 			 * This mfn is invalid and should not be added to
239a576ab5bSrab 			 * segmf, as we'd only cause an immediate EFAULT when
240a576ab5bSrab 			 * we tried to fault it in.
241a576ab5bSrab 			 */
242a576ab5bSrab 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
243a576ab5bSrab 			continue;
244843e1988Sjohnlev 		}
245843e1988Sjohnlev 
246843e1988Sjohnlev 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
247843e1988Sjohnlev 			continue;
248843e1988Sjohnlev 
249843e1988Sjohnlev 		/*
250843e1988Sjohnlev 		 * Tell the process that this MFN could not be mapped, so it
251843e1988Sjohnlev 		 * won't later try to access it.
252843e1988Sjohnlev 		 */
253a576ab5bSrab 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
254843e1988Sjohnlev 		if (sulword(ulp, mfn) != 0) {
255843e1988Sjohnlev 			error = EFAULT;
256843e1988Sjohnlev 			break;
257843e1988Sjohnlev 		}
258843e1988Sjohnlev 	}
259843e1988Sjohnlev 
260843e1988Sjohnlev done:
261843e1988Sjohnlev 	AS_LOCK_EXIT(as, &as->a_lock);
262843e1988Sjohnlev 
263843e1988Sjohnlev 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
264843e1988Sjohnlev 	    mmb->addr);
265843e1988Sjohnlev 
266843e1988Sjohnlev 	return (error);
267843e1988Sjohnlev }
268843e1988Sjohnlev 
269843e1988Sjohnlev /*ARGSUSED*/
270843e1988Sjohnlev static int
271843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
272843e1988Sjohnlev {
273b26a64aeSjohnlev 	if (secpolicy_xvm_control(cr))
274b26a64aeSjohnlev 		return (EPERM);
275843e1988Sjohnlev 
276843e1988Sjohnlev 	/*
277843e1988Sjohnlev 	 * Everything is a -native- data type.
278843e1988Sjohnlev 	 */
279b26a64aeSjohnlev 	if ((mode & FMODELS) != FNATIVE)
280b26a64aeSjohnlev 		return (EOVERFLOW);
281843e1988Sjohnlev 
282843e1988Sjohnlev 	switch (cmd) {
283843e1988Sjohnlev 	case IOCTL_PRIVCMD_HYPERCALL:
284843e1988Sjohnlev 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
285843e1988Sjohnlev 	case IOCTL_PRIVCMD_MMAP:
286843e1988Sjohnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
287843e1988Sjohnlev 			return (do_privcmd_mmap((void *)arg, mode, cr));
288843e1988Sjohnlev 		break;
289843e1988Sjohnlev 	case IOCTL_PRIVCMD_MMAPBATCH:
290843e1988Sjohnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
291843e1988Sjohnlev 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
292843e1988Sjohnlev 		break;
293843e1988Sjohnlev 	default:
294843e1988Sjohnlev 		break;
295843e1988Sjohnlev 	}
296843e1988Sjohnlev 	return (EINVAL);
297843e1988Sjohnlev }
298843e1988Sjohnlev 
299843e1988Sjohnlev /*
300843e1988Sjohnlev  * The real magic happens in the segmf segment driver.
301843e1988Sjohnlev  */
302843e1988Sjohnlev /*ARGSUSED8*/
303843e1988Sjohnlev static int
304843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
305843e1988Sjohnlev     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
306843e1988Sjohnlev {
307843e1988Sjohnlev 	struct segmf_crargs a;
308843e1988Sjohnlev 	int error;
309843e1988Sjohnlev 
310b26a64aeSjohnlev 	if (secpolicy_xvm_control(cr))
311b26a64aeSjohnlev 		return (EPERM);
312b26a64aeSjohnlev 
313843e1988Sjohnlev 	as_rangelock(as);
314843e1988Sjohnlev 	if ((flags & MAP_FIXED) == 0) {
315843e1988Sjohnlev 		map_addr(addrp, len, (offset_t)off, 0, flags);
316843e1988Sjohnlev 		if (*addrp == NULL) {
317843e1988Sjohnlev 			error = ENOMEM;
318843e1988Sjohnlev 			goto rangeunlock;
319843e1988Sjohnlev 		}
320843e1988Sjohnlev 	} else {
321843e1988Sjohnlev 		/*
322843e1988Sjohnlev 		 * User specified address
323843e1988Sjohnlev 		 */
324843e1988Sjohnlev 		(void) as_unmap(as, *addrp, len);
325843e1988Sjohnlev 	}
326843e1988Sjohnlev 
327843e1988Sjohnlev 	/*
328843e1988Sjohnlev 	 * The mapping *must* be MAP_SHARED at offset 0.
329843e1988Sjohnlev 	 *
330843e1988Sjohnlev 	 * (Foreign pages are treated like device memory; the
331843e1988Sjohnlev 	 * ioctl interface allows the backing objects to be
332843e1988Sjohnlev 	 * arbitrarily redefined to point at any machine frame.)
333843e1988Sjohnlev 	 */
334843e1988Sjohnlev 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
335843e1988Sjohnlev 		error = EINVAL;
336843e1988Sjohnlev 		goto rangeunlock;
337843e1988Sjohnlev 	}
338843e1988Sjohnlev 
339843e1988Sjohnlev 	a.dev = dev;
340843e1988Sjohnlev 	a.prot = (uchar_t)prot;
341843e1988Sjohnlev 	a.maxprot = (uchar_t)maxprot;
342843e1988Sjohnlev 	error = as_map(as, *addrp, len, segmf_create, &a);
343843e1988Sjohnlev 
344843e1988Sjohnlev rangeunlock:
345843e1988Sjohnlev 	as_rangeunlock(as);
346843e1988Sjohnlev 	return (error);
347843e1988Sjohnlev }
348843e1988Sjohnlev 
/*
 * Character device entry points.  Only open, ioctl and segmap are
 * implemented by this driver; the remaining slots are filled with the
 * stock nulldev/nodev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops privcmd_cb_ops = {
	privcmd_open,	/* open */
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	privcmd_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	privcmd_segmap,	/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_64BIT | D_NEW | D_MP	/* driver flags */
};
366843e1988Sjohnlev 
/*
 * Device operations: getinfo/attach/detach are provided by this file and
 * the character entry points come from privcmd_cb_ops.
 */
static struct dev_ops privcmd_dv_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* reference count */
	privcmd_getinfo,	/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	privcmd_attach,		/* attach */
	privcmd_detach,		/* detach */
	nodev,			/* reset */
	&privcmd_cb_ops,	/* struct cb_ops */
	0,			/* struct bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
381843e1988Sjohnlev 
/*
 * Module linkage information for a device driver: ties the driver name
 * string to privcmd_dv_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* this module is a device driver */
	"privcmd driver",	/* description string */
	&privcmd_dv_ops		/* driver ops */
};
387843e1988Sjohnlev 
/*
 * Linkage structure passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info().
 */
static struct modlinkage modl = {
	MODREV_1,
	&modldrv
};
392843e1988Sjohnlev 
393843e1988Sjohnlev int
394843e1988Sjohnlev _init(void)
395843e1988Sjohnlev {
396843e1988Sjohnlev 	return (mod_install(&modl));
397843e1988Sjohnlev }
398843e1988Sjohnlev 
399843e1988Sjohnlev int
400843e1988Sjohnlev _fini(void)
401843e1988Sjohnlev {
402843e1988Sjohnlev 	return (mod_remove(&modl));
403843e1988Sjohnlev }
404843e1988Sjohnlev 
405843e1988Sjohnlev int
406843e1988Sjohnlev _info(struct modinfo *modinfo)
407843e1988Sjohnlev {
408843e1988Sjohnlev 	return (mod_info(&modl, modinfo));
409843e1988Sjohnlev }
410