xref: /freebsd/sys/dev/vmm/vmm_dev.c (revision b9ef152bec6cff4cd82b68921f631bd6efb24ae6)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

static int devmem_create_cdev(const char *vmname, int id, char *devmem);

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);

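/*
 * Check whether the calling credential is permitted to use vmm(4).
 * Unjailed callers are always allowed; jailed callers must belong to a
 * prison with the vmm allow flag (registered in vmmdev_init()) enabled.
 */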
static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

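/*
 * Freeze a single vcpu so that its state can be inspected or modified;
 * vcpu_unlock_one() returns it to the idle state and panics if the vcpu
 * was not frozen.
 */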
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

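/*
 * Freeze every vcpu that has been created for the VM.  The vcpu list is
 * share-locked for the duration; on success the lock remains held until
 * vcpu_unlock_all() is called, while on failure any vcpus frozen so far
 * are released and the lock is dropped.
 */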
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

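/*
 * Look up a VM softc by name.  The caller must hold vmmdev_mtx; the lookup
 * also fails if cr_cansee() denies the caller visibility of the VM owner's
 * credentials.
 */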
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

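/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies guest physical
 * memory to or from the caller's buffer one page at a time, using the uio
 * offset as the guest physical address.
 */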
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

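/*
 * Copy a memory segment's attributes out to userspace.  For device memory
 * segments the devmem name is copied out as well; 'len' is the size of the
 * name field in the user-visible structure.
 */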
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

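/*
 * Allocate a memory segment.  A named segment is treated as device memory
 * and gets its own /dev/vmm.io/<vm>.<name> cdev; an unnamed segment is
 * ordinary system memory.
 */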
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

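/*
 * Batched register accessors: read or write 'count' registers identified by
 * 'regnum', stopping at the first error.
 */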
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

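/*
 * Table of machine-independent ioctls and the locking each one requires.
 * The flags tell vmmdev_ioctl() whether to freeze one vcpu, freeze all
 * vcpus, and/or lock the memory segment list before dispatching the
 * command.
 */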
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

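/*
 * Main ioctl dispatcher for /dev/vmm/<name>.  The command is looked up in
 * the tables above (machine-independent first, then machine-dependent), the
 * locking indicated by its flags is acquired, and the request is handled;
 * commands not handled here are forwarded to vmmdev_machdep_ioctl().
 */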
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	error = vmm_priv_check(td->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

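/*
 * mmap(2) handler for /dev/vmm/<name>: translates a guest physical address
 * range into the backing VM object of the system memory segment that maps
 * it.  Device memory segments must be mapped through their own cdev.
 */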
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

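/*
 * Tear down a VM's device state: freeze and release the vcpus, free the
 * devmem bookkeeping, destroy the cdev and the VM itself, and unlink the
 * softc from the global list if it was linked.
 */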
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

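/*
 * Handler for the hw.vmm.destroy sysctl: destroys the VM whose name is
 * written to the sysctl, along with its devmem cdevs.
 */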
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

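/*
 * Handler for the hw.vmm.create sysctl: creates a new VM with the given
 * name and a matching /dev/vmm/<name> cdev owned by the creating credential.
 */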
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->ucred = crhold(curthread->td_ucred);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

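/*
 * mmap(2) handler for a /dev/vmm.io/<vm>.<segname> device: maps a range of
 * a single device memory segment by returning a reference to its backing
 * VM object.
 */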
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

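/*
 * Create the /dev/vmm.io/<vmname>.<devname> cdev for a device memory
 * segment and link its softc into the owning VM's devmem list.  On success
 * 'devname' is retained and freed when the VM is destroyed.
 */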
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}