xref: /freebsd/sys/dev/vmm/vmm_dev.c (revision 99afbc5cc7ae8ba7b112fbafbf24ea2575a65ba4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6  * All rights reserved.
7  */
8 
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #include <sys/fcntl.h>
12 #include <sys/ioccom.h>
13 #include <sys/jail.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/mman.h>
17 #include <sys/module.h>
18 #include <sys/priv.h>
19 #include <sys/proc.h>
20 #include <sys/queue.h>
21 #include <sys/resourcevar.h>
22 #include <sys/smp.h>
23 #include <sys/sx.h>
24 #include <sys/sysctl.h>
25 #include <sys/ucred.h>
26 #include <sys/uio.h>
27 
28 #include <machine/vmm.h>
29 
30 #include <vm/vm.h>
31 #include <vm/vm_object.h>
32 
33 #include <dev/vmm/vmm_dev.h>
34 #include <dev/vmm/vmm_mem.h>
35 #include <dev/vmm/vmm_stat.h>
36 #include <dev/vmm/vmm_vm.h>
37 
38 #ifdef __amd64__
39 #ifdef COMPAT_FREEBSD12
40 struct vm_memseg_12 {
41 	int		segid;
42 	size_t		len;
43 	char		name[64];
44 };
45 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
46 
47 #define	VM_ALLOC_MEMSEG_12	\
48 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
49 #define	VM_GET_MEMSEG_12	\
50 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
51 #endif /* COMPAT_FREEBSD12 */
52 #ifdef COMPAT_FREEBSD14
53 struct vm_memseg_14 {
54 	int		segid;
55 	size_t		len;
56 	char		name[VM_MAX_SUFFIXLEN + 1];
57 };
58 _Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
59     "COMPAT_FREEBSD14 ABI");
60 
61 #define	VM_ALLOC_MEMSEG_14	\
62 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
63 #define	VM_GET_MEMSEG_14	\
64 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
65 #endif /* COMPAT_FREEBSD14 */
66 #endif /* __amd64__ */
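/*
 * Note on the compat definitions above: the _IOW()/_IOWR() command numbers
 * encode the argument size, so binaries built against the 12.x or 14.x
 * struct vm_memseg layouts produce distinct ioctl values.  Registering these
 * compat variants lets vmmdev_ioctl() keep servicing such binaries alongside
 * the current layout.
 */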
67 
68 struct devmem_softc {
69 	int	segid;
70 	char	*name;
71 	struct cdev *cdev;
72 	struct vmmdev_softc *sc;
73 	SLIST_ENTRY(devmem_softc) link;
74 };
75 
76 struct vmmdev_softc {
77 	struct vm	*vm;		/* vm instance cookie */
78 	struct cdev	*cdev;
79 	struct ucred	*ucred;
80 	SLIST_ENTRY(vmmdev_softc) link;
81 	LIST_ENTRY(vmmdev_softc) priv_link;
82 	SLIST_HEAD(, devmem_softc) devmem;
83 	int		flags;
84 };
85 
86 struct vmmctl_priv {
87 	LIST_HEAD(, vmmdev_softc) softcs;
88 };
89 
90 static bool vmm_initialized = false;
91 
92 static SLIST_HEAD(, vmmdev_softc) head;
93 
94 static unsigned pr_allow_flag;
95 static struct sx vmmdev_mtx;
96 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");
97 
98 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
99 
100 SYSCTL_DECL(_hw_vmm);
101 
102 u_int vm_maxcpu;
103 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
104     &vm_maxcpu, 0, "Maximum number of vCPUs");
105 
106 u_int vm_maxvmms;
107 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
108     &vm_maxvmms, 0, "Maximum number of VMM instances per user");
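/*
 * The per-user limit above is enforced in vmmdev_create() via chgvmmcnt()
 * against the creating credential's real uid.
 */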
109 
110 static void devmem_destroy(void *arg);
111 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
112 static void vmmdev_destroy(struct vmmdev_softc *sc);
113 
114 static int
115 vmm_priv_check(struct ucred *ucred)
116 {
117 	if (jailed(ucred) &&
118 	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
119 		return (EPERM);
120 
121 	return (0);
122 }
123 
124 static int
125 vcpu_lock_one(struct vcpu *vcpu)
126 {
127 	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
128 }
129 
130 static void
131 vcpu_unlock_one(struct vcpu *vcpu)
132 {
133 	enum vcpu_state state;
134 
135 	state = vcpu_get_state(vcpu, NULL);
136 	if (state != VCPU_FROZEN) {
137 		panic("vcpu %s(%d) has invalid state %d",
138 		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
139 	}
140 
141 	vcpu_set_state(vcpu, VCPU_IDLE, false);
142 }
143 
144 static int
145 vcpu_lock_all(struct vmmdev_softc *sc)
146 {
147 	int error;
148 
149 	/*
150 	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
151 	 * in a consistent order so we need to serialize to avoid deadlocks.
152 	 */
153 	vm_lock_vcpus(sc->vm);
154 	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
155 	if (error != 0)
156 		vm_unlock_vcpus(sc->vm);
157 	return (error);
158 }
159 
160 static void
161 vcpu_unlock_all(struct vmmdev_softc *sc)
162 {
163 	struct vcpu *vcpu;
164 	uint16_t i, maxcpus;
165 
166 	maxcpus = vm_get_maxcpus(sc->vm);
167 	for (i = 0; i < maxcpus; i++) {
168 		vcpu = vm_vcpu(sc->vm, i);
169 		if (vcpu == NULL)
170 			continue;
171 		vcpu_unlock_one(vcpu);
172 	}
173 	vm_unlock_vcpus(sc->vm);
174 }
175 
176 static struct vmmdev_softc *
177 vmmdev_lookup(const char *name, struct ucred *cred)
178 {
179 	struct vmmdev_softc *sc;
180 
181 	sx_assert(&vmmdev_mtx, SA_XLOCKED);
182 
183 	SLIST_FOREACH(sc, &head, link) {
184 		if (strcmp(name, vm_name(sc->vm)) == 0)
185 			break;
186 	}
187 
188 	if (sc == NULL)
189 		return (NULL);
190 
191 	if (cr_cansee(cred, sc->ucred))
192 		return (NULL);
193 
194 	return (sc);
195 }
196 
197 static struct vmmdev_softc *
198 vmmdev_lookup2(struct cdev *cdev)
199 {
200 	return (cdev->si_drv1);
201 }
202 
203 static int
204 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
205 {
206 	int error, off, c, prot;
207 	vm_paddr_t gpa, maxaddr;
208 	void *hpa, *cookie;
209 	struct vmmdev_softc *sc;
210 
211 	sc = vmmdev_lookup2(cdev);
212 	if (sc == NULL)
213 		return (ENXIO);
214 
215 	/*
216 	 * Get a read lock on the guest memory map.
217 	 */
218 	vm_slock_memsegs(sc->vm);
219 
220 	error = 0;
221 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
222 	maxaddr = vmm_sysmem_maxaddr(sc->vm);
223 	while (uio->uio_resid > 0 && error == 0) {
224 		gpa = uio->uio_offset;
225 		off = gpa & PAGE_MASK;
226 		c = min(uio->uio_resid, PAGE_SIZE - off);
227 
228 		/*
229 		 * The VM has a hole in its physical memory map. If we want to
230 		 * use 'dd' to inspect memory beyond the hole we need to
231 		 * provide bogus data for memory that lies in the hole.
232 		 *
233 		 * Since this device does not support lseek(2), dd(1) will
234 		 * read(2) blocks of data to simulate the lseek(2).
235 		 */
236 		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
237 		if (hpa == NULL) {
238 			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
239 				error = uiomove(__DECONST(void *, zero_region),
240 				    c, uio);
241 			else
242 				error = EFAULT;
243 		} else {
244 			error = uiomove(hpa, c, uio);
245 			vm_gpa_release(cookie);
246 		}
247 	}
248 	vm_unlock_memsegs(sc->vm);
249 	return (error);
250 }
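/*
 * Illustrative usage (a sketch, not part of the driver): because the cdev
 * services read(2)/write(2) at arbitrary offsets, guest physical memory can
 * be inspected from userspace with something like
 *
 *	dd if=/dev/vmm/<vmname> bs=4k skip=<gpa in 4 KB blocks> count=1 | hexdump -C
 *
 * where <vmname> and the offset are whatever the caller chooses; holes in
 * the guest physical map read back as zeroes, as described in the comment
 * above.
 */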
251 
252 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
253 
254 static int
255 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
256 {
257 	struct devmem_softc *dsc;
258 	int error;
259 	bool sysmem;
260 
261 	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
262 	if (error || mseg->len == 0)
263 		return (error);
264 
265 	if (!sysmem) {
266 		SLIST_FOREACH(dsc, &sc->devmem, link) {
267 			if (dsc->segid == mseg->segid)
268 				break;
269 		}
270 		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
271 		    __func__, mseg->segid));
272 		error = copystr(dsc->name, mseg->name, len, NULL);
273 	} else {
274 		bzero(mseg->name, len);
275 	}
276 
277 	return (error);
278 }
279 
280 static int
281 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
282     struct domainset *domainset)
283 {
284 	char *name;
285 	int error;
286 	bool sysmem;
287 
288 	error = 0;
289 	name = NULL;
290 	sysmem = true;
291 
292 	/*
293 	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
294 	 * be stripped off when devfs processes the full string.
295 	 */
296 	if (VM_MEMSEG_NAME(mseg)) {
297 		sysmem = false;
298 		name = malloc(len, M_VMMDEV, M_WAITOK);
299 		error = copystr(mseg->name, name, len, NULL);
300 		if (error)
301 			goto done;
302 	}
303 	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
304 	if (error)
305 		goto done;
306 
307 	if (VM_MEMSEG_NAME(mseg)) {
308 		error = devmem_create_cdev(sc, mseg->segid, name);
309 		if (error)
310 			vm_free_memseg(sc->vm, mseg->segid);
311 		else
312 			name = NULL;	/* freed when 'cdev' is destroyed */
313 	}
314 done:
315 	free(name, M_VMMDEV);
316 	return (error);
317 }
318 
319 #if defined(__amd64__) && \
320     (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
321 /*
322  * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
323  */
324 static void
325 adjust_segid(struct vm_memseg *mseg)
326 {
327 	if (mseg->segid != VM_SYSMEM) {
328 		mseg->segid += (VM_BOOTROM - 1);
329 	}
330 }
331 #endif
332 
333 static int
334 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
335     uint64_t *regval)
336 {
337 	int error, i;
338 
339 	error = 0;
340 	for (i = 0; i < count; i++) {
341 		error = vm_get_register(vcpu, regnum[i], &regval[i]);
342 		if (error)
343 			break;
344 	}
345 	return (error);
346 }
347 
348 static int
349 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
350     uint64_t *regval)
351 {
352 	int error, i;
353 
354 	error = 0;
355 	for (i = 0; i < count; i++) {
356 		error = vm_set_register(vcpu, regnum[i], regval[i]);
357 		if (error)
358 			break;
359 	}
360 	return (error);
361 }
362 
363 static int
364 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
365 {
366 	int error;
367 
368 	/*
369 	 * A jail without vmm access shouldn't be able to access vmm device
370 	 * files at all, but check here just to be thorough.
371 	 */
372 	error = vmm_priv_check(td->td_ucred);
373 	if (error != 0)
374 		return (error);
375 
376 	return (0);
377 }
378 
379 static const struct vmmdev_ioctl vmmdev_ioctls[] = {
380 	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
381 	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
382 	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
383 	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
384 	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
385 	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
386 	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
387 	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
388 	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
389 	VMMDEV_IOCTL(VM_STAT_DESC, 0),
390 
391 #ifdef __amd64__
392 #ifdef COMPAT_FREEBSD12
393 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
394 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
395 #endif
396 #ifdef COMPAT_FREEBSD14
397 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
398 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
399 #endif
400 #endif /* __amd64__ */
401 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
402 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
403 	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
404 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
405 	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
406 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
407 	VMMDEV_IOCTL(VM_REINIT,
408 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
409 
410 #ifdef __amd64__
411 #if defined(COMPAT_FREEBSD12)
412 	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
413 #endif
414 #ifdef COMPAT_FREEBSD14
415 	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
416 #endif
417 #endif /* __amd64__ */
418 	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
419 	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
420 
421 	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
422 	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
423 
424 	VMMDEV_IOCTL(VM_SUSPEND, 0),
425 	VMMDEV_IOCTL(VM_GET_CPUS, 0),
426 	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
427 	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
428 };
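/*
 * The table above only describes the locking vmmdev_ioctl() performs before
 * dispatching a command; the commands themselves are issued against the
 * per-VM cdev.  A rough userspace sketch (normally hidden behind libvmmapi;
 * exact field names are assumptions based on how this file consumes the
 * argument structures, e.g. the leading int is the vcpu id):
 *
 *	int fd = open("/dev/vmm/<vmname>", O_RDWR);
 *	struct vm_register vmreg;
 *	// fill in the vcpu id and register number
 *	ioctl(fd, VM_GET_REGISTER, &vmreg);
 */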
429 
430 static int
431 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
432     struct thread *td)
433 {
434 	struct vmmdev_softc *sc;
435 	struct vcpu *vcpu;
436 	const struct vmmdev_ioctl *ioctl;
437 	struct vm_memseg *mseg;
438 	int error, vcpuid;
439 
440 	sc = vmmdev_lookup2(cdev);
441 	if (sc == NULL)
442 		return (ENXIO);
443 
444 	ioctl = NULL;
445 	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
446 		if (vmmdev_ioctls[i].cmd == cmd) {
447 			ioctl = &vmmdev_ioctls[i];
448 			break;
449 		}
450 	}
451 	if (ioctl == NULL) {
452 		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
453 			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
454 				ioctl = &vmmdev_machdep_ioctls[i];
455 				break;
456 			}
457 		}
458 	}
459 	if (ioctl == NULL)
460 		return (ENOTTY);
461 
462 	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
463 		error = priv_check(td, PRIV_DRIVER);
464 		if (error != 0)
465 			return (error);
466 	}
467 
468 	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
469 		vm_xlock_memsegs(sc->vm);
470 	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
471 		vm_slock_memsegs(sc->vm);
472 
473 	vcpu = NULL;
474 	vcpuid = -1;
475 	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
476 	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
477 		vcpuid = *(int *)data;
478 		if (vcpuid == -1) {
479 			if ((ioctl->flags &
480 			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
481 				error = EINVAL;
482 				goto lockfail;
483 			}
484 		} else {
485 			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
486 			if (vcpu == NULL) {
487 				error = EINVAL;
488 				goto lockfail;
489 			}
490 			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
491 				error = vcpu_lock_one(vcpu);
492 				if (error)
493 					goto lockfail;
494 			}
495 		}
496 	}
497 	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
498 		error = vcpu_lock_all(sc);
499 		if (error)
500 			goto lockfail;
501 	}
502 
503 	switch (cmd) {
504 	case VM_SUSPEND: {
505 		struct vm_suspend *vmsuspend;
506 
507 		vmsuspend = (struct vm_suspend *)data;
508 		error = vm_suspend(sc->vm, vmsuspend->how);
509 		break;
510 	}
511 	case VM_REINIT:
512 		error = vm_reinit(sc->vm);
513 		break;
514 	case VM_STAT_DESC: {
515 		struct vm_stat_desc *statdesc;
516 
517 		statdesc = (struct vm_stat_desc *)data;
518 		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
519 		    sizeof(statdesc->desc));
520 		break;
521 	}
522 	case VM_STATS: {
523 		struct vm_stats *vmstats;
524 
525 		vmstats = (struct vm_stats *)data;
526 		getmicrotime(&vmstats->tv);
527 		error = vmm_stat_copy(vcpu, vmstats->index,
528 		    nitems(vmstats->statbuf), &vmstats->num_entries,
529 		    vmstats->statbuf);
530 		break;
531 	}
532 	case VM_MMAP_GETNEXT: {
533 		struct vm_memmap *mm;
534 
535 		mm = (struct vm_memmap *)data;
536 		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
537 		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
538 		break;
539 	}
540 	case VM_MMAP_MEMSEG: {
541 		struct vm_memmap *mm;
542 
543 		mm = (struct vm_memmap *)data;
544 		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
545 		    mm->len, mm->prot, mm->flags);
546 		break;
547 	}
548 	case VM_MUNMAP_MEMSEG: {
549 		struct vm_munmap *mu;
550 
551 		mu = (struct vm_munmap *)data;
552 		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
553 		break;
554 	}
555 #ifdef __amd64__
556 #ifdef COMPAT_FREEBSD12
557 	case VM_ALLOC_MEMSEG_12:
558 		mseg = (struct vm_memseg *)data;
559 
560 		adjust_segid(mseg);
561 		error = alloc_memseg(sc, mseg,
562 		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
563 		break;
564 	case VM_GET_MEMSEG_12:
565 		mseg = (struct vm_memseg *)data;
566 
567 		adjust_segid(mseg);
568 		error = get_memseg(sc, mseg,
569 		    sizeof(((struct vm_memseg_12 *)0)->name));
570 		break;
571 #endif /* COMPAT_FREEBSD12 */
572 #ifdef COMPAT_FREEBSD14
573 	case VM_ALLOC_MEMSEG_14:
574 		mseg = (struct vm_memseg *)data;
575 
576 		adjust_segid(mseg);
577 		error = alloc_memseg(sc, mseg,
578 		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
579 		break;
580 	case VM_GET_MEMSEG_14:
581 		mseg = (struct vm_memseg *)data;
582 
583 		adjust_segid(mseg);
584 		error = get_memseg(sc, mseg,
585 		    sizeof(((struct vm_memseg_14 *)0)->name));
586 		break;
587 #endif /* COMPAT_FREEBSD14 */
588 #endif /* __amd64__ */
589 	case VM_ALLOC_MEMSEG: {
590 		domainset_t *mask;
591 		struct domainset *domainset, domain;
592 
593 		domainset = NULL;
594 		mseg = (struct vm_memseg *)data;
595 		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
596 			if (mseg->ds_mask_size < sizeof(domainset_t) ||
597 			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
598 				error = ERANGE;
599 				break;
600 			}
601 			memset(&domain, 0, sizeof(domain));
602 			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
603 			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
604 			if (error) {
605 				free(mask, M_VMMDEV);
606 				break;
607 			}
608 			error = domainset_populate(&domain, mask, mseg->ds_policy,
609 			    mseg->ds_mask_size);
610 			free(mask, M_VMMDEV);
611 			if (error)
612 				break;
613 			domainset = domainset_create(&domain);
614 			if (domainset == NULL) {
615 				error = EINVAL;
616 				break;
617 			}
618 		}
619 		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
620 		break;
621 	}
622 	case VM_GET_MEMSEG:
623 		error = get_memseg(sc, (struct vm_memseg *)data,
624 		    sizeof(((struct vm_memseg *)0)->name));
625 		break;
626 	case VM_GET_REGISTER: {
627 		struct vm_register *vmreg;
628 
629 		vmreg = (struct vm_register *)data;
630 		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
631 		break;
632 	}
633 	case VM_SET_REGISTER: {
634 		struct vm_register *vmreg;
635 
636 		vmreg = (struct vm_register *)data;
637 		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
638 		break;
639 	}
640 	case VM_GET_REGISTER_SET: {
641 		struct vm_register_set *vmregset;
642 		uint64_t *regvals;
643 		int *regnums;
644 
645 		vmregset = (struct vm_register_set *)data;
646 		if (vmregset->count > VM_REG_LAST) {
647 			error = EINVAL;
648 			break;
649 		}
650 		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
651 		    M_VMMDEV, M_WAITOK);
652 		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
653 		    M_VMMDEV, M_WAITOK);
654 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
655 		    vmregset->count);
656 		if (error == 0)
657 			error = vm_get_register_set(vcpu,
658 			    vmregset->count, regnums, regvals);
659 		if (error == 0)
660 			error = copyout(regvals, vmregset->regvals,
661 			    sizeof(regvals[0]) * vmregset->count);
662 		free(regvals, M_VMMDEV);
663 		free(regnums, M_VMMDEV);
664 		break;
665 	}
666 	case VM_SET_REGISTER_SET: {
667 		struct vm_register_set *vmregset;
668 		uint64_t *regvals;
669 		int *regnums;
670 
671 		vmregset = (struct vm_register_set *)data;
672 		if (vmregset->count > VM_REG_LAST) {
673 			error = EINVAL;
674 			break;
675 		}
676 		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
677 		    M_VMMDEV, M_WAITOK);
678 		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
679 		    M_VMMDEV, M_WAITOK);
680 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
681 		    vmregset->count);
682 		if (error == 0)
683 			error = copyin(vmregset->regvals, regvals,
684 			    sizeof(regvals[0]) * vmregset->count);
685 		if (error == 0)
686 			error = vm_set_register_set(vcpu,
687 			    vmregset->count, regnums, regvals);
688 		free(regvals, M_VMMDEV);
689 		free(regnums, M_VMMDEV);
690 		break;
691 	}
692 	case VM_GET_CAPABILITY: {
693 		struct vm_capability *vmcap;
694 
695 		vmcap = (struct vm_capability *)data;
696 		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
697 		break;
698 	}
699 	case VM_SET_CAPABILITY: {
700 		struct vm_capability *vmcap;
701 
702 		vmcap = (struct vm_capability *)data;
703 		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
704 		break;
705 	}
706 	case VM_ACTIVATE_CPU:
707 		error = vm_activate_cpu(vcpu);
708 		break;
709 	case VM_GET_CPUS: {
710 		struct vm_cpuset *vm_cpuset;
711 		cpuset_t *cpuset;
712 		int size;
713 
714 		error = 0;
715 		vm_cpuset = (struct vm_cpuset *)data;
716 		size = vm_cpuset->cpusetsize;
717 		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
718 			error = ERANGE;
719 			break;
720 		}
721 		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
722 		    M_WAITOK | M_ZERO);
723 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
724 			*cpuset = vm_active_cpus(sc->vm);
725 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
726 			*cpuset = vm_suspended_cpus(sc->vm);
727 		else if (vm_cpuset->which == VM_DEBUG_CPUS)
728 			*cpuset = vm_debug_cpus(sc->vm);
729 		else
730 			error = EINVAL;
731 		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
732 			error = ERANGE;
733 		if (error == 0)
734 			error = copyout(cpuset, vm_cpuset->cpus, size);
735 		free(cpuset, M_TEMP);
736 		break;
737 	}
738 	case VM_SUSPEND_CPU:
739 		error = vm_suspend_cpu(sc->vm, vcpu);
740 		break;
741 	case VM_RESUME_CPU:
742 		error = vm_resume_cpu(sc->vm, vcpu);
743 		break;
744 	case VM_SET_TOPOLOGY: {
745 		struct vm_cpu_topology *topology;
746 
747 		topology = (struct vm_cpu_topology *)data;
748 		error = vm_set_topology(sc->vm, topology->sockets,
749 		    topology->cores, topology->threads, topology->maxcpus);
750 		break;
751 	}
752 	case VM_GET_TOPOLOGY: {
753 		struct vm_cpu_topology *topology;
754 
755 		topology = (struct vm_cpu_topology *)data;
756 		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
757 		    &topology->threads, &topology->maxcpus);
758 		error = 0;
759 		break;
760 	}
761 	default:
762 		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
763 		    td);
764 		break;
765 	}
766 
767 	if ((ioctl->flags &
768 	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
769 		vm_unlock_memsegs(sc->vm);
770 	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
771 		vcpu_unlock_all(sc);
772 	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
773 		vcpu_unlock_one(vcpu);
774 
775 	/*
776 	 * Make sure that no handler returns a kernel-internal
777 	 * error value to userspace.
778 	 */
779 	KASSERT(error == ERESTART || error >= 0,
780 	    ("vmmdev_ioctl: invalid error return %d", error));
781 	return (error);
782 
783 lockfail:
784 	if ((ioctl->flags &
785 	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
786 		vm_unlock_memsegs(sc->vm);
787 	return (error);
788 }
789 
790 static int
791 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
792     struct vm_object **objp, int nprot)
793 {
794 	struct vmmdev_softc *sc;
795 	vm_paddr_t gpa;
796 	size_t len;
797 	vm_ooffset_t segoff, first, last;
798 	int error, found, segid;
799 	bool sysmem;
800 
801 	first = *offset;
802 	last = first + mapsize;
803 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
804 		return (EINVAL);
805 
806 	sc = vmmdev_lookup2(cdev);
807 	if (sc == NULL) {
808 		/* virtual machine is in the process of being created */
809 		return (EINVAL);
810 	}
811 
812 	/*
813 	 * Get a read lock on the guest memory map.
814 	 */
815 	vm_slock_memsegs(sc->vm);
816 
817 	gpa = 0;
818 	found = 0;
819 	while (!found) {
820 		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
821 		    NULL, NULL);
822 		if (error)
823 			break;
824 
825 		if (first >= gpa && last <= gpa + len)
826 			found = 1;
827 		else
828 			gpa += len;
829 	}
830 
831 	if (found) {
832 		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
833 		KASSERT(error == 0 && *objp != NULL,
834 		    ("%s: invalid memory segment %d", __func__, segid));
835 		if (sysmem) {
836 			vm_object_reference(*objp);
837 			*offset = segoff + (first - gpa);
838 		} else {
839 			error = EINVAL;
840 		}
841 	}
842 	vm_unlock_memsegs(sc->vm);
843 	return (error);
844 }
845 
846 static void
847 vmmdev_destroy(struct vmmdev_softc *sc)
848 {
849 	struct devmem_softc *dsc;
850 	int error __diagused;
851 
852 	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
853 	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));
854 
855 	/*
856 	 * Destroy all cdevs:
857 	 *
858 	 * - any new operations on the 'cdev' will return an error (ENXIO).
859 	 *
860 	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
861 	 */
862 	SLIST_FOREACH(dsc, &sc->devmem, link) {
863 		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
864 		devmem_destroy(dsc);
865 	}
866 
867 	vm_disable_vcpu_creation(sc->vm);
868 	error = vcpu_lock_all(sc);
869 	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
870 	vm_unlock_vcpus(sc->vm);
871 
872 	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
873 		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
874 		SLIST_REMOVE_HEAD(&sc->devmem, link);
875 		free(dsc->name, M_VMMDEV);
876 		free(dsc, M_VMMDEV);
877 	}
878 
879 	if (sc->vm != NULL)
880 		vm_destroy(sc->vm);
881 
882 	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
883 	crfree(sc->ucred);
884 
885 	sx_xlock(&vmmdev_mtx);
886 	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
887 	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
888 		LIST_REMOVE(sc, priv_link);
889 	sx_xunlock(&vmmdev_mtx);
890 	wakeup(sc);
891 	free(sc, M_VMMDEV);
892 }
893 
894 static int
895 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
896 {
897 	struct cdev *cdev;
898 	struct vmmdev_softc *sc;
899 
900 	sx_xlock(&vmmdev_mtx);
901 	sc = vmmdev_lookup(name, cred);
902 	if (sc == NULL || sc->cdev == NULL) {
903 		sx_xunlock(&vmmdev_mtx);
904 		return (EINVAL);
905 	}
906 
907 	/*
908 	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
909 	 * is scheduled for destruction.
910 	 */
911 	cdev = sc->cdev;
912 	sc->cdev = NULL;
913 	sx_xunlock(&vmmdev_mtx);
914 
915 	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
916 	destroy_dev(cdev);
917 	vmmdev_destroy(sc);
918 
919 	return (0);
920 }
921 
922 static int
923 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
924 {
925 	char *buf;
926 	int error, buflen;
927 
928 	error = vmm_priv_check(req->td->td_ucred);
929 	if (error)
930 		return (error);
931 
932 	buflen = VM_MAX_NAMELEN + 1;
933 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
934 	error = sysctl_handle_string(oidp, buf, buflen, req);
935 	if (error == 0 && req->newptr != NULL)
936 		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
937 	free(buf, M_VMMDEV);
938 	return (error);
939 }
940 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
941     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
942     NULL, 0, sysctl_vmm_destroy, "A",
943     "Destroy a vmm(4) instance (legacy interface)");
944 
945 static struct cdevsw vmmdevsw = {
946 	.d_name		= "vmmdev",
947 	.d_version	= D_VERSION,
948 	.d_open		= vmmdev_open,
949 	.d_ioctl	= vmmdev_ioctl,
950 	.d_mmap_single	= vmmdev_mmap_single,
951 	.d_read		= vmmdev_rw,
952 	.d_write	= vmmdev_rw,
953 };
954 
955 static struct vmmdev_softc *
956 vmmdev_alloc(struct vm *vm, struct ucred *cred)
957 {
958 	struct vmmdev_softc *sc;
959 
960 	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
961 	SLIST_INIT(&sc->devmem);
962 	sc->vm = vm;
963 	sc->ucred = crhold(cred);
964 	return (sc);
965 }
966 
967 static int
968 vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
969 {
970 	struct make_dev_args mda;
971 	struct cdev *cdev;
972 	struct vmmdev_softc *sc;
973 	struct vmmctl_priv *priv;
974 	struct vm *vm;
975 	int error;
976 
977 	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
978 		return (EINVAL);
979 
980 	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
981 		return (EINVAL);
982 	error = devfs_get_cdevpriv((void **)&priv);
983 	if (error)
984 		return (error);
985 
986 	sx_xlock(&vmmdev_mtx);
987 	sc = vmmdev_lookup(name, cred);
988 	if (sc != NULL) {
989 		sx_xunlock(&vmmdev_mtx);
990 		return (EEXIST);
991 	}
992 
993 	error = vm_create(name, &vm);
994 	if (error != 0) {
995 		sx_xunlock(&vmmdev_mtx);
996 		return (error);
997 	}
998 	sc = vmmdev_alloc(vm, cred);
999 	SLIST_INSERT_HEAD(&head, sc, link);
1000 	sc->flags = flags;
1001 	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
1002 		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);
1003 
1004 	make_dev_args_init(&mda);
1005 	mda.mda_devsw = &vmmdevsw;
1006 	mda.mda_cr = sc->ucred;
1007 	mda.mda_uid = UID_ROOT;
1008 	mda.mda_gid = GID_WHEEL;
1009 	mda.mda_mode = 0600;
1010 	mda.mda_si_drv1 = sc;
1011 	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1012 	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
1013 	if (error != 0) {
1014 		sx_xunlock(&vmmdev_mtx);
1015 		vmmdev_destroy(sc);
1016 		return (error);
1017 	}
1018 	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
1019 		sx_xunlock(&vmmdev_mtx);
1020 		destroy_dev(cdev);
1021 		vmmdev_destroy(sc);
1022 		return (ENOMEM);
1023 	}
1024 	sc->cdev = cdev;
1025 	sx_xunlock(&vmmdev_mtx);
1026 	return (0);
1027 }
1028 
1029 static int
1030 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1031 {
1032 	char *buf;
1033 	int error, buflen;
1034 
1035 	if (!vmm_initialized)
1036 		return (ENXIO);
1037 
1038 	error = vmm_priv_check(req->td->td_ucred);
1039 	if (error != 0)
1040 		return (error);
1041 
1042 	buflen = VM_MAX_NAMELEN + 1;
1043 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1044 	error = sysctl_handle_string(oidp, buf, buflen, req);
1045 	if (error == 0 && req->newptr != NULL)
1046 		error = vmmdev_create(buf, 0, req->td->td_ucred);
1047 	free(buf, M_VMMDEV);
1048 	return (error);
1049 }
1050 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1051     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1052     NULL, 0, sysctl_vmm_create, "A",
1053     "Create a vmm(4) instance (legacy interface)");
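/*
 * Example for the legacy interfaces above: a VM can be created and destroyed
 * from the command line without going through /dev/vmmctl, e.g.
 *
 *	sysctl hw.vmm.create=testvm
 *	sysctl hw.vmm.destroy=testvm
 *
 * "testvm" is an arbitrary example name; both knobs require vmm privileges
 * (see vmm_priv_check()).
 */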
1054 
1055 static void
1056 vmmctl_dtor(void *arg)
1057 {
1058 	struct cdev *sc_cdev;
1059 	struct vmmdev_softc *sc;
1060 	struct vmmctl_priv *priv = arg;
1061 
1062 	/*
1063 	 * Scan the softc list for any VMs associated with
1064 	 * the current descriptor and destroy them.
1065 	 */
1066 	sx_xlock(&vmmdev_mtx);
1067 	while (!LIST_EMPTY(&priv->softcs)) {
1068 		sc = LIST_FIRST(&priv->softcs);
1069 		sc_cdev = sc->cdev;
1070 		if (sc_cdev != NULL) {
1071 			sc->cdev = NULL;
1072 		} else {
1073 			/*
1074 			 * Another thread has already
1075 			 * started the removal process.
1076 			 * Sleep until 'vmmdev_destroy' notifies us
1077 			 * that the removal has finished.
1078 			 */
1079 			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
1080 			continue;
1081 		}
1082 		/*
1083 		 * Temporarily drop the lock to allow vmmdev_destroy to run.
1084 		 */
1085 		sx_xunlock(&vmmdev_mtx);
1086 		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
1087 		destroy_dev(sc_cdev);
1088 		/* vmmdev_destroy will unlink the 'priv_link' entry. */
1089 		vmmdev_destroy(sc);
1090 		sx_xlock(&vmmdev_mtx);
1091 	}
1092 	sx_xunlock(&vmmdev_mtx);
1093 
1094 	free(priv, M_VMMDEV);
1095 }
1096 
1097 static int
1098 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
1099 {
1100 	int error;
1101 	struct vmmctl_priv *priv;
1102 
1103 	error = vmm_priv_check(td->td_ucred);
1104 	if (error != 0)
1105 		return (error);
1106 
1107 	if ((flags & FWRITE) == 0)
1108 		return (EPERM);
1109 
1110 	priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
1111 	LIST_INIT(&priv->softcs);
1112 	error = devfs_set_cdevpriv(priv, vmmctl_dtor);
1113 	if (error != 0) {
1114 		free(priv, M_VMMDEV);
1115 		return (error);
1116 	}
1117 
1118 	return (0);
1119 }
1120 
1121 static int
1122 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
1123     struct thread *td)
1124 {
1125 	int error;
1126 
1127 	switch (cmd) {
1128 	case VMMCTL_VM_CREATE: {
1129 		struct vmmctl_vm_create *vmc;
1130 
1131 		vmc = (struct vmmctl_vm_create *)data;
1132 		vmc->name[VM_MAX_NAMELEN] = '\0';
1133 		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
1134 			if (vmc->reserved[i] != 0) {
1135 				error = EINVAL;
1136 				return (error);
1137 			}
1138 		}
1139 
1140 		error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
1141 		break;
1142 	}
1143 	case VMMCTL_VM_DESTROY: {
1144 		struct vmmctl_vm_destroy *vmd;
1145 
1146 		vmd = (struct vmmctl_vm_destroy *)data;
1147 		vmd->name[VM_MAX_NAMELEN] = '\0';
1148 		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
1149 			if (vmd->reserved[i] != 0) {
1150 				error = EINVAL;
1151 				return (error);
1152 			}
1153 		}
1154 
1155 		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
1156 		break;
1157 	}
1158 	default:
1159 		error = ENOTTY;
1160 		break;
1161 	}
1162 
1163 	return (error);
1164 }
1165 
1166 static struct cdev *vmmctl_cdev;
1167 static struct cdevsw vmmctlsw = {
1168 	.d_name		= "vmmctl",
1169 	.d_version	= D_VERSION,
1170 	.d_open		= vmmctl_open,
1171 	.d_ioctl	= vmmctl_ioctl,
1172 };
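/*
 * Illustrative userspace usage of the control device (a sketch; the real
 * consumer is libvmmapi):
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);
 *	struct vmmctl_vm_create vmc;
 *	memset(&vmc, 0, sizeof(vmc));
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);
 *
 * The name, flags and reserved fields are the ones consumed by
 * vmmctl_ioctl() above; any non-zero reserved word results in EINVAL, and
 * the device must be opened for writing (see vmmctl_open()).
 */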
1173 
1174 static int
1175 vmmdev_init(void)
1176 {
1177 	int error;
1178 
1179 	sx_xlock(&vmmdev_mtx);
1180 	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
1181 	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
1182 	if (error == 0)
1183 		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1184 		    "Allow use of vmm in a jail.");
1185 	sx_xunlock(&vmmdev_mtx);
1186 
1187 	return (error);
1188 }
1189 
1190 static int
1191 vmmdev_cleanup(void)
1192 {
1193 	sx_xlock(&vmmdev_mtx);
1194 	if (!SLIST_EMPTY(&head)) {
1195 		sx_xunlock(&vmmdev_mtx);
1196 		return (EBUSY);
1197 	}
1198 	if (vmmctl_cdev != NULL) {
1199 		destroy_dev(vmmctl_cdev);
1200 		vmmctl_cdev = NULL;
1201 	}
1202 	sx_xunlock(&vmmdev_mtx);
1203 
1204 	return (0);
1205 }
1206 
1207 static int
1208 vmm_handler(module_t mod, int what, void *arg)
1209 {
1210 	int error;
1211 
1212 	switch (what) {
1213 	case MOD_LOAD:
1214 		error = vmmdev_init();
1215 		if (error != 0)
1216 			break;
1217 
1218 		vm_maxcpu = mp_ncpus;
1219 		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
1220 		if (vm_maxcpu > VM_MAXCPU) {
1221 			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
1222 			vm_maxcpu = VM_MAXCPU;
1223 		}
1224 		if (vm_maxcpu == 0)
1225 			vm_maxcpu = 1;
1226 		vm_maxvmms = 4 * mp_ncpus;
1227 		error = vmm_modinit();
1228 		if (error == 0)
1229 			vmm_initialized = true;
1230 		else {
1231 			int error1 __diagused;
1232 
1233 			error1 = vmmdev_cleanup();
1234 			KASSERT(error1 == 0,
1235 			    ("%s: vmmdev_cleanup failed: %d", __func__, error1));
1236 		}
1237 		break;
1238 	case MOD_UNLOAD:
1239 		error = vmmdev_cleanup();
1240 		if (error == 0 && vmm_initialized) {
1241 			error = vmm_modcleanup();
1242 			if (error) {
1243 				/*
1244 				 * Something bad happened - prevent new
1245 				 * VMs from being created
1246 				 */
1247 				vmm_initialized = false;
1248 			}
1249 		}
1250 		break;
1251 	default:
1252 		error = 0;
1253 		break;
1254 	}
1255 	return (error);
1256 }
1257 
1258 static moduledata_t vmm_kmod = {
1259 	"vmm",
1260 	vmm_handler,
1261 	NULL
1262 };
1263 
1264 /*
1265  * vmm initialization has the following dependencies:
1266  *
1267  * - Initialization requires smp_rendezvous() and therefore must happen
1268  *   after SMP is fully functional (after SI_SUB_SMP).
1269  * - vmm device initialization requires an initialized devfs.
1270  */
1271 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
1272 MODULE_VERSION(vmm, 1);
1273 
1274 static int
1275 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1276     struct vm_object **objp, int nprot)
1277 {
1278 	struct devmem_softc *dsc;
1279 	vm_ooffset_t first, last;
1280 	size_t seglen;
1281 	int error;
1282 	bool sysmem;
1283 
1284 	dsc = cdev->si_drv1;
1285 	if (dsc == NULL) {
1286 		/* 'cdev' has been created but is not ready for use */
1287 		return (ENXIO);
1288 	}
1289 
1290 	first = *offset;
1291 	last = *offset + len;
1292 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1293 		return (EINVAL);
1294 
1295 	vm_slock_memsegs(dsc->sc->vm);
1296 
1297 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1298 	KASSERT(error == 0 && !sysmem && *objp != NULL,
1299 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
1300 
1301 	if (seglen >= last)
1302 		vm_object_reference(*objp);
1303 	else
1304 		error = EINVAL;
1305 
1306 	vm_unlock_memsegs(dsc->sc->vm);
1307 	return (error);
1308 }
1309 
1310 static struct cdevsw devmemsw = {
1311 	.d_name		= "devmem",
1312 	.d_version	= D_VERSION,
1313 	.d_mmap_single	= devmem_mmap_single,
1314 };
1315 
1316 static int
1317 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
1318 {
1319 	struct make_dev_args mda;
1320 	struct devmem_softc *dsc;
1321 	int error;
1322 
1323 	sx_xlock(&vmmdev_mtx);
1324 
1325 	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1326 	dsc->segid = segid;
1327 	dsc->name = devname;
1328 	dsc->sc = sc;
1329 	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1330 
1331 	make_dev_args_init(&mda);
1332 	mda.mda_devsw = &devmemsw;
1333 	mda.mda_cr = sc->ucred;
1334 	mda.mda_uid = UID_ROOT;
1335 	mda.mda_gid = GID_WHEEL;
1336 	mda.mda_mode = 0600;
1337 	mda.mda_si_drv1 = dsc;
1338 	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1339 	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
1340 	    devname);
1341 	if (error != 0) {
1342 		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
1343 		free(dsc->name, M_VMMDEV);
1344 		free(dsc, M_VMMDEV);
1345 	}
1346 
1347 	sx_xunlock(&vmmdev_mtx);
1348 
1349 	return (error);
1350 }
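/*
 * Device memory segments appear as /dev/vmm.io/<vmname>.<segname> (see the
 * make_dev_s() call above).  A userspace consumer would typically mmap(2)
 * such a node rather than the main VM device; a sketch with example names:
 *
 *	int fd = open("/dev/vmm.io/testvm.bootrom", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * The mapping must fall entirely within the segment, and PROT_EXEC is
 * rejected by devmem_mmap_single() above.
 */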
1351 
1352 static void
1353 devmem_destroy(void *arg)
1354 {
1355 	struct devmem_softc *dsc = arg;
1356 
1357 	destroy_dev(dsc->cdev);
1358 	dsc->cdev = NULL;
1359 	dsc->sc = NULL;
1360 }
1361