xref: /freebsd/sys/dev/vmm/vmm_dev.c (revision c46e5dc65ba5c9666bb4452878e332dc49730843)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6  * All rights reserved.
7  */
8 
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #include <sys/fcntl.h>
12 #include <sys/ioccom.h>
13 #include <sys/jail.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/mman.h>
17 #include <sys/module.h>
18 #include <sys/priv.h>
19 #include <sys/proc.h>
20 #include <sys/queue.h>
21 #include <sys/smp.h>
22 #include <sys/sx.h>
23 #include <sys/sysctl.h>
24 #include <sys/ucred.h>
25 #include <sys/uio.h>
26 
27 #include <machine/vmm.h>
28 
29 #include <vm/vm.h>
30 #include <vm/vm_object.h>
31 
32 #include <dev/vmm/vmm_dev.h>
33 #include <dev/vmm/vmm_mem.h>
34 #include <dev/vmm/vmm_stat.h>
35 
36 #ifdef __amd64__
37 #ifdef COMPAT_FREEBSD12
38 struct vm_memseg_12 {
39 	int		segid;
40 	size_t		len;
41 	char		name[64];
42 };
43 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
44 
45 #define	VM_ALLOC_MEMSEG_12	\
46 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
47 #define	VM_GET_MEMSEG_12	\
48 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
49 #endif /* COMPAT_FREEBSD12 */
50 #ifdef COMPAT_FREEBSD14
51 struct vm_memseg_14 {
52 	int		segid;
53 	size_t		len;
54 	char		name[VM_MAX_SUFFIXLEN + 1];
55 };
56 _Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
57     "COMPAT_FREEBSD14 ABI");
58 
59 #define	VM_ALLOC_MEMSEG_14	\
60 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
61 #define	VM_GET_MEMSEG_14	\
62 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
63 #endif /* COMPAT_FREEBSD14 */
64 #endif /* __amd64__ */
65 
66 struct devmem_softc {
67 	int	segid;
68 	char	*name;
69 	struct cdev *cdev;
70 	struct vmmdev_softc *sc;
71 	SLIST_ENTRY(devmem_softc) link;
72 };
73 
74 struct vmmdev_softc {
75 	struct vm	*vm;		/* vm instance cookie */
76 	struct cdev	*cdev;
77 	struct ucred	*ucred;
78 	SLIST_ENTRY(vmmdev_softc) link;
79 	SLIST_HEAD(, devmem_softc) devmem;
80 	int		flags;
81 };
82 
83 static bool vmm_initialized = false;
84 
85 static SLIST_HEAD(, vmmdev_softc) head;
86 
87 static unsigned pr_allow_flag;
88 static struct sx vmmdev_mtx;
89 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");
90 
91 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
92 
93 SYSCTL_DECL(_hw_vmm);
94 
95 u_int vm_maxcpu;
96 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
97     &vm_maxcpu, 0, "Maximum number of vCPUs");
98 
99 static void devmem_destroy(void *arg);
100 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
101 
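/*
 * Check whether the calling credential may use vmm(4).  Jailed callers
 * must have the vmm jail allow flag (registered in vmmdev_init()); callers
 * outside a jail are always permitted.
 */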
102 static int
103 vmm_priv_check(struct ucred *ucred)
104 {
105 	if (jailed(ucred) &&
106 	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
107 		return (EPERM);
108 
109 	return (0);
110 }
111 
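/*
 * Freeze a single vCPU so its state can be inspected or modified safely;
 * vcpu_unlock_one() returns it to the idle state.
 */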
112 static int
113 vcpu_lock_one(struct vcpu *vcpu)
114 {
115 	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
116 }
117 
118 static void
119 vcpu_unlock_one(struct vcpu *vcpu)
120 {
121 	enum vcpu_state state;
122 
123 	state = vcpu_get_state(vcpu, NULL);
124 	if (state != VCPU_FROZEN) {
125 		panic("vcpu %s(%d) has invalid state %d",
126 		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
127 	}
128 
129 	vcpu_set_state(vcpu, VCPU_IDLE, false);
130 }
131 
132 #ifndef __amd64__
133 static int
134 vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
135 {
136 	struct vcpu *vcpu;
137 	int error;
138 	uint16_t i, j, maxcpus;
139 
140 	error = 0;
141 	maxcpus = vm_get_maxcpus(vm);
142 	for (i = 0; i < maxcpus; i++) {
143 		vcpu = vm_vcpu(vm, i);
144 		if (vcpu == NULL)
145 			continue;
146 		error = vcpu_lock_one(vcpu);
147 		if (error)
148 			break;
149 	}
150 
151 	if (error) {
152 		for (j = 0; j < i; j++) {
153 			vcpu = vm_vcpu(vm, j);
154 			if (vcpu == NULL)
155 				continue;
156 			vcpu_unlock_one(vcpu);
157 		}
158 	}
159 
160 	return (error);
161 }
162 #endif
163 
164 static int
165 vcpu_lock_all(struct vmmdev_softc *sc)
166 {
167 	int error;
168 
169 	/*
170 	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
171 	 * in a consistent order so we need to serialize to avoid deadlocks.
172 	 */
173 	vm_lock_vcpus(sc->vm);
174 	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
175 	if (error != 0)
176 		vm_unlock_vcpus(sc->vm);
177 	return (error);
178 }
179 
180 static void
181 vcpu_unlock_all(struct vmmdev_softc *sc)
182 {
183 	struct vcpu *vcpu;
184 	uint16_t i, maxcpus;
185 
186 	maxcpus = vm_get_maxcpus(sc->vm);
187 	for (i = 0; i < maxcpus; i++) {
188 		vcpu = vm_vcpu(sc->vm, i);
189 		if (vcpu == NULL)
190 			continue;
191 		vcpu_unlock_one(vcpu);
192 	}
193 	vm_unlock_vcpus(sc->vm);
194 }
195 
196 static struct vmmdev_softc *
197 vmmdev_lookup(const char *name, struct ucred *cred)
198 {
199 	struct vmmdev_softc *sc;
200 
201 	sx_assert(&vmmdev_mtx, SA_XLOCKED);
202 
203 	SLIST_FOREACH(sc, &head, link) {
204 		if (strcmp(name, vm_name(sc->vm)) == 0)
205 			break;
206 	}
207 
208 	if (sc == NULL)
209 		return (NULL);
210 
211 	if (cr_cansee(cred, sc->ucred))
212 		return (NULL);
213 
214 	return (sc);
215 }
216 
217 static struct vmmdev_softc *
218 vmmdev_lookup2(struct cdev *cdev)
219 {
220 	return (cdev->si_drv1);
221 }
222 
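/*
 * read(2)/write(2) handler for /dev/vmm/<name>: the file offset is
 * interpreted as a guest physical address and data is copied at most one
 * page at a time.
 */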
223 static int
224 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
225 {
226 	int error, off, c, prot;
227 	vm_paddr_t gpa, maxaddr;
228 	void *hpa, *cookie;
229 	struct vmmdev_softc *sc;
230 
231 	sc = vmmdev_lookup2(cdev);
232 	if (sc == NULL)
233 		return (ENXIO);
234 
235 	/*
236 	 * Get a read lock on the guest memory map.
237 	 */
238 	vm_slock_memsegs(sc->vm);
239 
240 	error = 0;
241 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
242 	maxaddr = vmm_sysmem_maxaddr(sc->vm);
243 	while (uio->uio_resid > 0 && error == 0) {
244 		gpa = uio->uio_offset;
245 		off = gpa & PAGE_MASK;
246 		c = min(uio->uio_resid, PAGE_SIZE - off);
247 
248 		/*
249 		 * The VM has a hole in its physical memory map. If we want to
250 		 * use 'dd' to inspect memory beyond the hole we need to
251 		 * provide bogus data for memory that lies in the hole.
252 		 *
253 		 * Since this device does not support lseek(2), dd(1) will
254 		 * read(2) blocks of data to simulate the lseek(2).
255 		 */
256 		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
257 		if (hpa == NULL) {
258 			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
259 				error = uiomove(__DECONST(void *, zero_region),
260 				    c, uio);
261 			else
262 				error = EFAULT;
263 		} else {
264 			error = uiomove(hpa, c, uio);
265 			vm_gpa_release(cookie);
266 		}
267 	}
268 	vm_unlock_memsegs(sc->vm);
269 	return (error);
270 }
271 
272 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
273 
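/*
 * Copy out the length and, for devmem segments, the name of a memory
 * segment.  'len' is the size of the name buffer in the caller's
 * struct vm_memseg, which differs between compat ABIs.
 */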
274 static int
275 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
276 {
277 	struct devmem_softc *dsc;
278 	int error;
279 	bool sysmem;
280 
281 	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
282 	if (error || mseg->len == 0)
283 		return (error);
284 
285 	if (!sysmem) {
286 		SLIST_FOREACH(dsc, &sc->devmem, link) {
287 			if (dsc->segid == mseg->segid)
288 				break;
289 		}
290 		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
291 		    __func__, mseg->segid));
292 		error = copystr(dsc->name, mseg->name, len, NULL);
293 	} else {
294 		bzero(mseg->name, len);
295 	}
296 
297 	return (error);
298 }
299 
300 static int
301 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
302     struct domainset *domainset)
303 {
304 	char *name;
305 	int error;
306 	bool sysmem;
307 
308 	error = 0;
309 	name = NULL;
310 	sysmem = true;
311 
312 	/*
313 	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
314 	 * be stripped off when devfs processes the full string.
315 	 */
316 	if (VM_MEMSEG_NAME(mseg)) {
317 		sysmem = false;
318 		name = malloc(len, M_VMMDEV, M_WAITOK);
319 		error = copystr(mseg->name, name, len, NULL);
320 		if (error)
321 			goto done;
322 	}
323 	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
324 	if (error)
325 		goto done;
326 
327 	if (VM_MEMSEG_NAME(mseg)) {
328 		error = devmem_create_cdev(sc, mseg->segid, name);
329 		if (error)
330 			vm_free_memseg(sc->vm, mseg->segid);
331 		else
332 			name = NULL;	/* freed when 'cdev' is destroyed */
333 	}
334 done:
335 	free(name, M_VMMDEV);
336 	return (error);
337 }
338 
339 #if defined(__amd64__) && \
340     (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
341 /*
342  * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
343  */
344 static void
345 adjust_segid(struct vm_memseg *mseg)
346 {
347 	if (mseg->segid != VM_SYSMEM) {
348 		mseg->segid += (VM_BOOTROM - 1);
349 	}
350 }
351 #endif
352 
353 static int
354 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
355     uint64_t *regval)
356 {
357 	int error, i;
358 
359 	error = 0;
360 	for (i = 0; i < count; i++) {
361 		error = vm_get_register(vcpu, regnum[i], &regval[i]);
362 		if (error)
363 			break;
364 	}
365 	return (error);
366 }
367 
368 static int
369 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
370     uint64_t *regval)
371 {
372 	int error, i;
373 
374 	error = 0;
375 	for (i = 0; i < count; i++) {
376 		error = vm_set_register(vcpu, regnum[i], regval[i]);
377 		if (error)
378 			break;
379 	}
380 	return (error);
381 }
382 
383 static int
384 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
385 {
386 	int error;
387 
388 	/*
389 	 * A jail without vmm access shouldn't be able to access vmm device
390 	 * files at all, but check here just to be thorough.
391 	 */
392 	error = vmm_priv_check(td->td_ucred);
393 	if (error != 0)
394 		return (error);
395 
396 	return (0);
397 }
398 
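/*
 * Table of generic ioctls and the locking each one requires.  Machine-
 * dependent ioctls are looked up in vmmdev_machdep_ioctls and dispatched
 * via vmmdev_machdep_ioctl() in the default case below.
 */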
399 static const struct vmmdev_ioctl vmmdev_ioctls[] = {
400 	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
401 	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
402 	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
403 	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
404 	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
405 	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
406 	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
407 	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
408 	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
409 	VMMDEV_IOCTL(VM_STAT_DESC, 0),
410 
411 #ifdef __amd64__
412 #ifdef COMPAT_FREEBSD12
413 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
414 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
415 #endif
416 #ifdef COMPAT_FREEBSD14
417 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
418 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
419 #endif
420 #endif /* __amd64__ */
421 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
422 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
423 	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
424 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
425 	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
426 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
427 	VMMDEV_IOCTL(VM_REINIT,
428 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
429 
430 #ifdef __amd64__
431 #if defined(COMPAT_FREEBSD12)
432 	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
433 #endif
434 #ifdef COMPAT_FREEBSD14
435 	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
436 #endif
437 #endif /* __amd64__ */
438 	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
439 	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
440 
441 	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
442 	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
443 
444 	VMMDEV_IOCTL(VM_SUSPEND, 0),
445 	VMMDEV_IOCTL(VM_GET_CPUS, 0),
446 	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
447 	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
448 };
449 
450 static int
451 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
452     struct thread *td)
453 {
454 	struct vmmdev_softc *sc;
455 	struct vcpu *vcpu;
456 	const struct vmmdev_ioctl *ioctl;
457 	struct vm_memseg *mseg;
458 	int error, vcpuid;
459 
460 	sc = vmmdev_lookup2(cdev);
461 	if (sc == NULL)
462 		return (ENXIO);
463 
464 	ioctl = NULL;
465 	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
466 		if (vmmdev_ioctls[i].cmd == cmd) {
467 			ioctl = &vmmdev_ioctls[i];
468 			break;
469 		}
470 	}
471 	if (ioctl == NULL) {
472 		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
473 			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
474 				ioctl = &vmmdev_machdep_ioctls[i];
475 				break;
476 			}
477 		}
478 	}
479 	if (ioctl == NULL)
480 		return (ENOTTY);
481 
482 	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
483 		error = priv_check(td, PRIV_DRIVER);
484 		if (error != 0)
485 			return (error);
486 	}
487 
488 	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
489 		vm_xlock_memsegs(sc->vm);
490 	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
491 		vm_slock_memsegs(sc->vm);
492 
493 	vcpu = NULL;
494 	vcpuid = -1;
495 	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
496 	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
497 		vcpuid = *(int *)data;
498 		if (vcpuid == -1) {
499 			if ((ioctl->flags &
500 			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
501 				error = EINVAL;
502 				goto lockfail;
503 			}
504 		} else {
505 			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
506 			if (vcpu == NULL) {
507 				error = EINVAL;
508 				goto lockfail;
509 			}
510 			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
511 				error = vcpu_lock_one(vcpu);
512 				if (error)
513 					goto lockfail;
514 			}
515 		}
516 	}
517 	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
518 		error = vcpu_lock_all(sc);
519 		if (error)
520 			goto lockfail;
521 	}
522 
523 	switch (cmd) {
524 	case VM_SUSPEND: {
525 		struct vm_suspend *vmsuspend;
526 
527 		vmsuspend = (struct vm_suspend *)data;
528 		error = vm_suspend(sc->vm, vmsuspend->how);
529 		break;
530 	}
531 	case VM_REINIT:
532 		error = vm_reinit(sc->vm);
533 		break;
534 	case VM_STAT_DESC: {
535 		struct vm_stat_desc *statdesc;
536 
537 		statdesc = (struct vm_stat_desc *)data;
538 		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
539 		    sizeof(statdesc->desc));
540 		break;
541 	}
542 	case VM_STATS: {
543 		struct vm_stats *vmstats;
544 
545 		vmstats = (struct vm_stats *)data;
546 		getmicrotime(&vmstats->tv);
547 		error = vmm_stat_copy(vcpu, vmstats->index,
548 		    nitems(vmstats->statbuf), &vmstats->num_entries,
549 		    vmstats->statbuf);
550 		break;
551 	}
552 	case VM_MMAP_GETNEXT: {
553 		struct vm_memmap *mm;
554 
555 		mm = (struct vm_memmap *)data;
556 		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
557 		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
558 		break;
559 	}
560 	case VM_MMAP_MEMSEG: {
561 		struct vm_memmap *mm;
562 
563 		mm = (struct vm_memmap *)data;
564 		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
565 		    mm->len, mm->prot, mm->flags);
566 		break;
567 	}
568 	case VM_MUNMAP_MEMSEG: {
569 		struct vm_munmap *mu;
570 
571 		mu = (struct vm_munmap *)data;
572 		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
573 		break;
574 	}
575 #ifdef __amd64__
576 #ifdef COMPAT_FREEBSD12
577 	case VM_ALLOC_MEMSEG_12:
578 		mseg = (struct vm_memseg *)data;
579 
580 		adjust_segid(mseg);
581 		error = alloc_memseg(sc, mseg,
582 		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
583 		break;
584 	case VM_GET_MEMSEG_12:
585 		mseg = (struct vm_memseg *)data;
586 
587 		adjust_segid(mseg);
588 		error = get_memseg(sc, mseg,
589 		    sizeof(((struct vm_memseg_12 *)0)->name));
590 		break;
591 #endif /* COMPAT_FREEBSD12 */
592 #ifdef COMPAT_FREEBSD14
593 	case VM_ALLOC_MEMSEG_14:
594 		mseg = (struct vm_memseg *)data;
595 
596 		adjust_segid(mseg);
597 		error = alloc_memseg(sc, mseg,
598 		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
599 		break;
600 	case VM_GET_MEMSEG_14:
601 		mseg = (struct vm_memseg *)data;
602 
603 		adjust_segid(mseg);
604 		error = get_memseg(sc, mseg,
605 		    sizeof(((struct vm_memseg_14 *)0)->name));
606 		break;
607 #endif /* COMPAT_FREEBSD14 */
608 #endif /* __amd64__ */
609 	case VM_ALLOC_MEMSEG: {
610 		domainset_t *mask;
611 		struct domainset *domainset, domain;
612 
613 		domainset = NULL;
614 		mseg = (struct vm_memseg *)data;
615 		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
616 			if (mseg->ds_mask_size < sizeof(domainset_t) ||
617 			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
618 				error = ERANGE;
619 				break;
620 			}
621 			memset(&domain, 0, sizeof(domain));
622 			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
623 			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
624 			if (error) {
625 				free(mask, M_VMMDEV);
626 				break;
627 			}
628 			error = domainset_populate(&domain, mask, mseg->ds_policy,
629 			    mseg->ds_mask_size);
630 			free(mask, M_VMMDEV);
631 			if (error)
632 				break;
633 			domainset = domainset_create(&domain);
634 			if (domainset == NULL) {
635 				error = EINVAL;
636 				break;
637 			}
638 		}
639 		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
640 		break;
641 	}
642 	case VM_GET_MEMSEG:
643 		error = get_memseg(sc, (struct vm_memseg *)data,
644 		    sizeof(((struct vm_memseg *)0)->name));
645 		break;
646 	case VM_GET_REGISTER: {
647 		struct vm_register *vmreg;
648 
649 		vmreg = (struct vm_register *)data;
650 		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
651 		break;
652 	}
653 	case VM_SET_REGISTER: {
654 		struct vm_register *vmreg;
655 
656 		vmreg = (struct vm_register *)data;
657 		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
658 		break;
659 	}
660 	case VM_GET_REGISTER_SET: {
661 		struct vm_register_set *vmregset;
662 		uint64_t *regvals;
663 		int *regnums;
664 
665 		vmregset = (struct vm_register_set *)data;
666 		if (vmregset->count > VM_REG_LAST) {
667 			error = EINVAL;
668 			break;
669 		}
670 		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
671 		    M_VMMDEV, M_WAITOK);
672 		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
673 		    M_VMMDEV, M_WAITOK);
674 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
675 		    vmregset->count);
676 		if (error == 0)
677 			error = vm_get_register_set(vcpu,
678 			    vmregset->count, regnums, regvals);
679 		if (error == 0)
680 			error = copyout(regvals, vmregset->regvals,
681 			    sizeof(regvals[0]) * vmregset->count);
682 		free(regvals, M_VMMDEV);
683 		free(regnums, M_VMMDEV);
684 		break;
685 	}
686 	case VM_SET_REGISTER_SET: {
687 		struct vm_register_set *vmregset;
688 		uint64_t *regvals;
689 		int *regnums;
690 
691 		vmregset = (struct vm_register_set *)data;
692 		if (vmregset->count > VM_REG_LAST) {
693 			error = EINVAL;
694 			break;
695 		}
696 		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
697 		    M_VMMDEV, M_WAITOK);
698 		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
699 		    M_VMMDEV, M_WAITOK);
700 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
701 		    vmregset->count);
702 		if (error == 0)
703 			error = copyin(vmregset->regvals, regvals,
704 			    sizeof(regvals[0]) * vmregset->count);
705 		if (error == 0)
706 			error = vm_set_register_set(vcpu,
707 			    vmregset->count, regnums, regvals);
708 		free(regvals, M_VMMDEV);
709 		free(regnums, M_VMMDEV);
710 		break;
711 	}
712 	case VM_GET_CAPABILITY: {
713 		struct vm_capability *vmcap;
714 
715 		vmcap = (struct vm_capability *)data;
716 		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
717 		break;
718 	}
719 	case VM_SET_CAPABILITY: {
720 		struct vm_capability *vmcap;
721 
722 		vmcap = (struct vm_capability *)data;
723 		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
724 		break;
725 	}
726 	case VM_ACTIVATE_CPU:
727 		error = vm_activate_cpu(vcpu);
728 		break;
729 	case VM_GET_CPUS: {
730 		struct vm_cpuset *vm_cpuset;
731 		cpuset_t *cpuset;
732 		int size;
733 
734 		error = 0;
735 		vm_cpuset = (struct vm_cpuset *)data;
736 		size = vm_cpuset->cpusetsize;
737 		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
738 			error = ERANGE;
739 			break;
740 		}
741 		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
742 		    M_WAITOK | M_ZERO);
743 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
744 			*cpuset = vm_active_cpus(sc->vm);
745 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
746 			*cpuset = vm_suspended_cpus(sc->vm);
747 		else if (vm_cpuset->which == VM_DEBUG_CPUS)
748 			*cpuset = vm_debug_cpus(sc->vm);
749 		else
750 			error = EINVAL;
751 		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
752 			error = ERANGE;
753 		if (error == 0)
754 			error = copyout(cpuset, vm_cpuset->cpus, size);
755 		free(cpuset, M_TEMP);
756 		break;
757 	}
758 	case VM_SUSPEND_CPU:
759 		error = vm_suspend_cpu(sc->vm, vcpu);
760 		break;
761 	case VM_RESUME_CPU:
762 		error = vm_resume_cpu(sc->vm, vcpu);
763 		break;
764 	case VM_SET_TOPOLOGY: {
765 		struct vm_cpu_topology *topology;
766 
767 		topology = (struct vm_cpu_topology *)data;
768 		error = vm_set_topology(sc->vm, topology->sockets,
769 		    topology->cores, topology->threads, topology->maxcpus);
770 		break;
771 	}
772 	case VM_GET_TOPOLOGY: {
773 		struct vm_cpu_topology *topology;
774 
775 		topology = (struct vm_cpu_topology *)data;
776 		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
777 		    &topology->threads, &topology->maxcpus);
778 		error = 0;
779 		break;
780 	}
781 	default:
782 		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
783 		    td);
784 		break;
785 	}
786 
787 	if ((ioctl->flags &
788 	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
789 		vm_unlock_memsegs(sc->vm);
790 	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
791 		vcpu_unlock_all(sc);
792 	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
793 		vcpu_unlock_one(vcpu);
794 
795 	/*
796 	 * Make sure that no handler returns a kernel-internal
797 	 * error value to userspace.
798 	 */
799 	KASSERT(error == ERESTART || error >= 0,
800 	    ("vmmdev_ioctl: invalid error return %d", error));
801 	return (error);
802 
803 lockfail:
804 	if ((ioctl->flags &
805 	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
806 		vm_unlock_memsegs(sc->vm);
807 	return (error);
808 }
809 
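/*
 * mmap(2) handler for /dev/vmm/<name>: translate the requested range into
 * the VM object backing a system memory segment.  Device memory segments
 * are not mappable here; they are mapped through their own devmem cdev.
 */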
810 static int
811 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
812     struct vm_object **objp, int nprot)
813 {
814 	struct vmmdev_softc *sc;
815 	vm_paddr_t gpa;
816 	size_t len;
817 	vm_ooffset_t segoff, first, last;
818 	int error, found, segid;
819 	bool sysmem;
820 
821 	first = *offset;
822 	last = first + mapsize;
823 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
824 		return (EINVAL);
825 
826 	sc = vmmdev_lookup2(cdev);
827 	if (sc == NULL) {
828 		/* virtual machine is in the process of being created */
829 		return (EINVAL);
830 	}
831 
832 	/*
833 	 * Get a read lock on the guest memory map.
834 	 */
835 	vm_slock_memsegs(sc->vm);
836 
837 	gpa = 0;
838 	found = 0;
839 	while (!found) {
840 		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
841 		    NULL, NULL);
842 		if (error)
843 			break;
844 
845 		if (first >= gpa && last <= gpa + len)
846 			found = 1;
847 		else
848 			gpa += len;
849 	}
850 
851 	if (found) {
852 		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
853 		KASSERT(error == 0 && *objp != NULL,
854 		    ("%s: invalid memory segment %d", __func__, segid));
855 		if (sysmem) {
856 			vm_object_reference(*objp);
857 			*offset = segoff + (first - gpa);
858 		} else {
859 			error = EINVAL;
860 		}
861 	}
862 	vm_unlock_memsegs(sc->vm);
863 	return (error);
864 }
865 
866 static void
867 vmmdev_destroy(struct vmmdev_softc *sc)
868 {
869 	struct devmem_softc *dsc;
870 	int error __diagused;
871 
872 	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
873 
874 	/*
875 	 * Destroy all cdevs:
876 	 *
877 	 * - any new operations on the 'cdev' will return an error (ENXIO).
878 	 *
879 	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
880 	 */
881 	SLIST_FOREACH(dsc, &sc->devmem, link) {
882 		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
883 		devmem_destroy(dsc);
884 	}
885 
886 	vm_disable_vcpu_creation(sc->vm);
887 	error = vcpu_lock_all(sc);
888 	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
889 	vm_unlock_vcpus(sc->vm);
890 
891 	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
892 		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
893 		SLIST_REMOVE_HEAD(&sc->devmem, link);
894 		free(dsc->name, M_VMMDEV);
895 		free(dsc, M_VMMDEV);
896 	}
897 
898 	if (sc->vm != NULL)
899 		vm_destroy(sc->vm);
900 
901 	if (sc->ucred != NULL)
902 		crfree(sc->ucred);
903 
904 	sx_xlock(&vmmdev_mtx);
905 	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
906 	sx_xunlock(&vmmdev_mtx);
907 	free(sc, M_VMMDEV);
908 }
909 
910 static int
911 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
912 {
913 	struct cdev *cdev;
914 	struct vmmdev_softc *sc;
915 
916 	sx_xlock(&vmmdev_mtx);
917 	sc = vmmdev_lookup(name, cred);
918 	if (sc == NULL || sc->cdev == NULL) {
919 		sx_xunlock(&vmmdev_mtx);
920 		return (EINVAL);
921 	}
922 
923 	/*
924 	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
925 	 * is scheduled for destruction.
926 	 */
927 	cdev = sc->cdev;
928 	sc->cdev = NULL;
929 	sx_xunlock(&vmmdev_mtx);
930 
931 	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
932 	destroy_dev(cdev);
933 	vmmdev_destroy(sc);
934 
935 	return (0);
936 }
937 
938 static int
939 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
940 {
941 	char *buf;
942 	int error, buflen;
943 
944 	error = vmm_priv_check(req->td->td_ucred);
945 	if (error)
946 		return (error);
947 
948 	buflen = VM_MAX_NAMELEN + 1;
949 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
950 	error = sysctl_handle_string(oidp, buf, buflen, req);
951 	if (error == 0 && req->newptr != NULL)
952 		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
953 	free(buf, M_VMMDEV);
954 	return (error);
955 }
956 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
957     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
958     NULL, 0, sysctl_vmm_destroy, "A",
959     "Destroy a vmm(4) instance (legacy interface)");
960 
961 static struct cdevsw vmmdevsw = {
962 	.d_name		= "vmmdev",
963 	.d_version	= D_VERSION,
964 	.d_open		= vmmdev_open,
965 	.d_ioctl	= vmmdev_ioctl,
966 	.d_mmap_single	= vmmdev_mmap_single,
967 	.d_read		= vmmdev_rw,
968 	.d_write	= vmmdev_rw,
969 };
970 
971 static struct vmmdev_softc *
972 vmmdev_alloc(struct vm *vm, struct ucred *cred)
973 {
974 	struct vmmdev_softc *sc;
975 
976 	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
977 	SLIST_INIT(&sc->devmem);
978 	sc->vm = vm;
979 	sc->ucred = crhold(cred);
980 	return (sc);
981 }
982 
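/*
 * Create a new VM instance and its /dev/vmm/<name> cdev.  The caller's
 * credential is attached to the softc and the device so that the
 * cr_cansee() check in vmmdev_lookup() works across jails.
 */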
983 static int
984 vmmdev_create(const char *name, struct ucred *cred)
985 {
986 	struct make_dev_args mda;
987 	struct cdev *cdev;
988 	struct vmmdev_softc *sc;
989 	struct vm *vm;
990 	int error;
991 
992 	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
993 		return (EINVAL);
994 
995 	sx_xlock(&vmmdev_mtx);
996 	sc = vmmdev_lookup(name, cred);
997 	if (sc != NULL) {
998 		sx_xunlock(&vmmdev_mtx);
999 		return (EEXIST);
1000 	}
1001 
1002 	error = vm_create(name, &vm);
1003 	if (error != 0) {
1004 		sx_xunlock(&vmmdev_mtx);
1005 		return (error);
1006 	}
1007 	sc = vmmdev_alloc(vm, cred);
1008 	SLIST_INSERT_HEAD(&head, sc, link);
1009 
1010 	make_dev_args_init(&mda);
1011 	mda.mda_devsw = &vmmdevsw;
1012 	mda.mda_cr = sc->ucred;
1013 	mda.mda_uid = UID_ROOT;
1014 	mda.mda_gid = GID_WHEEL;
1015 	mda.mda_mode = 0600;
1016 	mda.mda_si_drv1 = sc;
1017 	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1018 	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
1019 	if (error != 0) {
1020 		sx_xunlock(&vmmdev_mtx);
1021 		vmmdev_destroy(sc);
1022 		return (error);
1023 	}
1024 	sc->cdev = cdev;
1025 	sx_xunlock(&vmmdev_mtx);
1026 	return (0);
1027 }
1028 
1029 static int
1030 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1031 {
1032 	char *buf;
1033 	int error, buflen;
1034 
1035 	if (!vmm_initialized)
1036 		return (ENXIO);
1037 
1038 	error = vmm_priv_check(req->td->td_ucred);
1039 	if (error != 0)
1040 		return (error);
1041 
1042 	buflen = VM_MAX_NAMELEN + 1;
1043 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1044 	error = sysctl_handle_string(oidp, buf, buflen, req);
1045 	if (error == 0 && req->newptr != NULL)
1046 		error = vmmdev_create(buf, req->td->td_ucred);
1047 	free(buf, M_VMMDEV);
1048 	return (error);
1049 }
1050 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1051     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1052     NULL, 0, sysctl_vmm_create, "A",
1053     "Create a vmm(4) instance (legacy interface)");
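/*
 * Illustrative userspace use of the legacy sysctl interface (error
 * handling omitted; the VM name "testvm" is just an example):
 *
 *	sysctlbyname("hw.vmm.create", NULL, NULL, "testvm", strlen("testvm"));
 *	...
 *	sysctlbyname("hw.vmm.destroy", NULL, NULL, "testvm", strlen("testvm"));
 *
 * Successful creation makes /dev/vmm/testvm available for the ioctl(2),
 * read(2)/write(2) and mmap(2) handlers implemented above.
 */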
1054 
1055 static int
1056 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
1057 {
1058 	int error;
1059 
1060 	error = vmm_priv_check(td->td_ucred);
1061 	if (error != 0)
1062 		return (error);
1063 
1064 	if ((flags & FWRITE) == 0)
1065 		return (EPERM);
1066 
1067 	return (0);
1068 }
1069 
1070 static int
1071 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
1072     struct thread *td)
1073 {
1074 	int error;
1075 
1076 	switch (cmd) {
1077 	case VMMCTL_VM_CREATE: {
1078 		struct vmmctl_vm_create *vmc;
1079 
1080 		vmc = (struct vmmctl_vm_create *)data;
1081 		vmc->name[VM_MAX_NAMELEN] = '\0';
1082 		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
1083 			if (vmc->reserved[i] != 0) {
1084 				error = EINVAL;
1085 				return (error);
1086 			}
1087 		}
1088 
1089 		error = vmmdev_create(vmc->name, td->td_ucred);
1090 		break;
1091 	}
1092 	case VMMCTL_VM_DESTROY: {
1093 		struct vmmctl_vm_destroy *vmd;
1094 
1095 		vmd = (struct vmmctl_vm_destroy *)data;
1096 		vmd->name[VM_MAX_NAMELEN] = '\0';
1097 		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
1098 			if (vmd->reserved[i] != 0) {
1099 				error = EINVAL;
1100 				return (error);
1101 			}
1102 		}
1103 
1104 		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
1105 		break;
1106 	}
1107 	default:
1108 		error = ENOTTY;
1109 		break;
1110 	}
1111 
1112 	return (error);
1113 }
1114 
1115 static struct cdev *vmmctl_cdev;
1116 static struct cdevsw vmmctlsw = {
1117 	.d_name		= "vmmctl",
1118 	.d_version	= D_VERSION,
1119 	.d_open		= vmmctl_open,
1120 	.d_ioctl	= vmmctl_ioctl,
1121 };
1122 
1123 static int
1124 vmmdev_init(void)
1125 {
1126 	int error;
1127 
1128 	sx_xlock(&vmmdev_mtx);
1129 	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
1130 	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
1131 	if (error == 0)
1132 		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1133 		    "Allow use of vmm in a jail.");
1134 	sx_xunlock(&vmmdev_mtx);
1135 
1136 	return (error);
1137 }
1138 
1139 static int
1140 vmmdev_cleanup(void)
1141 {
1142 	sx_xlock(&vmmdev_mtx);
1143 	if (!SLIST_EMPTY(&head)) {
1144 		sx_xunlock(&vmmdev_mtx);
1145 		return (EBUSY);
1146 	}
1147 	if (vmmctl_cdev != NULL) {
1148 		destroy_dev(vmmctl_cdev);
1149 		vmmctl_cdev = NULL;
1150 	}
1151 	sx_xunlock(&vmmdev_mtx);
1152 
1153 	return (0);
1154 }
1155 
1156 static int
1157 vmm_handler(module_t mod, int what, void *arg)
1158 {
1159 	int error;
1160 
1161 	switch (what) {
1162 	case MOD_LOAD:
1163 		error = vmmdev_init();
1164 		if (error != 0)
1165 			break;
1166 
1167 		vm_maxcpu = mp_ncpus;
1168 		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
1169 		if (vm_maxcpu > VM_MAXCPU) {
1170 			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
1171 			vm_maxcpu = VM_MAXCPU;
1172 		}
1173 		if (vm_maxcpu == 0)
1174 			vm_maxcpu = 1;
1175 
1176 		error = vmm_modinit();
1177 		if (error == 0)
1178 			vmm_initialized = true;
1179 		else {
1180 			error = vmmdev_cleanup();
1181 			KASSERT(error == 0,
1182 			    ("%s: vmmdev_cleanup failed: %d", __func__, error));
1183 		}
1184 		break;
1185 	case MOD_UNLOAD:
1186 		error = vmmdev_cleanup();
1187 		if (error == 0 && vmm_initialized) {
1188 			error = vmm_modcleanup();
1189 			if (error) {
1190 				/*
1191 				 * Something bad happened - prevent new
1192 				 * VMs from being created
1193 				 */
1194 				vmm_initialized = false;
1195 			}
1196 		}
1197 		break;
1198 	default:
1199 		error = 0;
1200 		break;
1201 	}
1202 	return (error);
1203 }
1204 
1205 static moduledata_t vmm_kmod = {
1206 	"vmm",
1207 	vmm_handler,
1208 	NULL
1209 };
1210 
1211 /*
1212  * vmm initialization has the following dependencies:
1213  *
1214  * - Initialization requires smp_rendezvous() and therefore must happen
1215  *   after SMP is fully functional (after SI_SUB_SMP).
1216  * - vmm device initialization requires an initialized devfs.
1217  */
1218 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
1219 MODULE_VERSION(vmm, 1);
1220 
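/*
 * mmap(2) handler for a devmem cdev: the whole segment is exposed starting
 * at offset 0, so the requested range must lie within the segment's length.
 */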
1221 static int
1222 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1223     struct vm_object **objp, int nprot)
1224 {
1225 	struct devmem_softc *dsc;
1226 	vm_ooffset_t first, last;
1227 	size_t seglen;
1228 	int error;
1229 	bool sysmem;
1230 
1231 	dsc = cdev->si_drv1;
1232 	if (dsc == NULL) {
1233 		/* 'cdev' has been created but is not ready for use */
1234 		return (ENXIO);
1235 	}
1236 
1237 	first = *offset;
1238 	last = *offset + len;
1239 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1240 		return (EINVAL);
1241 
1242 	vm_slock_memsegs(dsc->sc->vm);
1243 
1244 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1245 	KASSERT(error == 0 && !sysmem && *objp != NULL,
1246 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
1247 
1248 	if (seglen >= last)
1249 		vm_object_reference(*objp);
1250 	else
1251 		error = EINVAL;
1252 
1253 	vm_unlock_memsegs(dsc->sc->vm);
1254 	return (error);
1255 }
1256 
1257 static struct cdevsw devmemsw = {
1258 	.d_name		= "devmem",
1259 	.d_version	= D_VERSION,
1260 	.d_mmap_single	= devmem_mmap_single,
1261 };
1262 
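/*
 * Create the devmem cdev for a device memory segment and link it into the
 * owning VM's devmem list.  'devname' is consumed: it is freed here on
 * failure, otherwise it is freed when the softc is torn down.
 */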
1263 static int
1264 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
1265 {
1266 	struct make_dev_args mda;
1267 	struct devmem_softc *dsc;
1268 	int error;
1269 
1270 	sx_xlock(&vmmdev_mtx);
1271 
1272 	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1273 	dsc->segid = segid;
1274 	dsc->name = devname;
1275 	dsc->sc = sc;
1276 	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1277 
1278 	make_dev_args_init(&mda);
1279 	mda.mda_devsw = &devmemsw;
1280 	mda.mda_cr = sc->ucred;
1281 	mda.mda_uid = UID_ROOT;
1282 	mda.mda_gid = GID_WHEEL;
1283 	mda.mda_mode = 0600;
1284 	mda.mda_si_drv1 = dsc;
1285 	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1286 	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
1287 	    devname);
1288 	if (error != 0) {
1289 		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
1290 		free(dsc->name, M_VMMDEV);
1291 		free(dsc, M_VMMDEV);
1292 	}
1293 
1294 	sx_xunlock(&vmmdev_mtx);
1295 
1296 	return (error);
1297 }
1298 
1299 static void
1300 devmem_destroy(void *arg)
1301 {
1302 	struct devmem_softc *dsc = arg;
1303 
1304 	destroy_dev(dsc->cdev);
1305 	dsc->cdev = NULL;
1306 	dsc->sc = NULL;
1307 }
1308