1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6 * All rights reserved.
7 */
8
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #include <sys/fcntl.h>
12 #include <sys/ioccom.h>
13 #include <sys/jail.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/mman.h>
17 #include <sys/module.h>
18 #include <sys/priv.h>
19 #include <sys/proc.h>
20 #include <sys/queue.h>
21 #include <sys/resourcevar.h>
22 #include <sys/smp.h>
23 #include <sys/sx.h>
24 #include <sys/sysctl.h>
25 #include <sys/ucred.h>
26 #include <sys/uio.h>
27
28 #include <machine/vmm.h>
29
30 #include <vm/vm.h>
31 #include <vm/vm_object.h>
32
33 #include <dev/vmm/vmm_dev.h>
34 #include <dev/vmm/vmm_mem.h>
35 #include <dev/vmm/vmm_stat.h>
36
37 #ifdef __amd64__
38 #ifdef COMPAT_FREEBSD12
39 struct vm_memseg_12 {
40 int segid;
41 size_t len;
42 char name[64];
43 };
44 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
45
46 #define VM_ALLOC_MEMSEG_12 \
47 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
48 #define VM_GET_MEMSEG_12 \
49 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
50 #endif /* COMPAT_FREEBSD12 */
51 #ifdef COMPAT_FREEBSD14
52 struct vm_memseg_14 {
53 int segid;
54 size_t len;
55 char name[VM_MAX_SUFFIXLEN + 1];
56 };
57 _Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
58 "COMPAT_FREEBSD14 ABI");
59
60 #define VM_ALLOC_MEMSEG_14 \
61 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
62 #define VM_GET_MEMSEG_14 \
63 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
64 #endif /* COMPAT_FREEBSD14 */
65 #endif /* __amd64__ */
66
67 struct devmem_softc {
68 int segid;
69 char *name;
70 struct cdev *cdev;
71 struct vmmdev_softc *sc;
72 SLIST_ENTRY(devmem_softc) link;
73 };
74
75 struct vmmdev_softc {
76 struct vm *vm; /* vm instance cookie */
77 struct cdev *cdev;
78 struct ucred *ucred;
79 SLIST_ENTRY(vmmdev_softc) link;
80 LIST_ENTRY(vmmdev_softc) priv_link;
81 SLIST_HEAD(, devmem_softc) devmem;
82 int flags;
83 };
84
85 struct vmmctl_priv {
86 LIST_HEAD(, vmmdev_softc) softcs;
87 };
88
89 static bool vmm_initialized = false;
90
91 static SLIST_HEAD(, vmmdev_softc) head;
92
93 static unsigned pr_allow_flag;
94 static struct sx vmmdev_mtx;
95 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");
96
97 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
98
99 SYSCTL_DECL(_hw_vmm);
100
101 u_int vm_maxcpu;
102 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
103 &vm_maxcpu, 0, "Maximum number of vCPUs");
104
105 u_int vm_maxvmms;
106 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
107 &vm_maxvmms, 0, "Maximum number of VMM instances per user");
108
109 static void devmem_destroy(void *arg);
110 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
111 static void vmmdev_destroy(struct vmmdev_softc *sc);
112
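/*
 * Check whether the calling credential may use vmm(4).  Jailed
 * processes are rejected unless their prison has the "allow.vmm"
 * permission registered in vmmdev_init().
 */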
113 static int
114 vmm_priv_check(struct ucred *ucred)
115 {
116 if (jailed(ucred) &&
117 !(ucred->cr_prison->pr_allow & pr_allow_flag))
118 return (EPERM);
119
120 return (0);
121 }
122
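/*
 * Freeze a single vCPU so that its state can be inspected or modified
 * while it is guaranteed not to be running guest code.
 */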
123 static int
124 vcpu_lock_one(struct vcpu *vcpu)
125 {
126 return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
127 }
128
129 static void
130 vcpu_unlock_one(struct vcpu *vcpu)
131 {
132 enum vcpu_state state;
133
134 state = vcpu_get_state(vcpu, NULL);
135 if (state != VCPU_FROZEN) {
136 panic("vcpu %s(%d) has invalid state %d",
137 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
138 }
139
140 vcpu_set_state(vcpu, VCPU_IDLE, false);
141 }
142
143 #ifndef __amd64__
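/*
 * Non-amd64 fallback used by vcpu_lock_all(): freeze each vCPU in turn
 * and, if any transition fails, unwind the vCPUs already frozen.
 */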
144 static int
145 vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
146 {
147 struct vcpu *vcpu;
148 int error;
149 uint16_t i, j, maxcpus;
150
151 error = 0;
152 maxcpus = vm_get_maxcpus(vm);
153 for (i = 0; i < maxcpus; i++) {
154 vcpu = vm_vcpu(vm, i);
155 if (vcpu == NULL)
156 continue;
157 error = vcpu_lock_one(vcpu);
158 if (error)
159 break;
160 }
161
162 if (error) {
163 for (j = 0; j < i; j++) {
164 vcpu = vm_vcpu(vm, j);
165 if (vcpu == NULL)
166 continue;
167 vcpu_unlock_one(vcpu);
168 }
169 }
170
171 return (error);
172 }
173 #endif
174
175 static int
176 vcpu_lock_all(struct vmmdev_softc *sc)
177 {
178 int error;
179
180 /*
181 * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked
182 * in a consistent order so we need to serialize to avoid deadlocks.
183 */
184 vm_lock_vcpus(sc->vm);
185 error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
186 if (error != 0)
187 vm_unlock_vcpus(sc->vm);
188 return (error);
189 }
190
191 static void
192 vcpu_unlock_all(struct vmmdev_softc *sc)
193 {
194 struct vcpu *vcpu;
195 uint16_t i, maxcpus;
196
197 maxcpus = vm_get_maxcpus(sc->vm);
198 for (i = 0; i < maxcpus; i++) {
199 vcpu = vm_vcpu(sc->vm, i);
200 if (vcpu == NULL)
201 continue;
202 vcpu_unlock_one(vcpu);
203 }
204 vm_unlock_vcpus(sc->vm);
205 }
206
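/*
 * Look up a VM softc by name.  The caller must hold vmmdev_mtx
 * exclusively.  Returns NULL if no such VM exists or if 'cred' may not
 * see the VM owner's credential.
 */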
207 static struct vmmdev_softc *
208 vmmdev_lookup(const char *name, struct ucred *cred)
209 {
210 struct vmmdev_softc *sc;
211
212 sx_assert(&vmmdev_mtx, SA_XLOCKED);
213
214 SLIST_FOREACH(sc, &head, link) {
215 if (strcmp(name, vm_name(sc->vm)) == 0)
216 break;
217 }
218
219 if (sc == NULL)
220 return (NULL);
221
222 if (cr_cansee(cred, sc->ucred))
223 return (NULL);
224
225 return (sc);
226 }
227
228 static struct vmmdev_softc *
229 vmmdev_lookup2(struct cdev *cdev)
230 {
231 return (cdev->si_drv1);
232 }
233
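/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  Copies data between
 * userspace and guest physical memory one page at a time; reads from
 * holes below the top of system memory return zeroes so that tools
 * such as dd(1) can step past them.
 */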
234 static int
235 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
236 {
237 int error, off, c, prot;
238 vm_paddr_t gpa, maxaddr;
239 void *hpa, *cookie;
240 struct vmmdev_softc *sc;
241
242 sc = vmmdev_lookup2(cdev);
243 if (sc == NULL)
244 return (ENXIO);
245
246 /*
247 * Get a read lock on the guest memory map.
248 */
249 vm_slock_memsegs(sc->vm);
250
251 error = 0;
252 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
253 maxaddr = vmm_sysmem_maxaddr(sc->vm);
254 while (uio->uio_resid > 0 && error == 0) {
255 gpa = uio->uio_offset;
256 off = gpa & PAGE_MASK;
257 c = min(uio->uio_resid, PAGE_SIZE - off);
258
259 /*
260 * The VM has a hole in its physical memory map. If we want to
261 * use 'dd' to inspect memory beyond the hole we need to
262 * provide bogus data for memory that lies in the hole.
263 *
264 * Since this device does not support lseek(2), dd(1) will
265 * read(2) blocks of data to simulate the lseek(2).
266 */
267 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
268 if (hpa == NULL) {
269 if (uio->uio_rw == UIO_READ && gpa < maxaddr)
270 error = uiomove(__DECONST(void *, zero_region),
271 c, uio);
272 else
273 error = EFAULT;
274 } else {
275 error = uiomove(hpa, c, uio);
276 vm_gpa_release(cookie);
277 }
278 }
279 vm_unlock_memsegs(sc->vm);
280 return (error);
281 }
282
283 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
284
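/*
 * Copy a memory segment's length and, for device memory segments, its
 * devmem name out to the caller-supplied vm_memseg structure.
 */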
285 static int
286 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
287 {
288 struct devmem_softc *dsc;
289 int error;
290 bool sysmem;
291
292 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
293 if (error || mseg->len == 0)
294 return (error);
295
296 if (!sysmem) {
297 SLIST_FOREACH(dsc, &sc->devmem, link) {
298 if (dsc->segid == mseg->segid)
299 break;
300 }
301 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
302 __func__, mseg->segid));
303 error = copystr(dsc->name, mseg->name, len, NULL);
304 } else {
305 bzero(mseg->name, len);
306 }
307
308 return (error);
309 }
310
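/*
 * Allocate a memory segment for the VM.  A named segment is device
 * memory and gets a /dev/vmm.io/<vm>.<name> cdev; an unnamed segment
 * is system memory.
 */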
311 static int
312 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
313 struct domainset *domainset)
314 {
315 char *name;
316 int error;
317 bool sysmem;
318
319 error = 0;
320 name = NULL;
321 sysmem = true;
322
323 /*
324 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
325 * be stripped off when devfs processes the full string.
326 */
327 if (VM_MEMSEG_NAME(mseg)) {
328 sysmem = false;
329 name = malloc(len, M_VMMDEV, M_WAITOK);
330 error = copystr(mseg->name, name, len, NULL);
331 if (error)
332 goto done;
333 }
334 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
335 if (error)
336 goto done;
337
338 if (VM_MEMSEG_NAME(mseg)) {
339 error = devmem_create_cdev(sc, mseg->segid, name);
340 if (error)
341 vm_free_memseg(sc->vm, mseg->segid);
342 else
343 name = NULL; /* freed when 'cdev' is destroyed */
344 }
345 done:
346 free(name, M_VMMDEV);
347 return (error);
348 }
349
350 #if defined(__amd64__) && \
351 (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
352 /*
353 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
354 */
355 static void
356 adjust_segid(struct vm_memseg *mseg)
357 {
358 if (mseg->segid != VM_SYSMEM) {
359 mseg->segid += (VM_BOOTROM - 1);
360 }
361 }
362 #endif
363
364 static int
365 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
366 uint64_t *regval)
367 {
368 int error, i;
369
370 error = 0;
371 for (i = 0; i < count; i++) {
372 error = vm_get_register(vcpu, regnum[i], &regval[i]);
373 if (error)
374 break;
375 }
376 return (error);
377 }
378
379 static int
380 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
381 uint64_t *regval)
382 {
383 int error, i;
384
385 error = 0;
386 for (i = 0; i < count; i++) {
387 error = vm_set_register(vcpu, regnum[i], regval[i]);
388 if (error)
389 break;
390 }
391 return (error);
392 }
393
394 static int
395 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
396 {
397 int error;
398
399 /*
400 * A jail without vmm access shouldn't be able to access vmm device
401 * files at all, but check here just to be thorough.
402 */
403 error = vmm_priv_check(td->td_ucred);
404 if (error != 0)
405 return (error);
406
407 return (0);
408 }
409
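/*
 * Table of generic vmm ioctls and the locking each one requires.
 * Machine-dependent ioctls are handled separately via the
 * vmmdev_machdep_ioctls table consulted in vmmdev_ioctl() below.
 */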
410 static const struct vmmdev_ioctl vmmdev_ioctls[] = {
411 VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
412 VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
413 VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
414 VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
415 VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
416 VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
417 VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
418 VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
419 VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
420 VMMDEV_IOCTL(VM_STAT_DESC, 0),
421
422 #ifdef __amd64__
423 #ifdef COMPAT_FREEBSD12
424 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
425 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
426 #endif
427 #ifdef COMPAT_FREEBSD14
428 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
429 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
430 #endif
431 #endif /* __amd64__ */
432 VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
433 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
434 VMMDEV_IOCTL(VM_MMAP_MEMSEG,
435 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
436 VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
437 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
438 VMMDEV_IOCTL(VM_REINIT,
439 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
440
441 #ifdef __amd64__
442 #if defined(COMPAT_FREEBSD12)
443 VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
444 #endif
445 #ifdef COMPAT_FREEBSD14
446 VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
447 #endif
448 #endif /* __amd64__ */
449 VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
450 VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
451
452 VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
453 VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
454
455 VMMDEV_IOCTL(VM_SUSPEND, 0),
456 VMMDEV_IOCTL(VM_GET_CPUS, 0),
457 VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
458 VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
459 };
460
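/*
 * Main ioctl handler for /dev/vmm/<name>: find the descriptor for the
 * requested command, acquire the memory segment and vCPU locks it
 * requires, dispatch the operation, then drop the locks again.
 */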
461 static int
462 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
463 struct thread *td)
464 {
465 struct vmmdev_softc *sc;
466 struct vcpu *vcpu;
467 const struct vmmdev_ioctl *ioctl;
468 struct vm_memseg *mseg;
469 int error, vcpuid;
470
471 sc = vmmdev_lookup2(cdev);
472 if (sc == NULL)
473 return (ENXIO);
474
475 ioctl = NULL;
476 for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
477 if (vmmdev_ioctls[i].cmd == cmd) {
478 ioctl = &vmmdev_ioctls[i];
479 break;
480 }
481 }
482 if (ioctl == NULL) {
483 for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
484 if (vmmdev_machdep_ioctls[i].cmd == cmd) {
485 ioctl = &vmmdev_machdep_ioctls[i];
486 break;
487 }
488 }
489 }
490 if (ioctl == NULL)
491 return (ENOTTY);
492
493 if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
494 error = priv_check(td, PRIV_DRIVER);
495 if (error != 0)
496 return (error);
497 }
498
499 if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
500 vm_xlock_memsegs(sc->vm);
501 else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
502 vm_slock_memsegs(sc->vm);
503
504 vcpu = NULL;
505 vcpuid = -1;
506 if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
507 VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
508 vcpuid = *(int *)data;
509 if (vcpuid == -1) {
510 if ((ioctl->flags &
511 VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
512 error = EINVAL;
513 goto lockfail;
514 }
515 } else {
516 vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
517 if (vcpu == NULL) {
518 error = EINVAL;
519 goto lockfail;
520 }
521 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
522 error = vcpu_lock_one(vcpu);
523 if (error)
524 goto lockfail;
525 }
526 }
527 }
528 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
529 error = vcpu_lock_all(sc);
530 if (error)
531 goto lockfail;
532 }
533
534 switch (cmd) {
535 case VM_SUSPEND: {
536 struct vm_suspend *vmsuspend;
537
538 vmsuspend = (struct vm_suspend *)data;
539 error = vm_suspend(sc->vm, vmsuspend->how);
540 break;
541 }
542 case VM_REINIT:
543 error = vm_reinit(sc->vm);
544 break;
545 case VM_STAT_DESC: {
546 struct vm_stat_desc *statdesc;
547
548 statdesc = (struct vm_stat_desc *)data;
549 error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
550 sizeof(statdesc->desc));
551 break;
552 }
553 case VM_STATS: {
554 struct vm_stats *vmstats;
555
556 vmstats = (struct vm_stats *)data;
557 getmicrotime(&vmstats->tv);
558 error = vmm_stat_copy(vcpu, vmstats->index,
559 nitems(vmstats->statbuf), &vmstats->num_entries,
560 vmstats->statbuf);
561 break;
562 }
563 case VM_MMAP_GETNEXT: {
564 struct vm_memmap *mm;
565
566 mm = (struct vm_memmap *)data;
567 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
568 &mm->segoff, &mm->len, &mm->prot, &mm->flags);
569 break;
570 }
571 case VM_MMAP_MEMSEG: {
572 struct vm_memmap *mm;
573
574 mm = (struct vm_memmap *)data;
575 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
576 mm->len, mm->prot, mm->flags);
577 break;
578 }
579 case VM_MUNMAP_MEMSEG: {
580 struct vm_munmap *mu;
581
582 mu = (struct vm_munmap *)data;
583 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
584 break;
585 }
586 #ifdef __amd64__
587 #ifdef COMPAT_FREEBSD12
588 case VM_ALLOC_MEMSEG_12:
589 mseg = (struct vm_memseg *)data;
590
591 adjust_segid(mseg);
592 error = alloc_memseg(sc, mseg,
593 sizeof(((struct vm_memseg_12 *)0)->name), NULL);
594 break;
595 case VM_GET_MEMSEG_12:
596 mseg = (struct vm_memseg *)data;
597
598 adjust_segid(mseg);
599 error = get_memseg(sc, mseg,
600 sizeof(((struct vm_memseg_12 *)0)->name));
601 break;
602 #endif /* COMPAT_FREEBSD12 */
603 #ifdef COMPAT_FREEBSD14
604 case VM_ALLOC_MEMSEG_14:
605 mseg = (struct vm_memseg *)data;
606
607 adjust_segid(mseg);
608 error = alloc_memseg(sc, mseg,
609 sizeof(((struct vm_memseg_14 *)0)->name), NULL);
610 break;
611 case VM_GET_MEMSEG_14:
612 mseg = (struct vm_memseg *)data;
613
614 adjust_segid(mseg);
615 error = get_memseg(sc, mseg,
616 sizeof(((struct vm_memseg_14 *)0)->name));
617 break;
618 #endif /* COMPAT_FREEBSD14 */
619 #endif /* __amd64__ */
620 case VM_ALLOC_MEMSEG: {
621 domainset_t *mask;
622 struct domainset *domainset, domain;
623
624 domainset = NULL;
625 mseg = (struct vm_memseg *)data;
626 if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
627 if (mseg->ds_mask_size < sizeof(domainset_t) ||
628 mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
629 error = ERANGE;
630 break;
631 }
632 memset(&domain, 0, sizeof(domain));
633 mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
634 error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
635 if (error) {
636 free(mask, M_VMMDEV);
637 break;
638 }
639 error = domainset_populate(&domain, mask, mseg->ds_policy,
640 mseg->ds_mask_size);
641 free(mask, M_VMMDEV);
642 if (error)
643 break;
644 domainset = domainset_create(&domain);
645 if (domainset == NULL) {
646 error = EINVAL;
647 break;
648 }
649 }
650 error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
651 break;
652 }
653 case VM_GET_MEMSEG:
654 error = get_memseg(sc, (struct vm_memseg *)data,
655 sizeof(((struct vm_memseg *)0)->name));
656 break;
657 case VM_GET_REGISTER: {
658 struct vm_register *vmreg;
659
660 vmreg = (struct vm_register *)data;
661 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
662 break;
663 }
664 case VM_SET_REGISTER: {
665 struct vm_register *vmreg;
666
667 vmreg = (struct vm_register *)data;
668 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
669 break;
670 }
671 case VM_GET_REGISTER_SET: {
672 struct vm_register_set *vmregset;
673 uint64_t *regvals;
674 int *regnums;
675
676 vmregset = (struct vm_register_set *)data;
677 if (vmregset->count > VM_REG_LAST) {
678 error = EINVAL;
679 break;
680 }
681 regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
682 M_VMMDEV, M_WAITOK);
683 regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
684 M_VMMDEV, M_WAITOK);
685 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
686 vmregset->count);
687 if (error == 0)
688 error = vm_get_register_set(vcpu,
689 vmregset->count, regnums, regvals);
690 if (error == 0)
691 error = copyout(regvals, vmregset->regvals,
692 sizeof(regvals[0]) * vmregset->count);
693 free(regvals, M_VMMDEV);
694 free(regnums, M_VMMDEV);
695 break;
696 }
697 case VM_SET_REGISTER_SET: {
698 struct vm_register_set *vmregset;
699 uint64_t *regvals;
700 int *regnums;
701
702 vmregset = (struct vm_register_set *)data;
703 if (vmregset->count > VM_REG_LAST) {
704 error = EINVAL;
705 break;
706 }
707 regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
708 M_VMMDEV, M_WAITOK);
709 regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
710 M_VMMDEV, M_WAITOK);
711 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
712 vmregset->count);
713 if (error == 0)
714 error = copyin(vmregset->regvals, regvals,
715 sizeof(regvals[0]) * vmregset->count);
716 if (error == 0)
717 error = vm_set_register_set(vcpu,
718 vmregset->count, regnums, regvals);
719 free(regvals, M_VMMDEV);
720 free(regnums, M_VMMDEV);
721 break;
722 }
723 case VM_GET_CAPABILITY: {
724 struct vm_capability *vmcap;
725
726 vmcap = (struct vm_capability *)data;
727 error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
728 break;
729 }
730 case VM_SET_CAPABILITY: {
731 struct vm_capability *vmcap;
732
733 vmcap = (struct vm_capability *)data;
734 error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
735 break;
736 }
737 case VM_ACTIVATE_CPU:
738 error = vm_activate_cpu(vcpu);
739 break;
740 case VM_GET_CPUS: {
741 struct vm_cpuset *vm_cpuset;
742 cpuset_t *cpuset;
743 int size;
744
745 error = 0;
746 vm_cpuset = (struct vm_cpuset *)data;
747 size = vm_cpuset->cpusetsize;
748 if (size < 1 || size > CPU_MAXSIZE / NBBY) {
749 error = ERANGE;
750 break;
751 }
752 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
753 M_WAITOK | M_ZERO);
754 if (vm_cpuset->which == VM_ACTIVE_CPUS)
755 *cpuset = vm_active_cpus(sc->vm);
756 else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
757 *cpuset = vm_suspended_cpus(sc->vm);
758 else if (vm_cpuset->which == VM_DEBUG_CPUS)
759 *cpuset = vm_debug_cpus(sc->vm);
760 else
761 error = EINVAL;
762 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
763 error = ERANGE;
764 if (error == 0)
765 error = copyout(cpuset, vm_cpuset->cpus, size);
766 free(cpuset, M_TEMP);
767 break;
768 }
769 case VM_SUSPEND_CPU:
770 error = vm_suspend_cpu(sc->vm, vcpu);
771 break;
772 case VM_RESUME_CPU:
773 error = vm_resume_cpu(sc->vm, vcpu);
774 break;
775 case VM_SET_TOPOLOGY: {
776 struct vm_cpu_topology *topology;
777
778 topology = (struct vm_cpu_topology *)data;
779 error = vm_set_topology(sc->vm, topology->sockets,
780 topology->cores, topology->threads, topology->maxcpus);
781 break;
782 }
783 case VM_GET_TOPOLOGY: {
784 struct vm_cpu_topology *topology;
785
786 topology = (struct vm_cpu_topology *)data;
787 vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
788 &topology->threads, &topology->maxcpus);
789 error = 0;
790 break;
791 }
792 default:
793 error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
794 td);
795 break;
796 }
797
798 if ((ioctl->flags &
799 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
800 vm_unlock_memsegs(sc->vm);
801 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
802 vcpu_unlock_all(sc);
803 else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
804 vcpu_unlock_one(vcpu);
805
806 /*
807 * Make sure that no handler returns a kernel-internal
808 * error value to userspace.
809 */
810 KASSERT(error == ERESTART || error >= 0,
811 ("vmmdev_ioctl: invalid error return %d", error));
812 return (error);
813
814 lockfail:
815 if ((ioctl->flags &
816 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
817 vm_unlock_memsegs(sc->vm);
818 return (error);
819 }
820
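/*
 * mmap(2) handler for /dev/vmm/<name>.  Translates the requested file
 * offset (a guest physical address) into the backing system memory
 * object so that guest RAM can be mapped into a host process.
 */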
821 static int
822 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
823 struct vm_object **objp, int nprot)
824 {
825 struct vmmdev_softc *sc;
826 vm_paddr_t gpa;
827 size_t len;
828 vm_ooffset_t segoff, first, last;
829 int error, found, segid;
830 bool sysmem;
831
832 first = *offset;
833 last = first + mapsize;
834 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
835 return (EINVAL);
836
837 sc = vmmdev_lookup2(cdev);
838 if (sc == NULL) {
839 /* virtual machine is in the process of being created */
840 return (EINVAL);
841 }
842
843 /*
844 * Get a read lock on the guest memory map.
845 */
846 vm_slock_memsegs(sc->vm);
847
848 gpa = 0;
849 found = 0;
850 while (!found) {
851 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
852 NULL, NULL);
853 if (error)
854 break;
855
856 if (first >= gpa && last <= gpa + len)
857 found = 1;
858 else
859 gpa += len;
860 }
861
862 if (found) {
863 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
864 KASSERT(error == 0 && *objp != NULL,
865 ("%s: invalid memory segment %d", __func__, segid));
866 if (sysmem) {
867 vm_object_reference(*objp);
868 *offset = segoff + (first - gpa);
869 } else {
870 error = EINVAL;
871 }
872 }
873 vm_unlock_memsegs(sc->vm);
874 return (error);
875 }
876
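/*
 * Tear down a VM: destroy its devmem cdevs, freeze all vCPUs, free the
 * devmem softcs, destroy the VM itself, and release the owner's
 * credential and per-uid accounting.
 */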
877 static void
878 vmmdev_destroy(struct vmmdev_softc *sc)
879 {
880 struct devmem_softc *dsc;
881 int error __diagused;
882
883 KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
884 KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));
885
886 /*
887 * Destroy all cdevs:
888 *
889 * - any new operations on the 'cdev' will return an error (ENXIO).
890 *
891 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
892 */
893 SLIST_FOREACH(dsc, &sc->devmem, link) {
894 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
895 devmem_destroy(dsc);
896 }
897
898 vm_disable_vcpu_creation(sc->vm);
899 error = vcpu_lock_all(sc);
900 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
901 vm_unlock_vcpus(sc->vm);
902
903 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
904 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
905 SLIST_REMOVE_HEAD(&sc->devmem, link);
906 free(dsc->name, M_VMMDEV);
907 free(dsc, M_VMMDEV);
908 }
909
910 if (sc->vm != NULL)
911 vm_destroy(sc->vm);
912
913 chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
914 crfree(sc->ucred);
915
916 sx_xlock(&vmmdev_mtx);
917 SLIST_REMOVE(&head, sc, vmmdev_softc, link);
918 if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
919 LIST_REMOVE(sc, priv_link);
920 sx_xunlock(&vmmdev_mtx);
921 wakeup(sc);
922 free(sc, M_VMMDEV);
923 }
924
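/*
 * Look up a VM by name and destroy it.  Returns EINVAL if the VM does
 * not exist or its destruction is already in progress.
 */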
925 static int
926 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
927 {
928 struct cdev *cdev;
929 struct vmmdev_softc *sc;
930
931 sx_xlock(&vmmdev_mtx);
932 sc = vmmdev_lookup(name, cred);
933 if (sc == NULL || sc->cdev == NULL) {
934 sx_xunlock(&vmmdev_mtx);
935 return (EINVAL);
936 }
937
938 /*
939 * Setting 'sc->cdev' to NULL is used to indicate that the VM
940 * is scheduled for destruction.
941 */
942 cdev = sc->cdev;
943 sc->cdev = NULL;
944 sx_xunlock(&vmmdev_mtx);
945
946 (void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
947 destroy_dev(cdev);
948 vmmdev_destroy(sc);
949
950 return (0);
951 }
952
953 static int
954 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
955 {
956 char *buf;
957 int error, buflen;
958
959 error = vmm_priv_check(req->td->td_ucred);
960 if (error)
961 return (error);
962
963 buflen = VM_MAX_NAMELEN + 1;
964 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
965 error = sysctl_handle_string(oidp, buf, buflen, req);
966 if (error == 0 && req->newptr != NULL)
967 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
968 free(buf, M_VMMDEV);
969 return (error);
970 }
971 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
972 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
973 NULL, 0, sysctl_vmm_destroy, "A",
974 "Destroy a vmm(4) instance (legacy interface)");
975
976 static struct cdevsw vmmdevsw = {
977 .d_name = "vmmdev",
978 .d_version = D_VERSION,
979 .d_open = vmmdev_open,
980 .d_ioctl = vmmdev_ioctl,
981 .d_mmap_single = vmmdev_mmap_single,
982 .d_read = vmmdev_rw,
983 .d_write = vmmdev_rw,
984 };
985
986 static struct vmmdev_softc *
987 vmmdev_alloc(struct vm *vm, struct ucred *cred)
988 {
989 struct vmmdev_softc *sc;
990
991 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
992 SLIST_INIT(&sc->devmem);
993 sc->vm = vm;
994 sc->ucred = crhold(cred);
995 return (sc);
996 }
997
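/*
 * Create a new VM and its /dev/vmm/<name> device node, enforcing the
 * per-uid limit on the number of VMs (hw.vmm.maxvmms).
 */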
998 static int
999 vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
1000 {
1001 struct make_dev_args mda;
1002 struct cdev *cdev;
1003 struct vmmdev_softc *sc;
1004 struct vmmctl_priv *priv;
1005 struct vm *vm;
1006 int error;
1007
1008 if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
1009 return (EINVAL);
1010
1011 if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
1012 return (EINVAL);
1013 error = devfs_get_cdevpriv((void **)&priv);
1014 if (error)
1015 return (error);
1016
1017 sx_xlock(&vmmdev_mtx);
1018 sc = vmmdev_lookup(name, cred);
1019 if (sc != NULL) {
1020 sx_xunlock(&vmmdev_mtx);
1021 return (EEXIST);
1022 }
1023
1024 error = vm_create(name, &vm);
1025 if (error != 0) {
1026 sx_xunlock(&vmmdev_mtx);
1027 return (error);
1028 }
1029 sc = vmmdev_alloc(vm, cred);
1030 SLIST_INSERT_HEAD(&head, sc, link);
1031 sc->flags = flags;
1032 if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
1033 LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);
1034
1035 make_dev_args_init(&mda);
1036 mda.mda_devsw = &vmmdevsw;
1037 mda.mda_cr = sc->ucred;
1038 mda.mda_uid = UID_ROOT;
1039 mda.mda_gid = GID_WHEEL;
1040 mda.mda_mode = 0600;
1041 mda.mda_si_drv1 = sc;
1042 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1043 error = make_dev_s(&mda, &cdev, "vmm/%s", name);
1044 if (error != 0) {
1045 sx_xunlock(&vmmdev_mtx);
1046 vmmdev_destroy(sc);
1047 return (error);
1048 }
1049 if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
1050 sx_xunlock(&vmmdev_mtx);
1051 destroy_dev(cdev);
1052 vmmdev_destroy(sc);
1053 return (ENOMEM);
1054 }
1055 sc->cdev = cdev;
1056 sx_xunlock(&vmmdev_mtx);
1057 return (0);
1058 }
1059
1060 static int
1061 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1062 {
1063 char *buf;
1064 int error, buflen;
1065
1066 if (!vmm_initialized)
1067 return (ENXIO);
1068
1069 error = vmm_priv_check(req->td->td_ucred);
1070 if (error != 0)
1071 return (error);
1072
1073 buflen = VM_MAX_NAMELEN + 1;
1074 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1075 error = sysctl_handle_string(oidp, buf, buflen, req);
1076 if (error == 0 && req->newptr != NULL)
1077 error = vmmdev_create(buf, 0, req->td->td_ucred);
1078 free(buf, M_VMMDEV);
1079 return (error);
1080 }
1081 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1082 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1083 NULL, 0, sysctl_vmm_create, "A",
1084 "Create a vmm(4) instance (legacy interface)");
1085
1086 static void
1087 vmmctl_dtor(void *arg)
1088 {
1089 struct cdev *sc_cdev;
1090 struct vmmdev_softc *sc;
1091 struct vmmctl_priv *priv = arg;
1092
1093 /*
1094 * Scan the softc list for any VMs associated with
1095 * the current descriptor and destroy them.
1096 */
1097 sx_xlock(&vmmdev_mtx);
1098 while (!LIST_EMPTY(&priv->softcs)) {
1099 sc = LIST_FIRST(&priv->softcs);
1100 sc_cdev = sc->cdev;
1101 if (sc_cdev != NULL) {
1102 sc->cdev = NULL;
1103 } else {
1104 /*
1105 * Another thread has already
1106 * started the removal process.
1107 * Sleep until 'vmmdev_destroy' notifies us
1108 * that the removal has finished.
1109 */
1110 sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
1111 continue;
1112 }
1113 /*
1114 * Temporarily drop the lock to allow vmmdev_destroy to run.
1115 */
1116 sx_xunlock(&vmmdev_mtx);
1117 (void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
1118 destroy_dev(sc_cdev);
1119 /* vmmdev_destroy will unlink the 'priv_link' entry. */
1120 vmmdev_destroy(sc);
1121 sx_xlock(&vmmdev_mtx);
1122 }
1123 sx_xunlock(&vmmdev_mtx);
1124
1125 free(priv, M_VMMDEV);
1126 }
1127
1128 static int
1129 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
1130 {
1131 int error;
1132 struct vmmctl_priv *priv;
1133
1134 error = vmm_priv_check(td->td_ucred);
1135 if (error != 0)
1136 return (error);
1137
1138 if ((flags & FWRITE) == 0)
1139 return (EPERM);
1140
1141 priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
1142 LIST_INIT(&priv->softcs);
1143 error = devfs_set_cdevpriv(priv, vmmctl_dtor);
1144 if (error != 0) {
1145 free(priv, M_VMMDEV);
1146 return (error);
1147 }
1148
1149 return (0);
1150 }
1151
1152 static int
1153 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
1154 struct thread *td)
1155 {
1156 int error;
1157
1158 switch (cmd) {
1159 case VMMCTL_VM_CREATE: {
1160 struct vmmctl_vm_create *vmc;
1161
1162 vmc = (struct vmmctl_vm_create *)data;
1163 vmc->name[VM_MAX_NAMELEN] = '\0';
1164 for (size_t i = 0; i < nitems(vmc->reserved); i++) {
1165 if (vmc->reserved[i] != 0) {
1166 error = EINVAL;
1167 return (error);
1168 }
1169 }
1170
1171 error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
1172 break;
1173 }
1174 case VMMCTL_VM_DESTROY: {
1175 struct vmmctl_vm_destroy *vmd;
1176
1177 vmd = (struct vmmctl_vm_destroy *)data;
1178 vmd->name[VM_MAX_NAMELEN] = '\0';
1179 for (size_t i = 0; i < nitems(vmd->reserved); i++) {
1180 if (vmd->reserved[i] != 0) {
1181 error = EINVAL;
1182 return (error);
1183 }
1184 }
1185
1186 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
1187 break;
1188 }
1189 default:
1190 error = ENOTTY;
1191 break;
1192 }
1193
1194 return (error);
1195 }
1196
1197 static struct cdev *vmmctl_cdev;
1198 static struct cdevsw vmmctlsw = {
1199 .d_name = "vmmctl",
1200 .d_version = D_VERSION,
1201 .d_open = vmmctl_open,
1202 .d_ioctl = vmmctl_ioctl,
1203 };
1204
1205 static int
1206 vmmdev_init(void)
1207 {
1208 int error;
1209
1210 sx_xlock(&vmmdev_mtx);
1211 error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
1212 UID_ROOT, GID_WHEEL, 0600, "vmmctl");
1213 if (error == 0)
1214 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1215 "Allow use of vmm in a jail.");
1216 sx_xunlock(&vmmdev_mtx);
1217
1218 return (error);
1219 }
1220
1221 static int
1222 vmmdev_cleanup(void)
1223 {
1224 sx_xlock(&vmmdev_mtx);
1225 if (!SLIST_EMPTY(&head)) {
1226 sx_xunlock(&vmmdev_mtx);
1227 return (EBUSY);
1228 }
1229 if (vmmctl_cdev != NULL) {
1230 destroy_dev(vmmctl_cdev);
1231 vmmctl_cdev = NULL;
1232 }
1233 sx_xunlock(&vmmdev_mtx);
1234
1235 return (0);
1236 }
1237
1238 static int
1239 vmm_handler(module_t mod, int what, void *arg)
1240 {
1241 int error;
1242
1243 switch (what) {
1244 case MOD_LOAD:
1245 error = vmmdev_init();
1246 if (error != 0)
1247 break;
1248
1249 vm_maxcpu = mp_ncpus;
1250 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
1251 if (vm_maxcpu > VM_MAXCPU) {
1252 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
1253 vm_maxcpu = VM_MAXCPU;
1254 }
1255 if (vm_maxcpu == 0)
1256 vm_maxcpu = 1;
1257 vm_maxvmms = 4 * mp_ncpus;
1258 error = vmm_modinit();
1259 if (error == 0)
1260 vmm_initialized = true;
1261 else {
1262 error = vmmdev_cleanup();
1263 KASSERT(error == 0,
1264 ("%s: vmmdev_cleanup failed: %d", __func__, error));
1265 }
1266 break;
1267 case MOD_UNLOAD:
1268 error = vmmdev_cleanup();
1269 if (error == 0 && vmm_initialized) {
1270 error = vmm_modcleanup();
1271 if (error) {
1272 /*
1273 * Something bad happened - prevent new
1274 * VMs from being created
1275 */
1276 vmm_initialized = false;
1277 }
1278 }
1279 break;
1280 default:
1281 error = 0;
1282 break;
1283 }
1284 return (error);
1285 }
1286
1287 static moduledata_t vmm_kmod = {
1288 "vmm",
1289 vmm_handler,
1290 NULL
1291 };
1292
1293 /*
1294 * vmm initialization has the following dependencies:
1295 *
1296 * - Initialization requires smp_rendezvous() and therefore must happen
1297 * after SMP is fully functional (after SI_SUB_SMP).
1298 * - vmm device initialization requires an initialized devfs.
1299 */
1300 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
1301 MODULE_VERSION(vmm, 1);
1302
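/*
 * mmap(2) handler for devmem cdevs: hand back the VM object backing
 * the device memory segment so that a host process can map it.
 */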
1303 static int
1304 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1305 struct vm_object **objp, int nprot)
1306 {
1307 struct devmem_softc *dsc;
1308 vm_ooffset_t first, last;
1309 size_t seglen;
1310 int error;
1311 bool sysmem;
1312
1313 dsc = cdev->si_drv1;
1314 if (dsc == NULL) {
1315 /* 'cdev' has been created but is not ready for use */
1316 return (ENXIO);
1317 }
1318
1319 first = *offset;
1320 last = *offset + len;
1321 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1322 return (EINVAL);
1323
1324 vm_slock_memsegs(dsc->sc->vm);
1325
1326 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1327 KASSERT(error == 0 && !sysmem && *objp != NULL,
1328 ("%s: invalid devmem segment %d", __func__, dsc->segid));
1329
1330 if (seglen >= last)
1331 vm_object_reference(*objp);
1332 else
1333 error = EINVAL;
1334
1335 vm_unlock_memsegs(dsc->sc->vm);
1336 return (error);
1337 }
1338
1339 static struct cdevsw devmemsw = {
1340 .d_name = "devmem",
1341 .d_version = D_VERSION,
1342 .d_mmap_single = devmem_mmap_single,
1343 };
1344
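/*
 * Create the /dev/vmm.io/<vm>.<name> device node for a device memory
 * segment and link its softc onto the VM's devmem list.
 */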
1345 static int
1346 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
1347 {
1348 struct make_dev_args mda;
1349 struct devmem_softc *dsc;
1350 int error;
1351
1352 sx_xlock(&vmmdev_mtx);
1353
1354 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1355 dsc->segid = segid;
1356 dsc->name = devname;
1357 dsc->sc = sc;
1358 SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1359
1360 make_dev_args_init(&mda);
1361 mda.mda_devsw = &devmemsw;
1362 mda.mda_cr = sc->ucred;
1363 mda.mda_uid = UID_ROOT;
1364 mda.mda_gid = GID_WHEEL;
1365 mda.mda_mode = 0600;
1366 mda.mda_si_drv1 = dsc;
1367 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1368 error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
1369 devname);
1370 if (error != 0) {
1371 SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
1372 free(dsc->name, M_VMMDEV);
1373 free(dsc, M_VMMDEV);
1374 }
1375
1376 sx_xunlock(&vmmdev_mtx);
1377
1378 return (error);
1379 }
1380
1381 static void
1382 devmem_destroy(void *arg)
1383 {
1384 struct devmem_softc *dsc = arg;
1385
1386 destroy_dev(dsc->cdev);
1387 dsc->cdev = NULL;
1388 dsc->sc = NULL;
1389 }
1390