1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6 * All rights reserved.
7 */
8
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #define EXTERR_CATEGORY EXTERR_CAT_VMM
12 #include <sys/exterrvar.h>
13 #include <sys/fcntl.h>
14 #include <sys/ioccom.h>
15 #include <sys/jail.h>
16 #include <sys/kernel.h>
17 #include <sys/malloc.h>
18 #include <sys/mman.h>
19 #include <sys/module.h>
20 #include <sys/priv.h>
21 #include <sys/proc.h>
22 #include <sys/queue.h>
23 #include <sys/resourcevar.h>
24 #include <sys/smp.h>
25 #include <sys/sx.h>
26 #include <sys/sysctl.h>
27 #include <sys/ucred.h>
28 #include <sys/uio.h>
29
30 #include <machine/vmm.h>
31
32 #include <vm/vm.h>
33 #include <vm/vm_object.h>
34
35 #include <dev/vmm/vmm_dev.h>
36 #include <dev/vmm/vmm_mem.h>
37 #include <dev/vmm/vmm_stat.h>
38 #include <dev/vmm/vmm_vm.h>
39
40 #ifdef __amd64__
41 #ifdef COMPAT_FREEBSD12
/*
 * Old vm_memseg ABI used by FreeBSD 12 era binaries: the name field is a
 * fixed 64 bytes.  Retained only so the _12 ioctl variants below can be
 * serviced under COMPAT_FREEBSD12.
 */
struct vm_memseg_12 {
	int		segid;		/* memory segment identifier */
	size_t		len;		/* segment length in bytes */
	char		name[64];	/* devmem name; empty for sysmem */
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

/* Compat ioctl numbers share IOCNUM_* with the native ioctls. */
#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
53 #endif /* COMPAT_FREEBSD12 */
54 #ifdef COMPAT_FREEBSD14
/*
 * vm_memseg ABI used by FreeBSD 14 era binaries; differs from the native
 * struct only in layout, serviced under COMPAT_FREEBSD14.
 */
struct vm_memseg_14 {
	int		segid;		/* memory segment identifier */
	size_t		len;		/* segment length in bytes */
	char		name[VM_MAX_SUFFIXLEN + 1];	/* devmem name */
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

/* Compat ioctl numbers share IOCNUM_* with the native ioctls. */
#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
67 #endif /* COMPAT_FREEBSD14 */
68 #endif /* __amd64__ */
69
/*
 * State for one device-memory segment's character device, linked off the
 * owning VM's softc (see vmmdev_softc below).
 */
struct devmem_softc {
	int		segid;	/* memory segment id within the VM */
	char		*name;	/* segment name; freed with this struct */
	struct cdev	*cdev;	/* devmem cdev; NULL once destroyed */
	struct vmmdev_softc *sc;	/* back-pointer to owning VM */
	SLIST_ENTRY(devmem_softc) link;	/* entry in sc->devmem */
};
77
/*
 * Per-VM device state, one per VM cdev.  Allocated by vmmdev_alloc() and
 * freed by vmmdev_destroy().
 */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;		/* NULL once destruction is underway */
	struct ucred	*ucred;		/* creator's credentials (held) */
	SLIST_ENTRY(vmmdev_softc) link;	/* entry in the global 'head' list */
	LIST_ENTRY(vmmdev_softc) priv_link; /* entry in creator's vmmctl_priv */
	SLIST_HEAD(, devmem_softc) devmem;  /* devmem segments of this VM */
	int		flags;		/* VMMCTL_* flags from creation */
};
87
/*
 * Per-open-descriptor state for the vmmctl control device; tracks the
 * VMs created with VMMCTL_CREATE_DESTROY_ON_CLOSE so vmmctl_dtor() can
 * destroy them when the descriptor is closed.
 */
struct vmmctl_priv {
	LIST_HEAD(, vmmdev_softc) softcs;
};
91
/* Whether the vmm subsystem is ready; consulted before VM creation. */
static bool vmm_initialized = false;

/* Global list of all VM softcs; protected by vmmdev_mtx. */
static SLIST_HEAD(, vmmdev_softc) head;

/* Jail permission bits registered in vmmdev_init(). */
static unsigned int pr_allow_vmm_flag, pr_allow_vmm_ppt_flag;

/* Serializes VM creation/destruction and protects the lists above. */
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

/* Upper bound on vCPUs per VM (hw.vmm.maxcpu, read-only tunable). */
u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

/* Per-user limit on simultaneous VM instances (hw.vmm.maxvmms). */
u_int vm_maxvmms;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
    &vm_maxvmms, 0, "Maximum number of VMM instances per user");

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
static void vmmdev_destroy(struct vmmdev_softc *sc);
115
116 static int
vmm_jail_priv_check(struct ucred * ucred)117 vmm_jail_priv_check(struct ucred *ucred)
118 {
119 if (jailed(ucred) &&
120 (ucred->cr_prison->pr_allow & pr_allow_vmm_flag) == 0)
121 return (EPERM);
122
123 return (0);
124 }
125
126 static int
vcpu_lock_one(struct vcpu * vcpu)127 vcpu_lock_one(struct vcpu *vcpu)
128 {
129 return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
130 }
131
132 static void
vcpu_unlock_one(struct vcpu * vcpu)133 vcpu_unlock_one(struct vcpu *vcpu)
134 {
135 enum vcpu_state state;
136
137 state = vcpu_get_state(vcpu, NULL);
138 if (state != VCPU_FROZEN) {
139 panic("vcpu %s(%d) has invalid state %d",
140 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
141 }
142
143 vcpu_set_state(vcpu, VCPU_IDLE, false);
144 }
145
146 static int
vcpu_lock_all(struct vmmdev_softc * sc)147 vcpu_lock_all(struct vmmdev_softc *sc)
148 {
149 int error;
150
151 /*
152 * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked
153 * in a consistent order so we need to serialize to avoid deadlocks.
154 */
155 vm_lock_vcpus(sc->vm);
156 error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
157 if (error != 0)
158 vm_unlock_vcpus(sc->vm);
159 return (error);
160 }
161
162 static void
vcpu_unlock_all(struct vmmdev_softc * sc)163 vcpu_unlock_all(struct vmmdev_softc *sc)
164 {
165 struct vcpu *vcpu;
166 uint16_t i, maxcpus;
167
168 maxcpus = vm_get_maxcpus(sc->vm);
169 for (i = 0; i < maxcpus; i++) {
170 vcpu = vm_vcpu(sc->vm, i);
171 if (vcpu == NULL)
172 continue;
173 vcpu_unlock_one(vcpu);
174 }
175 vm_unlock_vcpus(sc->vm);
176 }
177
178 static struct vmmdev_softc *
vmmdev_lookup(const char * name,struct ucred * cred)179 vmmdev_lookup(const char *name, struct ucred *cred)
180 {
181 struct vmmdev_softc *sc;
182
183 sx_assert(&vmmdev_mtx, SA_XLOCKED);
184
185 SLIST_FOREACH(sc, &head, link) {
186 if (strcmp(name, vm_name(sc->vm)) == 0)
187 break;
188 }
189
190 if (sc == NULL)
191 return (NULL);
192
193 if (cr_cansee(cred, sc->ucred))
194 return (NULL);
195
196 return (sc);
197 }
198
199 static struct vmmdev_softc *
vmmdev_lookup2(struct cdev * cdev)200 vmmdev_lookup2(struct cdev *cdev)
201 {
202 return (cdev->si_drv1);
203 }
204
/*
 * read(2)/write(2) handler for the VM cdev: copies data between the
 * caller and guest physical memory at the current file offset, one
 * page at a time, under a shared memseg lock.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		/* Transfer at most up to the next page boundary. */
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			/* Reads from a hole below maxaddr return zeroes. */
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
253
254 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
255
256 static int
get_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len)257 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
258 {
259 struct devmem_softc *dsc;
260 int error;
261 bool sysmem;
262
263 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
264 if (error || mseg->len == 0)
265 return (error);
266
267 if (!sysmem) {
268 SLIST_FOREACH(dsc, &sc->devmem, link) {
269 if (dsc->segid == mseg->segid)
270 break;
271 }
272 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
273 __func__, mseg->segid));
274 error = copystr(dsc->name, mseg->name, len, NULL);
275 } else {
276 bzero(mseg->name, len);
277 }
278
279 return (error);
280 }
281
282 static int
alloc_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len,struct domainset * domainset)283 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
284 struct domainset *domainset)
285 {
286 char *name;
287 int error;
288 bool sysmem;
289
290 error = 0;
291 name = NULL;
292 sysmem = true;
293
294 /*
295 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
296 * by stripped off when devfs processes the full string.
297 */
298 if (VM_MEMSEG_NAME(mseg)) {
299 sysmem = false;
300 name = malloc(len, M_VMMDEV, M_WAITOK);
301 error = copystr(mseg->name, name, len, NULL);
302 if (error)
303 goto done;
304 }
305 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
306 if (error)
307 goto done;
308
309 if (VM_MEMSEG_NAME(mseg)) {
310 error = devmem_create_cdev(sc, mseg->segid, name);
311 if (error)
312 vm_free_memseg(sc->vm, mseg->segid);
313 else
314 name = NULL; /* freed when 'cdev' is destroyed */
315 }
316 done:
317 free(name, M_VMMDEV);
318 return (error);
319 }
320
321 #if defined(__amd64__) && \
322 (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
323 /*
324 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
325 */
326 static void
adjust_segid(struct vm_memseg * mseg)327 adjust_segid(struct vm_memseg *mseg)
328 {
329 if (mseg->segid != VM_SYSMEM) {
330 mseg->segid += (VM_BOOTROM - 1);
331 }
332 }
333 #endif
334
335 static int
vm_get_register_set(struct vcpu * vcpu,unsigned int count,int * regnum,uint64_t * regval)336 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
337 uint64_t *regval)
338 {
339 int error, i;
340
341 error = 0;
342 for (i = 0; i < count; i++) {
343 error = vm_get_register(vcpu, regnum[i], ®val[i]);
344 if (error)
345 break;
346 }
347 return (error);
348 }
349
350 static int
vm_set_register_set(struct vcpu * vcpu,unsigned int count,int * regnum,uint64_t * regval)351 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
352 uint64_t *regval)
353 {
354 int error, i;
355
356 error = 0;
357 for (i = 0; i < count; i++) {
358 error = vm_set_register(vcpu, regnum[i], regval[i]);
359 if (error)
360 break;
361 }
362 return (error);
363 }
364
365 static int
vmmdev_open(struct cdev * dev,int flags,int fmt,struct thread * td)366 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
367 {
368 int error;
369
370 /*
371 * A jail without vmm access shouldn't be able to access vmm device
372 * files at all, but check here just to be thorough.
373 */
374 error = vmm_jail_priv_check(td->td_ucred);
375 if (error != 0)
376 return (error);
377
378 return (0);
379 }
380
/*
 * Table of machine-independent ioctls and the locking each handler
 * needs (one vCPU frozen, all vCPUs frozen, shared/exclusive memseg
 * lock, vCPU allocation).  vmmdev_ioctl() consults this table before
 * falling back to the machine-dependent table.
 */
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

	/* Memory-map mutations freeze all vCPUs and take the write lock. */
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

	/* Memory-map readers only need the shared lock. */
#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
431
/*
 * ioctl(2) handler for a VM cdev.  Finds the descriptor for 'cmd' (in
 * the generic table, then the machine-dependent one), performs the
 * privilege checks and acquires the locks the descriptor requests,
 * dispatches to the per-command code, and finally releases the locks.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/* Locate the ioctl descriptor: generic table, then machdep. */
	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	/* PCI pass-through ioctls need extra jail and priv checks. */
	if ((ioctl->flags & VMMDEV_IOCTL_PPT) != 0) {
		if (jailed(td->td_ucred) && (td->td_ucred->cr_prison->pr_allow &
		    pr_allow_vmm_ppt_flag) == 0)
			return (EPERM);
		error = priv_check(td, PRIV_VMM_PPTDEV);
		if (error != 0)
			return (error);
	}

	/* Take the memseg lock in the mode the descriptor asks for. */
	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	/*
	 * For per-vCPU commands the vCPU id is the first int of the
	 * argument structure.  -1 is only valid for commands flagged
	 * MAYBE_ALLOC_VCPU (it then means "all vCPUs").
	 */
	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
	/*
	 * Compat cases: translate old segment ids, then use the old
	 * (shorter) name-field size for the copy.
	 */
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		/* Optionally build a NUMA domainset from the user mask. */
		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			free(mask, M_VMMDEV);
			if (error)
				break;
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				break;
			}
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		/* Bound the allocation before trusting the user count. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		/* Bound the allocation before trusting the user count. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		/* The user's buffer must cover the highest set bit. */
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		/* Not a generic command; let the machdep handler try. */
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	/* Release locks in the reverse order they were acquired. */
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	/* Only the memseg lock can be held when the vCPU setup fails. */
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}
794
/*
 * mmap(2) handler for the VM cdev: map a range of guest-physical
 * addresses by returning the VM object backing the memory segment
 * mapped there.  Only system memory may be mapped this way, and
 * PROT_EXEC is refused.  On success *objp holds a new reference and
 * *offset is rewritten to the offset within that object.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	/* Walk the memory map for a mapping that covers [first, last). */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			/* Devmem segments are mapped via their own cdev. */
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
850
/*
 * Final teardown of a VM softc.  The caller must already have cleared
 * sc->cdev and destroyed the VM cdev, so no new operations can start.
 * Destroys devmem cdevs, freezes all vCPUs, frees devmem bookkeeping,
 * destroys the VM, drops accounting and credentials, unlinks the softc
 * and wakes any thread sleeping on it in vmmctl_dtor().
 */
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	/* Freeze every vCPU and forbid creation of new ones. */
	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	/* devmem_destroy() cleared each entry's cdev; free the rest. */
	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	vm_destroy(sc->vm);

	/* Release the per-user VM count and the creator's credential. */
	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
	crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_REMOVE(sc, priv_link);
	sx_xunlock(&vmmdev_mtx);
	/* Wake vmmctl_dtor() waiters sleeping on this softc. */
	wakeup(sc);
	free(sc, M_VMMDEV);
}
897
/*
 * Look up the VM named 'name' on behalf of 'cred' and destroy it.
 * Returns EINVAL if the VM does not exist, is invisible to the caller,
 * or is already being destroyed; EPERM-class errors if the caller may
 * not destroy it; 0 on success.
 */
static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Only the creator of a VM or a privileged user can destroy it.
	 */
	if ((cred->cr_uid != sc->ucred->cr_uid ||
	    cred->cr_prison != sc->ucred->cr_prison) &&
	    (error = priv_check_cred(cred, PRIV_VMM_DESTROY)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	/* Suspend the VM, then tear down the cdev and the softc. */
	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}
936
937 static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)938 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
939 {
940 char *buf;
941 int error, buflen;
942
943 error = vmm_jail_priv_check(req->td->td_ucred);
944 if (error)
945 return (error);
946
947 buflen = VM_MAX_NAMELEN + 1;
948 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
949 error = sysctl_handle_string(oidp, buf, buflen, req);
950 if (error == 0 && req->newptr != NULL)
951 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
952 free(buf, M_VMMDEV);
953 return (error);
954 }
955 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
956 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
957 NULL, 0, sysctl_vmm_destroy, "A",
958 "Destroy a vmm(4) instance (legacy interface)");
959
/* Character-device switch for per-VM device nodes (vmm/<name>). */
static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};
969
970 static struct vmmdev_softc *
vmmdev_alloc(struct vm * vm,struct ucred * cred)971 vmmdev_alloc(struct vm *vm, struct ucred *cred)
972 {
973 struct vmmdev_softc *sc;
974
975 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
976 SLIST_INIT(&sc->devmem);
977 sc->vm = vm;
978 sc->ucred = crhold(cred);
979 return (sc);
980 }
981
/*
 * Create a new VM named 'name' along with its device node.  'flags'
 * are the VMMCTL_* creation flags; unprivileged callers must pass
 * VMMCTL_CREATE_DESTROY_ON_CLOSE.  Returns 0 or an errno value
 * (EEXIST for a duplicate name, ENOMEM when the per-user limit is
 * exceeded, ...).
 */
static int
vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv;
	struct vm *vm;
	int error;

	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
		return (EINVAL);

	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
		return (EINVAL);
	/* Fetch the per-descriptor state set up by vmmctl_open(). */
	error = devfs_get_cdevpriv((void **)&priv);
	if (error)
		return (error);

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	/*
	 * Unprivileged users can only create VMs that will be automatically
	 * destroyed when the creating descriptor is closed.
	 */
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) == 0 &&
	    (error = priv_check_cred(cred, PRIV_VMM_CREATE)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (EXTERROR(error,
		    "An unprivileged user must run VMs in monitor mode"));
	}

	if ((error = vmm_jail_priv_check(cred)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (EXTERROR(error,
		    "VMs cannot be created in the current jail"));
	}

	/* Charge the VM against the creating user's limit. */
	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
		sx_xunlock(&vmmdev_mtx);
		return (ENOMEM);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		/* Roll back the accounting charge. */
		(void)chgvmmcnt(cred->cr_ruidinfo, -1, 0);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);
	sc->flags = flags;
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = cred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		/* vmmdev_destroy() re-takes vmmdev_mtx; drop it first. */
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}
1060
1061 static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)1062 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1063 {
1064 char *buf;
1065 int error, buflen;
1066
1067 if (!vmm_initialized)
1068 return (ENXIO);
1069
1070 error = vmm_jail_priv_check(req->td->td_ucred);
1071 if (error != 0)
1072 return (error);
1073
1074 buflen = VM_MAX_NAMELEN + 1;
1075 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1076 error = sysctl_handle_string(oidp, buf, buflen, req);
1077 if (error == 0 && req->newptr != NULL)
1078 error = vmmdev_create(buf, 0, req->td->td_ucred);
1079 free(buf, M_VMMDEV);
1080 return (error);
1081 }
1082 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1083 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1084 NULL, 0, sysctl_vmm_create, "A",
1085 "Create a vmm(4) instance (legacy interface)");
1086
/*
 * cdevpriv destructor for a vmmctl descriptor: destroy every VM that
 * was created through this descriptor with DESTROY_ON_CLOSE, then free
 * the per-descriptor state.  Runs when the descriptor is closed.
 */
static void
vmmctl_dtor(void *arg)
{
	struct cdev *sc_cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv = arg;

	/*
	 * Scan the softc list for any VMs associated with
	 * the current descriptor and destroy them.
	 */
	sx_xlock(&vmmdev_mtx);
	while (!LIST_EMPTY(&priv->softcs)) {
		sc = LIST_FIRST(&priv->softcs);
		sc_cdev = sc->cdev;
		if (sc_cdev != NULL) {
			/* Claim the destruction (see vmmdev_lookup_and_destroy). */
			sc->cdev = NULL;
		} else {
			/*
			 * Another thread has already
			 * started the removal process.
			 * Sleep until 'vmmdev_destroy' notifies us
			 * that the removal has finished.
			 */
			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
			continue;
		}
		/*
		 * Temporarily drop the lock to allow vmmdev_destroy to run.
		 */
		sx_xunlock(&vmmdev_mtx);
		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
		destroy_dev(sc_cdev);
		/* vmmdev_destroy will unlink the 'priv_link' entry. */
		vmmdev_destroy(sc);
		sx_xlock(&vmmdev_mtx);
	}
	sx_xunlock(&vmmdev_mtx);

	free(priv, M_VMMDEV);
}
1128
1129 static int
vmmctl_open(struct cdev * cdev,int flags,int fmt,struct thread * td)1130 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
1131 {
1132 int error;
1133 struct vmmctl_priv *priv;
1134
1135 error = vmm_jail_priv_check(td->td_ucred);
1136 if (error != 0)
1137 return (error);
1138
1139 if ((flags & FWRITE) == 0)
1140 return (EPERM);
1141
1142 priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
1143 LIST_INIT(&priv->softcs);
1144 error = devfs_set_cdevpriv(priv, vmmctl_dtor);
1145 if (error != 0) {
1146 free(priv, M_VMMDEV);
1147 return (error);
1148 }
1149
1150 return (0);
1151 }
1152
1153 static int
vmmctl_ioctl(struct cdev * cdev,u_long cmd,caddr_t data,int fflag,struct thread * td)1154 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
1155 struct thread *td)
1156 {
1157 int error;
1158
1159 switch (cmd) {
1160 case VMMCTL_VM_CREATE: {
1161 struct vmmctl_vm_create *vmc;
1162
1163 vmc = (struct vmmctl_vm_create *)data;
1164 vmc->name[VM_MAX_NAMELEN] = '\0';
1165 for (size_t i = 0; i < nitems(vmc->reserved); i++) {
1166 if (vmc->reserved[i] != 0) {
1167 error = EINVAL;
1168 return (error);
1169 }
1170 }
1171
1172 error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
1173 break;
1174 }
1175 case VMMCTL_VM_DESTROY: {
1176 struct vmmctl_vm_destroy *vmd;
1177
1178 vmd = (struct vmmctl_vm_destroy *)data;
1179 vmd->name[VM_MAX_NAMELEN] = '\0';
1180 for (size_t i = 0; i < nitems(vmd->reserved); i++) {
1181 if (vmd->reserved[i] != 0) {
1182 error = EINVAL;
1183 return (error);
1184 }
1185 }
1186
1187 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
1188 break;
1189 }
1190 default:
1191 error = ENOTTY;
1192 break;
1193 }
1194
1195 return (error);
1196 }
1197
/* The /dev/vmmctl node and its character-device switch. */
static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};
1205
1206 static int
vmmdev_init(void)1207 vmmdev_init(void)
1208 {
1209 int error;
1210
1211 sx_xlock(&vmmdev_mtx);
1212 error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
1213 UID_ROOT, GID_VMM, 0660, "vmmctl");
1214 if (error == 0) {
1215 pr_allow_vmm_flag = prison_add_allow(NULL, "vmm", NULL,
1216 "Allow use of vmm in a jail");
1217 pr_allow_vmm_ppt_flag = prison_add_allow(NULL, "vmm_ppt", NULL,
1218 "Allow use of vmm with ppt devices in a jail");
1219 }
1220 sx_xunlock(&vmmdev_mtx);
1221
1222 return (error);
1223 }
1224
1225 static int
vmmdev_cleanup(void)1226 vmmdev_cleanup(void)
1227 {
1228 sx_xlock(&vmmdev_mtx);
1229 if (!SLIST_EMPTY(&head)) {
1230 sx_xunlock(&vmmdev_mtx);
1231 return (EBUSY);
1232 }
1233 if (vmmctl_cdev != NULL) {
1234 destroy_dev(vmmctl_cdev);
1235 vmmctl_cdev = NULL;
1236 }
1237 sx_xunlock(&vmmdev_mtx);
1238
1239 return (0);
1240 }
1241
1242 static int
vmm_handler(module_t mod,int what,void * arg)1243 vmm_handler(module_t mod, int what, void *arg)
1244 {
1245 int error;
1246
1247 switch (what) {
1248 case MOD_LOAD:
1249 error = vmmdev_init();
1250 if (error != 0)
1251 break;
1252
1253 vm_maxcpu = mp_ncpus;
1254 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
1255 if (vm_maxcpu > VM_MAXCPU) {
1256 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
1257 vm_maxcpu = VM_MAXCPU;
1258 }
1259 if (vm_maxcpu == 0)
1260 vm_maxcpu = 1;
1261 vm_maxvmms = 4 * mp_ncpus;
1262 error = vmm_modinit();
1263 if (error == 0)
1264 vmm_initialized = true;
1265 else {
1266 int error1 __diagused;
1267
1268 error1 = vmmdev_cleanup();
1269 KASSERT(error1 == 0,
1270 ("%s: vmmdev_cleanup failed: %d", __func__, error1));
1271 }
1272 break;
1273 case MOD_UNLOAD:
1274 error = vmmdev_cleanup();
1275 if (error == 0 && vmm_initialized) {
1276 error = vmm_modcleanup();
1277 if (error) {
1278 /*
1279 * Something bad happened - prevent new
1280 * VMs from being created
1281 */
1282 vmm_initialized = false;
1283 }
1284 }
1285 break;
1286 default:
1287 error = 0;
1288 break;
1289 }
1290 return (error);
1291 }
1292
/* Module linkage: name, event handler, no extra argument. */
static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};
1298
/*
 * vmm initialization has the following dependencies:
 *
 * - Initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 *
 * Hence the module is ordered after whichever of those two subsystems
 * initializes later.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
1308
/*
 * d_mmap_single handler for a devmem cdev: validate the requested
 * [offset, offset + len) range against the backing memory segment and
 * return a referenced VM object for the mapping.  Executable mappings
 * are refused.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	/* 'first < 0' guards negative offsets (vm_ooffset_t is signed). */
	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	/* Hold the memseg shared lock across lookup and referencing. */
	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	/*
	 * A devmem cdev is only created for a valid non-system segment, so
	 * failure here indicates a bug; on INVARIANTS kernels this panics.
	 */
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	/* The whole requested range must fit inside the segment. */
	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}
1344
/* Character-device switch for devmem nodes; mmap is the only operation. */
static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};
1350
/*
 * Create the /dev/vmm.io/<vm>.<devname> node backing devmem segment
 * 'segid'.  On success, ownership of the malloc'd 'devname' string
 * transfers to the new softc; on failure it is freed here.  Either way
 * the caller must not use 'devname' afterwards.
 */
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	/* Link into the VM's devmem list while holding the lock. */
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = sc->ucred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		/* Undo the list insertion and free everything we own. */
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}
1386
1387 static void
devmem_destroy(void * arg)1388 devmem_destroy(void *arg)
1389 {
1390 struct devmem_softc *dsc = arg;
1391
1392 destroy_dev(dsc->cdev);
1393 dsc->cdev = NULL;
1394 dsc->sc = NULL;
1395 }
1396