/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int segid;
	size_t len;
	char name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define VM_ALLOC_MEMSEG_14 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define VM_GET_MEMSEG_14 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int segid;
	char *name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm *vm;		/* vm instance cookie */
	struct cdev *cdev;
	struct ucred *ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

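/*
 * Reject callers from jails that have not been granted the "allow.vmm"
 * permission; unjailed callers always pass.
 */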
static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

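/*
 * Freeze all vCPUs of 'vm', unwinding the ones already frozen if any
 * attempt fails.  Compiled out on amd64, which presumably supplies its
 * own implementation in machine-dependent code.
 */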
#ifndef __amd64__
static int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
	}

	return (error);
}
#endif

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

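/*
 * Find the softc for the named VM.  The caller must hold vmmdev_mtx
 * exclusively; a VM that the caller's credentials cannot see (cr_cansee())
 * is treated as nonexistent.
 */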
static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

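/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies guest physical
 * memory one page at a time at the current uio offset.  Reads from holes
 * below the top of system memory return zeroes so that tools like dd(1)
 * can step past them.
 */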
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

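/*
 * Copy the length and, for device memory, the devmem name of an existing
 * memory segment out to the caller-supplied vm_memseg.  'len' bounds the
 * name buffer so the compat handlers can pass their smaller sizes.
 */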
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

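/*
 * Allocate a memory segment.  A segment with a name is device memory and
 * additionally gets a /dev/vmm.io/<vm>.<name> cdev; an unnamed segment is
 * system memory.
 */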
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

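/*
 * Table of generic ioctls and the locking each one requires; the flags are
 * interpreted by vmmdev_ioctl() before dispatching to the handler.
 * Machine-dependent ioctls live in vmmdev_machdep_ioctls[].
 */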
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

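/*
 * Main ioctl dispatcher for a VM's control device: look the command up in
 * the generic or machine-dependent table, acquire the memseg and vCPU locks
 * that the table entry requests, run the handler, and drop the locks.
 */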
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
		error = priv_check(td, PRIV_DRIVER);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);

		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

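/*
 * mmap(2) handler for /dev/vmm/<name>: translate a range of guest physical
 * addresses into the backing VM object of the containing memory mapping.
 * Only system memory segments may be mapped this way, and never executable.
 */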
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

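/*
 * Look up the named VM and tear it down: clear sc->cdev to mark it as dying,
 * suspend the VM for destruction, then destroy the cdev and the softc.
 */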
static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

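/*
 * Create a new VM and its /dev/vmm/<name> node, owned by the supplied
 * credentials.  Fails with EEXIST if a visible VM of that name already
 * exists.
 */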
static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

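/*
 * ioctl handler for /dev/vmmctl, the current interface for creating and
 * destroying VMs (the hw.vmm.create/destroy sysctls above are the legacy
 * path).  A userspace sketch, assuming the structure layout declared in
 * <dev/vmm/vmm_dev.h>:
 *
 *	struct vmmctl_vm_create vmc;
 *	int fd;
 *
 *	memset(&vmc, 0, sizeof(vmc));
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	fd = open("/dev/vmmctl", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, VMMCTL_VM_CREATE, &vmc) == 0)
 *		... /dev/vmm/testvm now exists ...
 */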
static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

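/*
 * mmap(2) handler for a devmem cdev (/dev/vmm.io/<vm>.<name>): hand back
 * the VM object backing the device memory segment, provided the request
 * lies within the segment and is not executable.
 */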
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

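/*
 * Create the /dev/vmm.io/<vm>.<name> node for a device memory segment and
 * link its softc onto the owning VM's devmem list.
 */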
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}