/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
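/*
 * Layout of the vm_memseg ioctl argument as used by FreeBSD 12 era binaries.
 * The same ioctl numbers are redefined below with this older structure so
 * that COMPAT_FREEBSD12 consumers keep working.
 */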
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif

struct devmem_softc {
	int segid;
	char *name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm *vm;		/* vm instance cookie */
	struct cdev *cdev;
	struct ucred *ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

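/*
 * Return 0 if the credential may use vmm, or EPERM if it belongs to a jail
 * that has not been granted the vmm allow permission (registered via
 * prison_add_allow() in vmmdev_init()).
 */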
static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

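/*
 * Freeze every vcpu in the virtual machine.  On failure, any vcpus that were
 * already frozen are thawed again so the caller sees all-or-nothing
 * behaviour.
 */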
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

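/*
 * Look up a VM by name.  The caller must hold vmmdev_mtx exclusively, and a
 * softc is returned only if the supplied credential is allowed to see the
 * VM's owner (cr_cansee()).
 */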
static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

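/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  The file offset is treated
 * as a guest physical address, which lets tools such as dd(1) inspect or
 * patch guest memory.
 */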
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

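/*
 * Copy a memory segment's length, and for device memory segments its name,
 * out to the caller-supplied vm_memseg structure.
 */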
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

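/*
 * Allocate a memory segment.  A named (device memory) segment additionally
 * gets a devmem cdev under /dev/vmm.io/ so that userspace can mmap(2) it.
 */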
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

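/*
 * Table of generically handled ioctls and the locking each one requires:
 * shared or exclusive memory-segment locks and one-vcpu, all-vcpu or
 * allocate-on-demand vcpu locking.  Machine-dependent ioctls are kept in the
 * separate vmmdev_machdep_ioctls table.
 */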
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

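/*
 * Main ioctl handler for /dev/vmm/<name>: look the command up in the generic
 * and machine-dependent tables, take the locks requested by its flags, and
 * dispatch it.  For per-vcpu commands the first int of the argument
 * structure is the vcpu ID.
 */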
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
	case VM_GET_MEMSEG_12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

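/*
 * mmap(2) support for /dev/vmm/<name>: translate the requested offset,
 * interpreted as a guest physical address, into the VM object backing the
 * system memory segment mapped at that address.
 */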
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

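/*
 * Tear a VM down: destroy its devmem cdevs, disable vcpu creation and freeze
 * all vcpus, free the devmem bookkeeping, and finally destroy the VM and the
 * softc itself.  The caller must already have destroyed and cleared
 * sc->cdev.
 */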
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);
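
/*
 * Illustrative usage of the sysctl interface (the VM name is supplied as the
 * new string value):
 *
 *	# sysctl hw.vmm.destroy=testvm
 */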

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

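/*
 * Create a new VM and its /dev/vmm/<name> device node, owned by the supplied
 * credential.  Fails with EEXIST if a visible VM of that name already
 * exists.
 */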
static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);
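
/*
 * Illustrative usage: writing a name to this sysctl creates the VM and its
 * device node, e.g.
 *
 *	# sysctl hw.vmm.create=testvm
 *	# ls /dev/vmm/testvm
 */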

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

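/*
 * ioctl handler for /dev/vmmctl, the control device used to create and
 * destroy VMs.  Reserved fields in the request structures must be zero so
 * that they can later be reused for extensions.
 *
 * Illustrative userspace sketch (error handling omitted):
 *
 *	struct vmmctl_vm_create vmc;
 *	int fd;
 *
 *	memset(&vmc, 0, sizeof(vmc));
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	fd = open("/dev/vmmctl", O_RDWR);
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);
 */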
static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

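/*
 * mmap(2) support for a devmem cdev: return the VM object backing the device
 * memory segment, provided the requested range lies entirely within it.
 */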
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

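/*
 * Create the /dev/vmm.io/<vmname>.<segname> cdev for a device memory segment
 * and link its softc into the owning VM's devmem list.
 */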
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}