1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6 * All rights reserved.
7 */
8
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #include <sys/fcntl.h>
12 #include <sys/ioccom.h>
13 #include <sys/jail.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/mman.h>
17 #include <sys/proc.h>
18 #include <sys/queue.h>
19 #include <sys/sx.h>
20 #include <sys/sysctl.h>
21 #include <sys/ucred.h>
22 #include <sys/uio.h>
23
24 #include <machine/vmm.h>
25
26 #include <vm/vm.h>
27 #include <vm/vm_object.h>
28
29 #include <dev/vmm/vmm_dev.h>
30 #include <dev/vmm/vmm_stat.h>
31
32 #if defined(__amd64__) && defined(COMPAT_FREEBSD12)
33 struct vm_memseg_12 {
34 int segid;
35 size_t len;
36 char name[64];
37 };
38 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
39
40 #define VM_ALLOC_MEMSEG_12 \
41 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
42 #define VM_GET_MEMSEG_12 \
43 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
44 #endif
45
46 struct devmem_softc {
47 int segid;
48 char *name;
49 struct cdev *cdev;
50 struct vmmdev_softc *sc;
51 SLIST_ENTRY(devmem_softc) link;
52 };
53
54 struct vmmdev_softc {
55 struct vm *vm; /* vm instance cookie */
56 struct cdev *cdev;
57 struct ucred *ucred;
58 SLIST_ENTRY(vmmdev_softc) link;
59 SLIST_HEAD(, devmem_softc) devmem;
60 int flags;
61 };
62
63 static SLIST_HEAD(, vmmdev_softc) head;
64
65 static unsigned pr_allow_flag;
66 static struct sx vmmdev_mtx;
67 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");
68
69 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
70
71 SYSCTL_DECL(_hw_vmm);
72
73 static void devmem_destroy(void *arg);
74 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
75
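/*
 * Device nodes under /dev/vmm are root-only by default; a jailed process
 * may use them only if its jail was created with the vmm allow flag
 * registered below via prison_add_allow() (i.e. allow.vmm).
 */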
76 static int
77 vmm_priv_check(struct ucred *ucred)
78 {
79 if (jailed(ucred) &&
80 !(ucred->cr_prison->pr_allow & pr_allow_flag))
81 return (EPERM);
82
83 return (0);
84 }
85
86 static int
87 vcpu_lock_one(struct vcpu *vcpu)
88 {
89 return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
90 }
91
92 static void
93 vcpu_unlock_one(struct vcpu *vcpu)
94 {
95 enum vcpu_state state;
96
97 state = vcpu_get_state(vcpu, NULL);
98 if (state != VCPU_FROZEN) {
99 panic("vcpu %s(%d) has invalid state %d",
100 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
101 }
102
103 vcpu_set_state(vcpu, VCPU_IDLE, false);
104 }
105
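/*
 * Freeze every vcpu in the VM.  On success the vcpu list is returned
 * shared-locked with all vcpus frozen; on failure the vcpus frozen so far
 * are thawed and the list lock is dropped before returning the error.
 */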
106 static int
107 vcpu_lock_all(struct vmmdev_softc *sc)
108 {
109 struct vcpu *vcpu;
110 int error;
111 uint16_t i, j, maxcpus;
112
113 error = 0;
114 vm_slock_vcpus(sc->vm);
115 maxcpus = vm_get_maxcpus(sc->vm);
116 for (i = 0; i < maxcpus; i++) {
117 vcpu = vm_vcpu(sc->vm, i);
118 if (vcpu == NULL)
119 continue;
120 error = vcpu_lock_one(vcpu);
121 if (error)
122 break;
123 }
124
125 if (error) {
126 for (j = 0; j < i; j++) {
127 vcpu = vm_vcpu(sc->vm, j);
128 if (vcpu == NULL)
129 continue;
130 vcpu_unlock_one(vcpu);
131 }
132 vm_unlock_vcpus(sc->vm);
133 }
134
135 return (error);
136 }
137
138 static void
139 vcpu_unlock_all(struct vmmdev_softc *sc)
140 {
141 struct vcpu *vcpu;
142 uint16_t i, maxcpus;
143
144 maxcpus = vm_get_maxcpus(sc->vm);
145 for (i = 0; i < maxcpus; i++) {
146 vcpu = vm_vcpu(sc->vm, i);
147 if (vcpu == NULL)
148 continue;
149 vcpu_unlock_one(vcpu);
150 }
151 vm_unlock_vcpus(sc->vm);
152 }
153
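/*
 * Look up a VM by name.  The caller must hold vmmdev_mtx exclusively, and
 * the lookup only succeeds if 'cred' is permitted to see the credential
 * the VM was created with (cr_cansee()).
 */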
154 static struct vmmdev_softc *
155 vmmdev_lookup(const char *name, struct ucred *cred)
156 {
157 struct vmmdev_softc *sc;
158
159 sx_assert(&vmmdev_mtx, SA_XLOCKED);
160
161 SLIST_FOREACH(sc, &head, link) {
162 if (strcmp(name, vm_name(sc->vm)) == 0)
163 break;
164 }
165
166 if (sc == NULL)
167 return (NULL);
168
169 if (cr_cansee(cred, sc->ucred))
170 return (NULL);
171
172 return (sc);
173 }
174
175 static struct vmmdev_softc *
176 vmmdev_lookup2(struct cdev *cdev)
177 {
178 return (cdev->si_drv1);
179 }
180
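/*
 * read(2)/write(2) on /dev/vmm/<name>: the file offset is interpreted as a
 * guest physical address and data is copied a page at a time through a
 * transient host mapping.  As an illustrative example, something like
 * "dd if=/dev/vmm/<name> bs=4k count=1 skip=N" reads the guest page at
 * gpa N * 4096.
 */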
181 static int
182 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
183 {
184 int error, off, c, prot;
185 vm_paddr_t gpa, maxaddr;
186 void *hpa, *cookie;
187 struct vmmdev_softc *sc;
188
189 sc = vmmdev_lookup2(cdev);
190 if (sc == NULL)
191 return (ENXIO);
192
193 /*
194 * Get a read lock on the guest memory map.
195 */
196 vm_slock_memsegs(sc->vm);
197
198 error = 0;
199 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
200 maxaddr = vmm_sysmem_maxaddr(sc->vm);
201 while (uio->uio_resid > 0 && error == 0) {
202 gpa = uio->uio_offset;
203 off = gpa & PAGE_MASK;
204 c = min(uio->uio_resid, PAGE_SIZE - off);
205
206 /*
207 * The VM has a hole in its physical memory map. If we want to
208 * use 'dd' to inspect memory beyond the hole we need to
209 * provide bogus data for memory that lies in the hole.
210 *
211 * Since this device does not support lseek(2), dd(1) will
212 * read(2) blocks of data to simulate the lseek(2).
213 */
214 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
215 if (hpa == NULL) {
216 if (uio->uio_rw == UIO_READ && gpa < maxaddr)
217 error = uiomove(__DECONST(void *, zero_region),
218 c, uio);
219 else
220 error = EFAULT;
221 } else {
222 error = uiomove(hpa, c, uio);
223 vm_gpa_release(cookie);
224 }
225 }
226 vm_unlock_memsegs(sc->vm);
227 return (error);
228 }
229
230 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
231
232 static int
233 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
234 {
235 struct devmem_softc *dsc;
236 int error;
237 bool sysmem;
238
239 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
240 if (error || mseg->len == 0)
241 return (error);
242
243 if (!sysmem) {
244 SLIST_FOREACH(dsc, &sc->devmem, link) {
245 if (dsc->segid == mseg->segid)
246 break;
247 }
248 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
249 __func__, mseg->segid));
250 error = copystr(dsc->name, mseg->name, len, NULL);
251 } else {
252 bzero(mseg->name, len);
253 }
254
255 return (error);
256 }
257
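/*
 * Allocate a memory segment.  A named segment ("devmem") also gets its own
 * /dev/vmm.io/<vm>.<name> cdev, created by devmem_create_cdev(), so it can
 * be mapped independently of the VM device.
 */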
258 static int
259 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
260 {
261 char *name;
262 int error;
263 bool sysmem;
264
265 error = 0;
266 name = NULL;
267 sysmem = true;
268
269 /*
270 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
271 * be stripped off when devfs processes the full string.
272 */
273 if (VM_MEMSEG_NAME(mseg)) {
274 sysmem = false;
275 name = malloc(len, M_VMMDEV, M_WAITOK);
276 error = copystr(mseg->name, name, len, NULL);
277 if (error)
278 goto done;
279 }
280
281 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
282 if (error)
283 goto done;
284
285 if (VM_MEMSEG_NAME(mseg)) {
286 error = devmem_create_cdev(sc, mseg->segid, name);
287 if (error)
288 vm_free_memseg(sc->vm, mseg->segid);
289 else
290 name = NULL; /* freed when 'cdev' is destroyed */
291 }
292 done:
293 free(name, M_VMMDEV);
294 return (error);
295 }
296
297 static int
298 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
299 uint64_t *regval)
300 {
301 int error, i;
302
303 error = 0;
304 for (i = 0; i < count; i++) {
305 error = vm_get_register(vcpu, regnum[i], &regval[i]);
306 if (error)
307 break;
308 }
309 return (error);
310 }
311
312 static int
313 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
314 uint64_t *regval)
315 {
316 int error, i;
317
318 error = 0;
319 for (i = 0; i < count; i++) {
320 error = vm_set_register(vcpu, regnum[i], regval[i]);
321 if (error)
322 break;
323 }
324 return (error);
325 }
326
327 static int
328 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
329 {
330 int error;
331
332 /*
333 * A jail without vmm access shouldn't be able to access vmm device
334 * files at all, but check here just to be thorough.
335 */
336 error = vmm_priv_check(td->td_ucred);
337 if (error != 0)
338 return (error);
339
340 return (0);
341 }
342
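/*
 * Machine-independent ioctls and the locking each one requires.
 * vmmdev_ioctl() searches this table (and then the machine-dependent
 * table) and acquires the memseg and vcpu locks indicated by the flags
 * before dispatching the command.
 */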
343 static const struct vmmdev_ioctl vmmdev_ioctls[] = {
344 VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
345 VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
346 VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
347 VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
348 VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
349 VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
350 VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
351 VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
352 VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
353
354 #if defined(__amd64__) && defined(COMPAT_FREEBSD12)
355 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
356 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
357 #endif
358 VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
359 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
360 VMMDEV_IOCTL(VM_MMAP_MEMSEG,
361 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
362 VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
363 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
364 VMMDEV_IOCTL(VM_REINIT,
365 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
366
367 #if defined(__amd64__) && defined(COMPAT_FREEBSD12)
368 VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
369 #endif
370 VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
371 VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
372
373 VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
374 VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
375
376 VMMDEV_IOCTL(VM_SUSPEND, 0),
377 VMMDEV_IOCTL(VM_GET_CPUS, 0),
378 VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
379 VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
380 };
381
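/*
 * Central ioctl dispatcher for /dev/vmm/<name>.  Per-vcpu commands carry
 * the vcpu id as the first int of their argument structure, which is how
 * the *_VCPU locking flags locate the target vcpu below.
 *
 * Illustrative userland sketch (field names are assumptions, check the
 * vmm_dev headers):
 *
 *	struct vm_register vmreg = { .cpuid = 0, .regnum = regnum };
 *	ioctl(vmfd, VM_GET_REGISTER, &vmreg);
 */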
382 static int
383 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
384 struct thread *td)
385 {
386 struct vmmdev_softc *sc;
387 struct vcpu *vcpu;
388 const struct vmmdev_ioctl *ioctl;
389 int error, vcpuid;
390
391 sc = vmmdev_lookup2(cdev);
392 if (sc == NULL)
393 return (ENXIO);
394
395 ioctl = NULL;
396 for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
397 if (vmmdev_ioctls[i].cmd == cmd) {
398 ioctl = &vmmdev_ioctls[i];
399 break;
400 }
401 }
402 if (ioctl == NULL) {
403 for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
404 if (vmmdev_machdep_ioctls[i].cmd == cmd) {
405 ioctl = &vmmdev_machdep_ioctls[i];
406 break;
407 }
408 }
409 }
410 if (ioctl == NULL)
411 return (ENOTTY);
412
413 if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
414 vm_xlock_memsegs(sc->vm);
415 else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
416 vm_slock_memsegs(sc->vm);
417
418 vcpu = NULL;
419 vcpuid = -1;
420 if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
421 VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
422 vcpuid = *(int *)data;
423 if (vcpuid == -1) {
424 if ((ioctl->flags &
425 VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
426 error = EINVAL;
427 goto lockfail;
428 }
429 } else {
430 vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
431 if (vcpu == NULL) {
432 error = EINVAL;
433 goto lockfail;
434 }
435 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
436 error = vcpu_lock_one(vcpu);
437 if (error)
438 goto lockfail;
439 }
440 }
441 }
442 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
443 error = vcpu_lock_all(sc);
444 if (error)
445 goto lockfail;
446 }
447
448 switch (cmd) {
449 case VM_SUSPEND: {
450 struct vm_suspend *vmsuspend;
451
452 vmsuspend = (struct vm_suspend *)data;
453 error = vm_suspend(sc->vm, vmsuspend->how);
454 break;
455 }
456 case VM_REINIT:
457 error = vm_reinit(sc->vm);
458 break;
459 case VM_STAT_DESC: {
460 struct vm_stat_desc *statdesc;
461
462 statdesc = (struct vm_stat_desc *)data;
463 error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
464 sizeof(statdesc->desc));
465 break;
466 }
467 case VM_STATS: {
468 struct vm_stats *vmstats;
469
470 vmstats = (struct vm_stats *)data;
471 getmicrotime(&vmstats->tv);
472 error = vmm_stat_copy(vcpu, vmstats->index,
473 nitems(vmstats->statbuf), &vmstats->num_entries,
474 vmstats->statbuf);
475 break;
476 }
477 case VM_MMAP_GETNEXT: {
478 struct vm_memmap *mm;
479
480 mm = (struct vm_memmap *)data;
481 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
482 &mm->segoff, &mm->len, &mm->prot, &mm->flags);
483 break;
484 }
485 case VM_MMAP_MEMSEG: {
486 struct vm_memmap *mm;
487
488 mm = (struct vm_memmap *)data;
489 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
490 mm->len, mm->prot, mm->flags);
491 break;
492 }
493 case VM_MUNMAP_MEMSEG: {
494 struct vm_munmap *mu;
495
496 mu = (struct vm_munmap *)data;
497 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
498 break;
499 }
500 #if defined(__amd64__) && defined(COMPAT_FREEBSD12)
501 case VM_ALLOC_MEMSEG_12:
502 error = alloc_memseg(sc, (struct vm_memseg *)data,
503 sizeof(((struct vm_memseg_12 *)0)->name));
504 break;
505 case VM_GET_MEMSEG_12:
506 error = get_memseg(sc, (struct vm_memseg *)data,
507 sizeof(((struct vm_memseg_12 *)0)->name));
508 break;
509 #endif
510 case VM_ALLOC_MEMSEG:
511 error = alloc_memseg(sc, (struct vm_memseg *)data,
512 sizeof(((struct vm_memseg *)0)->name));
513 break;
514 case VM_GET_MEMSEG:
515 error = get_memseg(sc, (struct vm_memseg *)data,
516 sizeof(((struct vm_memseg *)0)->name));
517 break;
518 case VM_GET_REGISTER: {
519 struct vm_register *vmreg;
520
521 vmreg = (struct vm_register *)data;
522 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
523 break;
524 }
525 case VM_SET_REGISTER: {
526 struct vm_register *vmreg;
527
528 vmreg = (struct vm_register *)data;
529 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
530 break;
531 }
532 case VM_GET_REGISTER_SET: {
533 struct vm_register_set *vmregset;
534 uint64_t *regvals;
535 int *regnums;
536
537 vmregset = (struct vm_register_set *)data;
538 if (vmregset->count > VM_REG_LAST) {
539 error = EINVAL;
540 break;
541 }
542 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
543 M_WAITOK);
544 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
545 M_WAITOK);
546 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
547 vmregset->count);
548 if (error == 0)
549 error = vm_get_register_set(vcpu,
550 vmregset->count, regnums, regvals);
551 if (error == 0)
552 error = copyout(regvals, vmregset->regvals,
553 sizeof(regvals[0]) * vmregset->count);
554 free(regvals, M_VMMDEV);
555 free(regnums, M_VMMDEV);
556 break;
557 }
558 case VM_SET_REGISTER_SET: {
559 struct vm_register_set *vmregset;
560 uint64_t *regvals;
561 int *regnums;
562
563 vmregset = (struct vm_register_set *)data;
564 if (vmregset->count > VM_REG_LAST) {
565 error = EINVAL;
566 break;
567 }
568 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
569 M_WAITOK);
570 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
571 M_WAITOK);
572 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
573 vmregset->count);
574 if (error == 0)
575 error = copyin(vmregset->regvals, regvals,
576 sizeof(regvals[0]) * vmregset->count);
577 if (error == 0)
578 error = vm_set_register_set(vcpu,
579 vmregset->count, regnums, regvals);
580 free(regvals, M_VMMDEV);
581 free(regnums, M_VMMDEV);
582 break;
583 }
584 case VM_GET_CAPABILITY: {
585 struct vm_capability *vmcap;
586
587 vmcap = (struct vm_capability *)data;
588 error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
589 break;
590 }
591 case VM_SET_CAPABILITY: {
592 struct vm_capability *vmcap;
593
594 vmcap = (struct vm_capability *)data;
595 error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
596 break;
597 }
598 case VM_ACTIVATE_CPU:
599 error = vm_activate_cpu(vcpu);
600 break;
601 case VM_GET_CPUS: {
602 struct vm_cpuset *vm_cpuset;
603 cpuset_t *cpuset;
604 int size;
605
606 error = 0;
607 vm_cpuset = (struct vm_cpuset *)data;
608 size = vm_cpuset->cpusetsize;
609 if (size < 1 || size > CPU_MAXSIZE / NBBY) {
610 error = ERANGE;
611 break;
612 }
613 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
614 M_WAITOK | M_ZERO);
615 if (vm_cpuset->which == VM_ACTIVE_CPUS)
616 *cpuset = vm_active_cpus(sc->vm);
617 else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
618 *cpuset = vm_suspended_cpus(sc->vm);
619 else if (vm_cpuset->which == VM_DEBUG_CPUS)
620 *cpuset = vm_debug_cpus(sc->vm);
621 else
622 error = EINVAL;
623 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
624 error = ERANGE;
625 if (error == 0)
626 error = copyout(cpuset, vm_cpuset->cpus, size);
627 free(cpuset, M_TEMP);
628 break;
629 }
630 case VM_SUSPEND_CPU:
631 error = vm_suspend_cpu(sc->vm, vcpu);
632 break;
633 case VM_RESUME_CPU:
634 error = vm_resume_cpu(sc->vm, vcpu);
635 break;
636 case VM_SET_TOPOLOGY: {
637 struct vm_cpu_topology *topology;
638
639 topology = (struct vm_cpu_topology *)data;
640 error = vm_set_topology(sc->vm, topology->sockets,
641 topology->cores, topology->threads, topology->maxcpus);
642 break;
643 }
644 case VM_GET_TOPOLOGY: {
645 struct vm_cpu_topology *topology;
646
647 topology = (struct vm_cpu_topology *)data;
648 vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
649 &topology->threads, &topology->maxcpus);
650 error = 0;
651 break;
652 }
653 default:
654 error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
655 td);
656 break;
657 }
658
659 if ((ioctl->flags &
660 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
661 vm_unlock_memsegs(sc->vm);
662 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
663 vcpu_unlock_all(sc);
664 else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
665 vcpu_unlock_one(vcpu);
666
667 /*
668 * Make sure that no handler returns a kernel-internal
669 * error value to userspace.
670 */
671 KASSERT(error == ERESTART || error >= 0,
672 ("vmmdev_ioctl: invalid error return %d", error));
673 return (error);
674
675 lockfail:
676 if ((ioctl->flags &
677 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
678 vm_unlock_memsegs(sc->vm);
679 return (error);
680 }
681
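/*
 * mmap(2) on /dev/vmm/<name>: find the system memory segment backing the
 * requested guest-physical range and hand back its VM object.  Only sysmem
 * segments can be mapped through the VM cdev; devmem segments are mapped
 * via their own /dev/vmm.io nodes (devmem_mmap_single()).
 */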
682 static int
683 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
684 struct vm_object **objp, int nprot)
685 {
686 struct vmmdev_softc *sc;
687 vm_paddr_t gpa;
688 size_t len;
689 vm_ooffset_t segoff, first, last;
690 int error, found, segid;
691 bool sysmem;
692
693 first = *offset;
694 last = first + mapsize;
695 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
696 return (EINVAL);
697
698 sc = vmmdev_lookup2(cdev);
699 if (sc == NULL) {
700 /* virtual machine is in the process of being created */
701 return (EINVAL);
702 }
703
704 /*
705 * Get a read lock on the guest memory map.
706 */
707 vm_slock_memsegs(sc->vm);
708
709 gpa = 0;
710 found = 0;
711 while (!found) {
712 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
713 NULL, NULL);
714 if (error)
715 break;
716
717 if (first >= gpa && last <= gpa + len)
718 found = 1;
719 else
720 gpa += len;
721 }
722
723 if (found) {
724 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
725 KASSERT(error == 0 && *objp != NULL,
726 ("%s: invalid memory segment %d", __func__, segid));
727 if (sysmem) {
728 vm_object_reference(*objp);
729 *offset = segoff + (first - gpa);
730 } else {
731 error = EINVAL;
732 }
733 }
734 vm_unlock_memsegs(sc->vm);
735 return (error);
736 }
737
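/*
 * Tear down a VM: destroy the devmem cdevs, prevent further vcpu creation,
 * freeze all vcpus, release the devmem bookkeeping, then destroy the vm
 * instance and unlink the softc from the global list.
 */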
738 static void
739 vmmdev_destroy(struct vmmdev_softc *sc)
740 {
741 struct devmem_softc *dsc;
742 int error __diagused;
743
744 KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
745
746 /*
747 * Destroy all cdevs:
748 *
749 * - any new operations on the 'cdev' will return an error (ENXIO).
750 *
751 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
752 */
753 SLIST_FOREACH(dsc, &sc->devmem, link) {
754 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
755 devmem_destroy(dsc);
756 }
757
758 vm_disable_vcpu_creation(sc->vm);
759 error = vcpu_lock_all(sc);
760 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
761 vm_unlock_vcpus(sc->vm);
762
763 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
764 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
765 SLIST_REMOVE_HEAD(&sc->devmem, link);
766 free(dsc->name, M_VMMDEV);
767 free(dsc, M_VMMDEV);
768 }
769
770 if (sc->vm != NULL)
771 vm_destroy(sc->vm);
772
773 if (sc->ucred != NULL)
774 crfree(sc->ucred);
775
776 sx_xlock(&vmmdev_mtx);
777 SLIST_REMOVE(&head, sc, vmmdev_softc, link);
778 sx_xunlock(&vmmdev_mtx);
779 free(sc, M_VMMDEV);
780 }
781
782 static int
783 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
784 {
785 struct cdev *cdev;
786 struct vmmdev_softc *sc;
787
788 sx_xlock(&vmmdev_mtx);
789 sc = vmmdev_lookup(name, cred);
790 if (sc == NULL || sc->cdev == NULL) {
791 sx_xunlock(&vmmdev_mtx);
792 return (EINVAL);
793 }
794
795 /*
796 * Setting 'sc->cdev' to NULL is used to indicate that the VM
797 * is scheduled for destruction.
798 */
799 cdev = sc->cdev;
800 sc->cdev = NULL;
801 sx_xunlock(&vmmdev_mtx);
802
803 destroy_dev(cdev);
804 vmmdev_destroy(sc);
805
806 return (0);
807 }
808
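/*
 * Sysctl interface for destroying a VM by name, the counterpart of the
 * VMMCTL_VM_DESTROY ioctl below, e.g.:
 *
 *	sysctl hw.vmm.destroy=<name>
 */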
809 static int
810 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
811 {
812 char *buf;
813 int error, buflen;
814
815 error = vmm_priv_check(req->td->td_ucred);
816 if (error)
817 return (error);
818
819 buflen = VM_MAX_NAMELEN + 1;
820 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
821 strlcpy(buf, "beavis", buflen);
822 error = sysctl_handle_string(oidp, buf, buflen, req);
823 if (error == 0 && req->newptr != NULL)
824 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
825 free(buf, M_VMMDEV);
826 return (error);
827 }
828 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
829 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
830 NULL, 0, sysctl_vmm_destroy, "A",
831 NULL);
832
833 static struct cdevsw vmmdevsw = {
834 .d_name = "vmmdev",
835 .d_version = D_VERSION,
836 .d_open = vmmdev_open,
837 .d_ioctl = vmmdev_ioctl,
838 .d_mmap_single = vmmdev_mmap_single,
839 .d_read = vmmdev_rw,
840 .d_write = vmmdev_rw,
841 };
842
843 static struct vmmdev_softc *
844 vmmdev_alloc(struct vm *vm, struct ucred *cred)
845 {
846 struct vmmdev_softc *sc;
847
848 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
849 SLIST_INIT(&sc->devmem);
850 sc->vm = vm;
851 sc->ucred = crhold(cred);
852 return (sc);
853 }
854
855 static int
856 vmmdev_create(const char *name, struct ucred *cred)
857 {
858 struct make_dev_args mda;
859 struct cdev *cdev;
860 struct vmmdev_softc *sc;
861 struct vm *vm;
862 int error;
863
864 sx_xlock(&vmmdev_mtx);
865 sc = vmmdev_lookup(name, cred);
866 if (sc != NULL) {
867 sx_xunlock(&vmmdev_mtx);
868 return (EEXIST);
869 }
870
871 error = vm_create(name, &vm);
872 if (error != 0) {
873 sx_xunlock(&vmmdev_mtx);
874 return (error);
875 }
876 sc = vmmdev_alloc(vm, cred);
877 SLIST_INSERT_HEAD(&head, sc, link);
878
879 make_dev_args_init(&mda);
880 mda.mda_devsw = &vmmdevsw;
881 mda.mda_cr = sc->ucred;
882 mda.mda_uid = UID_ROOT;
883 mda.mda_gid = GID_WHEEL;
884 mda.mda_mode = 0600;
885 mda.mda_si_drv1 = sc;
886 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
887 error = make_dev_s(&mda, &cdev, "vmm/%s", name);
888 if (error != 0) {
889 sx_xunlock(&vmmdev_mtx);
890 vmmdev_destroy(sc);
891 return (error);
892 }
893 sc->cdev = cdev;
894 sx_xunlock(&vmmdev_mtx);
895 return (0);
896 }
897
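/*
 * Sysctl interface for creating a VM by name, the counterpart of the
 * VMMCTL_VM_CREATE ioctl below, e.g.:
 *
 *	sysctl hw.vmm.create=<name>
 */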
898 static int
899 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
900 {
901 char *buf;
902 int error, buflen;
903
904 error = vmm_priv_check(req->td->td_ucred);
905 if (error != 0)
906 return (error);
907
908 buflen = VM_MAX_NAMELEN + 1;
909 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
910 strlcpy(buf, "beavis", buflen);
911 error = sysctl_handle_string(oidp, buf, buflen, req);
912 if (error == 0 && req->newptr != NULL)
913 error = vmmdev_create(buf, req->td->td_ucred);
914 free(buf, M_VMMDEV);
915 return (error);
916 }
917 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
918 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
919 NULL, 0, sysctl_vmm_create, "A",
920 NULL);
921
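/*
 * /dev/vmmctl: control device used to create and destroy VMs.  Opening it
 * requires vmm privilege and write access.
 */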
922 static int
923 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
924 {
925 int error;
926
927 error = vmm_priv_check(td->td_ucred);
928 if (error != 0)
929 return (error);
930
931 if ((flags & FWRITE) == 0)
932 return (EPERM);
933
934 return (0);
935 }
936
937 static int
938 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
939 struct thread *td)
940 {
941 int error;
942
943 switch (cmd) {
944 case VMMCTL_VM_CREATE: {
945 struct vmmctl_vm_create *vmc;
946
947 vmc = (struct vmmctl_vm_create *)data;
948 vmc->name[VM_MAX_NAMELEN] = '\0';
949 for (size_t i = 0; i < nitems(vmc->reserved); i++) {
950 if (vmc->reserved[i] != 0) {
951 error = EINVAL;
952 return (error);
953 }
954 }
955
956 error = vmmdev_create(vmc->name, td->td_ucred);
957 break;
958 }
959 case VMMCTL_VM_DESTROY: {
960 struct vmmctl_vm_destroy *vmd;
961
962 vmd = (struct vmmctl_vm_destroy *)data;
963 vmd->name[VM_MAX_NAMELEN] = '\0';
964 for (size_t i = 0; i < nitems(vmd->reserved); i++) {
965 if (vmd->reserved[i] != 0) {
966 error = EINVAL;
967 return (error);
968 }
969 }
970
971 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
972 break;
973 }
974 default:
975 error = ENOTTY;
976 break;
977 }
978
979 return (error);
980 }
981
982 static struct cdevsw vmmctlsw = {
983 .d_name = "vmmctl",
984 .d_version = D_VERSION,
985 .d_open = vmmctl_open,
986 .d_ioctl = vmmctl_ioctl,
987 };
988
989 int
990 vmmdev_init(void)
991 {
992 struct cdev *cdev;
993 int error;
994
995 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmctlsw, NULL,
996 UID_ROOT, GID_WHEEL, 0600, "vmmctl");
997 if (error)
998 return (error);
999
1000 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1001 "Allow use of vmm in a jail.");
1002
1003 return (0);
1004 }
1005
1006 int
1007 vmmdev_cleanup(void)
1008 {
1009 int error;
1010
1011 if (SLIST_EMPTY(&head))
1012 error = 0;
1013 else
1014 error = EBUSY;
1015
1016 return (error);
1017 }
1018
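/*
 * mmap(2) on a /dev/vmm.io/<vm>.<segname> node maps the corresponding
 * devmem segment.  The requested range must fall entirely within the
 * segment and must not request execute permission.
 */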
1019 static int
1020 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1021 struct vm_object **objp, int nprot)
1022 {
1023 struct devmem_softc *dsc;
1024 vm_ooffset_t first, last;
1025 size_t seglen;
1026 int error;
1027 bool sysmem;
1028
1029 dsc = cdev->si_drv1;
1030 if (dsc == NULL) {
1031 /* 'cdev' has been created but is not ready for use */
1032 return (ENXIO);
1033 }
1034
1035 first = *offset;
1036 last = *offset + len;
1037 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1038 return (EINVAL);
1039
1040 vm_slock_memsegs(dsc->sc->vm);
1041
1042 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1043 KASSERT(error == 0 && !sysmem && *objp != NULL,
1044 ("%s: invalid devmem segment %d", __func__, dsc->segid));
1045
1046 if (seglen >= last)
1047 vm_object_reference(*objp);
1048 else
1049 error = EINVAL;
1050
1051 vm_unlock_memsegs(dsc->sc->vm);
1052 return (error);
1053 }
1054
1055 static struct cdevsw devmemsw = {
1056 .d_name = "devmem",
1057 .d_version = D_VERSION,
1058 .d_mmap_single = devmem_mmap_single,
1059 };
1060
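/*
 * Create the /dev/vmm.io/<vm>.<name> cdev for a devmem segment and link it
 * into the owning vmmdev_softc so that vmmdev_destroy() can find it later.
 */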
1061 static int
1062 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
1063 {
1064 struct make_dev_args mda;
1065 struct devmem_softc *dsc;
1066 int error;
1067
1068 sx_xlock(&vmmdev_mtx);
1069
1070 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1071 dsc->segid = segid;
1072 dsc->name = devname;
1073 dsc->sc = sc;
1074 SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1075
1076 make_dev_args_init(&mda);
1077 mda.mda_devsw = &devmemsw;
1078 mda.mda_cr = sc->ucred;
1079 mda.mda_uid = UID_ROOT;
1080 mda.mda_gid = GID_WHEEL;
1081 mda.mda_mode = 0600;
1082 mda.mda_si_drv1 = dsc;
1083 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1084 error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
1085 devname);
1086 if (error != 0) {
1087 SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
1088 free(dsc->name, M_VMMDEV);
1089 free(dsc, M_VMMDEV);
1090 }
1091
1092 sx_xunlock(&vmmdev_mtx);
1093
1094 return (error);
1095 }
1096
1097 static void
1098 devmem_destroy(void *arg)
1099 {
1100 struct devmem_softc *dsc = arg;
1101
1102 destroy_dev(dsc->cdev);
1103 dsc->cdev = NULL;
1104 dsc->sc = NULL;
1105 }
1106