/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int segid;
	size_t len;
	char name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define VM_ALLOC_MEMSEG_14 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define VM_GET_MEMSEG_14 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int segid;
	char *name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm *vm;		/* vm instance cookie */
	struct cdev *cdev;
	struct ucred *ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

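/*
 * A vcpu is "locked" by putting it into the VCPU_FROZEN state, which prevents
 * it from running while an ioctl handler inspects or modifies its state.
 */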
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

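/*
 * Freeze every vcpu that has been created.  If any vcpu cannot be frozen,
 * the ones already frozen are returned to the idle state and the lock taken
 * by vm_slock_vcpus() is dropped before the error is returned.
 */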
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

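/*
 * Look up a VM by name.  The caller must hold vmmdev_mtx exclusively.
 * Returns NULL if no VM with that name exists or if cr_cansee() denies the
 * caller access to the VM owner's credentials.
 */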
static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

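/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies data between the
 * caller's buffer and guest physical memory, one page at a time.
 */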
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

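/*
 * Table of generic ioctls and the locking that vmmdev_ioctl() must acquire
 * before dispatching each one.  Machine-dependent ioctls are handled through
 * the vmmdev_machdep_ioctls table instead.
 */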
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
		    mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

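/*
 * mmap(2) handler for /dev/vmm/<name>: the requested range must not be
 * executable and must fall entirely within a single mapped system-memory
 * segment of the guest physical address space.
 */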
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

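/*
 * ioctl handler for /dev/vmmctl: creates and destroys VM instances by name.
 * The 'reserved' fields of each request must be zero.
 */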
static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

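/*
 * Create a cdev for a devmem segment, named /dev/vmm.io/<vmname>.<segname>.
 */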
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}