1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
6 * All rights reserved.
7 */
8
9 #include <sys/param.h>
10 #include <sys/conf.h>
11 #include <sys/fcntl.h>
12 #include <sys/ioccom.h>
13 #include <sys/jail.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/mman.h>
17 #include <sys/proc.h>
18 #include <sys/queue.h>
19 #include <sys/sx.h>
20 #include <sys/sysctl.h>
21 #include <sys/ucred.h>
22 #include <sys/uio.h>
23
24 #include <machine/vmm.h>
25
26 #include <vm/vm.h>
27 #include <vm/vm_object.h>
28
29 #include <dev/vmm/vmm_dev.h>
30 #include <dev/vmm/vmm_mem.h>
31 #include <dev/vmm/vmm_stat.h>
32
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
/*
 * Memory segment descriptor as laid out for the COMPAT_FREEBSD12 variants of
 * the ALLOC_MEMSEG/GET_MEMSEG ioctls: a fixed 64-byte name and no domainset
 * fields.  The size is pinned by the static assertion below.
 */
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

/* Same ioctl numbers as the current commands, encoded with the old size. */
#define VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
/*
 * Memory segment descriptor as laid out for the COMPAT_FREEBSD14 variants of
 * the ALLOC_MEMSEG/GET_MEMSEG ioctls: the name is VM_MAX_SUFFIXLEN + 1 bytes
 * and there are no domainset fields.
 */
struct vm_memseg_14 {
	int segid;
	size_t len;
	char name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

/* Same ioctl numbers as the current commands, encoded with the old size. */
#define VM_ALLOC_MEMSEG_14 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define VM_GET_MEMSEG_14 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
62
/*
 * State for one device memory segment.  An instance is created by
 * devmem_create_cdev(), becomes the si_drv1 of the "vmm.io/<vm>.<name>"
 * character device and is linked on the owning VM's 'devmem' list.
 */
struct devmem_softc {
	int segid;		/* memory segment identifier */
	char *name;		/* M_VMMDEV string, freed in vmmdev_destroy() */
	struct cdev *cdev;	/* device node; NULL after devmem_destroy() */
	struct vmmdev_softc *sc; /* owning VM; NULL after devmem_destroy() */
	SLIST_ENTRY(devmem_softc) link;	/* entry on vmmdev_softc.devmem */
};
70
/*
 * State for one virtual machine.  An instance is the si_drv1 of the
 * "vmm/<name>" character device and is linked on the file-scope 'head' list.
 */
struct vmmdev_softc {
	struct vm *vm; /* vm instance cookie */
	/*
	 * VM device node.  NULL until vmmdev_create() finishes, and reset to
	 * NULL by vmmdev_lookup_and_destroy() to mark the VM as being torn
	 * down.
	 */
	struct cdev *cdev;
	struct ucred *ucred;	/* creator's credentials (crhold/crfree) */
	SLIST_ENTRY(vmmdev_softc) link;	/* entry on the global 'head' list */
	SLIST_HEAD(, devmem_softc) devmem; /* devmem segments of this VM */
	int flags;		/* not referenced in the code visible here */
};
79
/* Every VM softc; insertions and removals happen with vmmdev_mtx held. */
static SLIST_HEAD(, vmmdev_softc) head;

/* Jail allow bit obtained from prison_add_allow() in vmmdev_init(). */
static unsigned pr_allow_flag;
/* Despite its name this is an sx(9) lock, not a mutex. */
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

/* malloc(9) type used for every allocation made in this file. */
static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

/* Forward declarations for helpers defined near the end of the file. */
static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
92
93 static int
vmm_priv_check(struct ucred * ucred)94 vmm_priv_check(struct ucred *ucred)
95 {
96 if (jailed(ucred) &&
97 !(ucred->cr_prison->pr_allow & pr_allow_flag))
98 return (EPERM);
99
100 return (0);
101 }
102
103 static int
vcpu_lock_one(struct vcpu * vcpu)104 vcpu_lock_one(struct vcpu *vcpu)
105 {
106 return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
107 }
108
109 static void
vcpu_unlock_one(struct vcpu * vcpu)110 vcpu_unlock_one(struct vcpu *vcpu)
111 {
112 enum vcpu_state state;
113
114 state = vcpu_get_state(vcpu, NULL);
115 if (state != VCPU_FROZEN) {
116 panic("vcpu %s(%d) has invalid state %d",
117 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
118 }
119
120 vcpu_set_state(vcpu, VCPU_IDLE, false);
121 }
122
123 static int
vcpu_lock_all(struct vmmdev_softc * sc)124 vcpu_lock_all(struct vmmdev_softc *sc)
125 {
126 struct vcpu *vcpu;
127 int error;
128 uint16_t i, j, maxcpus;
129
130 error = 0;
131 vm_slock_vcpus(sc->vm);
132 maxcpus = vm_get_maxcpus(sc->vm);
133 for (i = 0; i < maxcpus; i++) {
134 vcpu = vm_vcpu(sc->vm, i);
135 if (vcpu == NULL)
136 continue;
137 error = vcpu_lock_one(vcpu);
138 if (error)
139 break;
140 }
141
142 if (error) {
143 for (j = 0; j < i; j++) {
144 vcpu = vm_vcpu(sc->vm, j);
145 if (vcpu == NULL)
146 continue;
147 vcpu_unlock_one(vcpu);
148 }
149 vm_unlock_vcpus(sc->vm);
150 }
151
152 return (error);
153 }
154
155 static void
vcpu_unlock_all(struct vmmdev_softc * sc)156 vcpu_unlock_all(struct vmmdev_softc *sc)
157 {
158 struct vcpu *vcpu;
159 uint16_t i, maxcpus;
160
161 maxcpus = vm_get_maxcpus(sc->vm);
162 for (i = 0; i < maxcpus; i++) {
163 vcpu = vm_vcpu(sc->vm, i);
164 if (vcpu == NULL)
165 continue;
166 vcpu_unlock_one(vcpu);
167 }
168 vm_unlock_vcpus(sc->vm);
169 }
170
171 static struct vmmdev_softc *
vmmdev_lookup(const char * name,struct ucred * cred)172 vmmdev_lookup(const char *name, struct ucred *cred)
173 {
174 struct vmmdev_softc *sc;
175
176 sx_assert(&vmmdev_mtx, SA_XLOCKED);
177
178 SLIST_FOREACH(sc, &head, link) {
179 if (strcmp(name, vm_name(sc->vm)) == 0)
180 break;
181 }
182
183 if (sc == NULL)
184 return (NULL);
185
186 if (cr_cansee(cred, sc->ucred))
187 return (NULL);
188
189 return (sc);
190 }
191
192 static struct vmmdev_softc *
vmmdev_lookup2(struct cdev * cdev)193 vmmdev_lookup2(struct cdev *cdev)
194 {
195 return (cdev->si_drv1);
196 }
197
198 static int
vmmdev_rw(struct cdev * cdev,struct uio * uio,int flags)199 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
200 {
201 int error, off, c, prot;
202 vm_paddr_t gpa, maxaddr;
203 void *hpa, *cookie;
204 struct vmmdev_softc *sc;
205
206 sc = vmmdev_lookup2(cdev);
207 if (sc == NULL)
208 return (ENXIO);
209
210 /*
211 * Get a read lock on the guest memory map.
212 */
213 vm_slock_memsegs(sc->vm);
214
215 error = 0;
216 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
217 maxaddr = vmm_sysmem_maxaddr(sc->vm);
218 while (uio->uio_resid > 0 && error == 0) {
219 gpa = uio->uio_offset;
220 off = gpa & PAGE_MASK;
221 c = min(uio->uio_resid, PAGE_SIZE - off);
222
223 /*
224 * The VM has a hole in its physical memory map. If we want to
225 * use 'dd' to inspect memory beyond the hole we need to
226 * provide bogus data for memory that lies in the hole.
227 *
228 * Since this device does not support lseek(2), dd(1) will
229 * read(2) blocks of data to simulate the lseek(2).
230 */
231 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
232 if (hpa == NULL) {
233 if (uio->uio_rw == UIO_READ && gpa < maxaddr)
234 error = uiomove(__DECONST(void *, zero_region),
235 c, uio);
236 else
237 error = EFAULT;
238 } else {
239 error = uiomove(hpa, c, uio);
240 vm_gpa_release(cookie);
241 }
242 }
243 vm_unlock_memsegs(sc->vm);
244 return (error);
245 }
246
247 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
248
249 static int
get_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len)250 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
251 {
252 struct devmem_softc *dsc;
253 int error;
254 bool sysmem;
255
256 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
257 if (error || mseg->len == 0)
258 return (error);
259
260 if (!sysmem) {
261 SLIST_FOREACH(dsc, &sc->devmem, link) {
262 if (dsc->segid == mseg->segid)
263 break;
264 }
265 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
266 __func__, mseg->segid));
267 error = copystr(dsc->name, mseg->name, len, NULL);
268 } else {
269 bzero(mseg->name, len);
270 }
271
272 return (error);
273 }
274
275 static int
alloc_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len,struct domainset * domainset)276 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
277 struct domainset *domainset)
278 {
279 char *name;
280 int error;
281 bool sysmem;
282
283 error = 0;
284 name = NULL;
285 sysmem = true;
286
287 /*
288 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
289 * by stripped off when devfs processes the full string.
290 */
291 if (VM_MEMSEG_NAME(mseg)) {
292 sysmem = false;
293 name = malloc(len, M_VMMDEV, M_WAITOK);
294 error = copystr(mseg->name, name, len, NULL);
295 if (error)
296 goto done;
297 }
298 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
299 if (error)
300 goto done;
301
302 if (VM_MEMSEG_NAME(mseg)) {
303 error = devmem_create_cdev(sc, mseg->segid, name);
304 if (error)
305 vm_free_memseg(sc->vm, mseg->segid);
306 else
307 name = NULL; /* freed when 'cdev' is destroyed */
308 }
309 done:
310 free(name, M_VMMDEV);
311 return (error);
312 }
313
#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Convert a memory segment identifier supplied through one of the pre-15.0
 * compat ioctls into the 15.0 numbering.  VM_SYSMEM is left alone; every
 * other identifier is shifted up by VM_BOOTROM - 1.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid == VM_SYSMEM)
		return;
	mseg->segid += (VM_BOOTROM - 1);
}
#endif
327
328 static int
vm_get_register_set(struct vcpu * vcpu,unsigned int count,int * regnum,uint64_t * regval)329 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
330 uint64_t *regval)
331 {
332 int error, i;
333
334 error = 0;
335 for (i = 0; i < count; i++) {
336 error = vm_get_register(vcpu, regnum[i], ®val[i]);
337 if (error)
338 break;
339 }
340 return (error);
341 }
342
343 static int
vm_set_register_set(struct vcpu * vcpu,unsigned int count,int * regnum,uint64_t * regval)344 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
345 uint64_t *regval)
346 {
347 int error, i;
348
349 error = 0;
350 for (i = 0; i < count; i++) {
351 error = vm_set_register(vcpu, regnum[i], regval[i]);
352 if (error)
353 break;
354 }
355 return (error);
356 }
357
358 static int
vmmdev_open(struct cdev * dev,int flags,int fmt,struct thread * td)359 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
360 {
361 int error;
362
363 /*
364 * A jail without vmm access shouldn't be able to access vmm device
365 * files at all, but check here just to be thorough.
366 */
367 error = vmm_priv_check(td->td_ucred);
368 if (error != 0)
369 return (error);
370
371 return (0);
372 }
373
/*
 * Machine-independent ioctls and the locking each one needs.  vmmdev_ioctl()
 * interprets the flags before running the handler:
 *
 *   VMMDEV_IOCTL_XLOCK_MEMSEGS / _SLOCK_MEMSEGS
 *	take the memseg lock exclusively / shared;
 *   VMMDEV_IOCTL_LOCK_ONE_VCPU
 *	freeze the vcpu whose id is the leading int of the argument;
 *   VMMDEV_IOCTL_LOCK_ALL_VCPUS
 *	freeze every vcpu via vcpu_lock_all();
 *   VMMDEV_IOCTL_MAYBE_ALLOC_VCPU
 *	the leading int is a vcpu id, and -1 is accepted to mean "no vcpu".
 *
 * The compat entries share the locking of their current counterparts.
 */
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
424
/*
 * ioctl(2) handler for the VM device.
 *
 * The command is looked up first in vmmdev_ioctls[] and then in the
 * machine-dependent table; the flags of the matching entry decide which
 * locks are taken before the handler runs and released afterwards.
 * Commands without a case in the switch below are forwarded to
 * vmmdev_machdep_ioctl().
 *
 * Returns ENXIO when the device has no softc, ENOTTY when the command is in
 * neither table, EINVAL when the vcpu id is unusable, or the handler's
 * result.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/* Machine-independent table first, machine-dependent table second. */
	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	/* The memseg lock is taken before any vcpu is frozen. */
	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		/* For these commands the argument starts with the vcpu id. */
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			/* Only MAYBE_ALLOC_VCPU commands may omit the vcpu. */
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	/*
	 * Compat requests are accessed through 'struct vm_memseg'; only
	 * segid, len and a name of the old size are used, and no domainset
	 * is passed.
	 */
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		/*
		 * A domainset is built only when the request carries both a
		 * policy and a mask pointer; otherwise NULL is passed on.
		 */
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			/* The mask lives in user memory; copy it in first. */
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);

		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		/* Bound the count before it is used to size allocations. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		/* Register numbers in, values out; both arrays are in user memory. */
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		/* Bound the count before it is used to size allocations. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		/* Both the register numbers and the values come from user memory. */
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		/*
		 * The buffer is zeroed and at least sizeof(cpuset_t) bytes
		 * long, so a whole cpuset_t can be stored even when the
		 * caller asked for fewer bytes.
		 */
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		/* Fail rather than truncate when the highest set bit won't fit. */
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		/* Present in a table but not handled above: machine-dependent. */
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	/* Release whatever the flags made us take before the handler ran. */
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	/* Lock setup failed: drop the memseg lock taken above, if any. */
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}
782
783 static int
vmmdev_mmap_single(struct cdev * cdev,vm_ooffset_t * offset,vm_size_t mapsize,struct vm_object ** objp,int nprot)784 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
785 struct vm_object **objp, int nprot)
786 {
787 struct vmmdev_softc *sc;
788 vm_paddr_t gpa;
789 size_t len;
790 vm_ooffset_t segoff, first, last;
791 int error, found, segid;
792 bool sysmem;
793
794 first = *offset;
795 last = first + mapsize;
796 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
797 return (EINVAL);
798
799 sc = vmmdev_lookup2(cdev);
800 if (sc == NULL) {
801 /* virtual machine is in the process of being created */
802 return (EINVAL);
803 }
804
805 /*
806 * Get a read lock on the guest memory map.
807 */
808 vm_slock_memsegs(sc->vm);
809
810 gpa = 0;
811 found = 0;
812 while (!found) {
813 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
814 NULL, NULL);
815 if (error)
816 break;
817
818 if (first >= gpa && last <= gpa + len)
819 found = 1;
820 else
821 gpa += len;
822 }
823
824 if (found) {
825 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
826 KASSERT(error == 0 && *objp != NULL,
827 ("%s: invalid memory segment %d", __func__, segid));
828 if (sysmem) {
829 vm_object_reference(*objp);
830 *offset = segoff + (first - gpa);
831 } else {
832 error = EINVAL;
833 }
834 }
835 vm_unlock_memsegs(sc->vm);
836 return (error);
837 }
838
/*
 * Tear down a VM softc.  The caller must already have cleared sc->cdev
 * (and destroyed that device if it existed); this is asserted on entry.
 *
 * Order of operations: destroy the devmem device nodes, stop vcpu creation
 * and freeze all vcpus, free the devmem softcs, destroy the VM, drop the
 * credential reference, unlink the softc from the global list and free it.
 */
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	/*
	 * Freeze every vcpu.  Only the vcpus lock taken by vcpu_lock_all() is
	 * released afterwards; the vcpus themselves are left frozen.
	 */
	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	/* The device nodes are gone; now release the devmem bookkeeping. */
	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	/*
	 * NOTE(review): sc->vm has already been passed to several functions
	 * above, so this NULL check looks redundant -- confirm.
	 */
	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}
882
883 static int
vmmdev_lookup_and_destroy(const char * name,struct ucred * cred)884 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
885 {
886 struct cdev *cdev;
887 struct vmmdev_softc *sc;
888
889 sx_xlock(&vmmdev_mtx);
890 sc = vmmdev_lookup(name, cred);
891 if (sc == NULL || sc->cdev == NULL) {
892 sx_xunlock(&vmmdev_mtx);
893 return (EINVAL);
894 }
895
896 /*
897 * Setting 'sc->cdev' to NULL is used to indicate that the VM
898 * is scheduled for destruction.
899 */
900 cdev = sc->cdev;
901 sc->cdev = NULL;
902 sx_xunlock(&vmmdev_mtx);
903
904 destroy_dev(cdev);
905 vmmdev_destroy(sc);
906
907 return (0);
908 }
909
910 static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)911 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
912 {
913 char *buf;
914 int error, buflen;
915
916 error = vmm_priv_check(req->td->td_ucred);
917 if (error)
918 return (error);
919
920 buflen = VM_MAX_NAMELEN + 1;
921 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
922 error = sysctl_handle_string(oidp, buf, buflen, req);
923 if (error == 0 && req->newptr != NULL)
924 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
925 free(buf, M_VMMDEV);
926 return (error);
927 }
928 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
929 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
930 NULL, 0, sysctl_vmm_destroy, "A",
931 "Destroy a vmm(4) instance (legacy interface)");
932
/*
 * Character device switch for the per-VM "vmm/<name>" nodes created by
 * vmmdev_create().  read(2) and write(2) share one handler.
 */
static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};
942
943 static struct vmmdev_softc *
vmmdev_alloc(struct vm * vm,struct ucred * cred)944 vmmdev_alloc(struct vm *vm, struct ucred *cred)
945 {
946 struct vmmdev_softc *sc;
947
948 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
949 SLIST_INIT(&sc->devmem);
950 sc->vm = vm;
951 sc->ucred = crhold(cred);
952 return (sc);
953 }
954
955 static int
vmmdev_create(const char * name,struct ucred * cred)956 vmmdev_create(const char *name, struct ucred *cred)
957 {
958 struct make_dev_args mda;
959 struct cdev *cdev;
960 struct vmmdev_softc *sc;
961 struct vm *vm;
962 int error;
963
964 sx_xlock(&vmmdev_mtx);
965 sc = vmmdev_lookup(name, cred);
966 if (sc != NULL) {
967 sx_xunlock(&vmmdev_mtx);
968 return (EEXIST);
969 }
970
971 error = vm_create(name, &vm);
972 if (error != 0) {
973 sx_xunlock(&vmmdev_mtx);
974 return (error);
975 }
976 sc = vmmdev_alloc(vm, cred);
977 SLIST_INSERT_HEAD(&head, sc, link);
978
979 make_dev_args_init(&mda);
980 mda.mda_devsw = &vmmdevsw;
981 mda.mda_cr = sc->ucred;
982 mda.mda_uid = UID_ROOT;
983 mda.mda_gid = GID_WHEEL;
984 mda.mda_mode = 0600;
985 mda.mda_si_drv1 = sc;
986 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
987 error = make_dev_s(&mda, &cdev, "vmm/%s", name);
988 if (error != 0) {
989 sx_xunlock(&vmmdev_mtx);
990 vmmdev_destroy(sc);
991 return (error);
992 }
993 sc->cdev = cdev;
994 sx_xunlock(&vmmdev_mtx);
995 return (0);
996 }
997
998 static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)999 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1000 {
1001 char *buf;
1002 int error, buflen;
1003
1004 error = vmm_priv_check(req->td->td_ucred);
1005 if (error != 0)
1006 return (error);
1007
1008 buflen = VM_MAX_NAMELEN + 1;
1009 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1010 error = sysctl_handle_string(oidp, buf, buflen, req);
1011 if (error == 0 && req->newptr != NULL)
1012 error = vmmdev_create(buf, req->td->td_ucred);
1013 free(buf, M_VMMDEV);
1014 return (error);
1015 }
1016 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1017 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1018 NULL, 0, sysctl_vmm_create, "A",
1019 "Create a vmm(4) instance (legacy interface)");
1020
1021 static int
vmmctl_open(struct cdev * cdev,int flags,int fmt,struct thread * td)1022 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
1023 {
1024 int error;
1025
1026 error = vmm_priv_check(td->td_ucred);
1027 if (error != 0)
1028 return (error);
1029
1030 if ((flags & FWRITE) == 0)
1031 return (EPERM);
1032
1033 return (0);
1034 }
1035
1036 static int
vmmctl_ioctl(struct cdev * cdev,u_long cmd,caddr_t data,int fflag,struct thread * td)1037 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
1038 struct thread *td)
1039 {
1040 int error;
1041
1042 switch (cmd) {
1043 case VMMCTL_VM_CREATE: {
1044 struct vmmctl_vm_create *vmc;
1045
1046 vmc = (struct vmmctl_vm_create *)data;
1047 vmc->name[VM_MAX_NAMELEN] = '\0';
1048 for (size_t i = 0; i < nitems(vmc->reserved); i++) {
1049 if (vmc->reserved[i] != 0) {
1050 error = EINVAL;
1051 return (error);
1052 }
1053 }
1054
1055 error = vmmdev_create(vmc->name, td->td_ucred);
1056 break;
1057 }
1058 case VMMCTL_VM_DESTROY: {
1059 struct vmmctl_vm_destroy *vmd;
1060
1061 vmd = (struct vmmctl_vm_destroy *)data;
1062 vmd->name[VM_MAX_NAMELEN] = '\0';
1063 for (size_t i = 0; i < nitems(vmd->reserved); i++) {
1064 if (vmd->reserved[i] != 0) {
1065 error = EINVAL;
1066 return (error);
1067 }
1068 }
1069
1070 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
1071 break;
1072 }
1073 default:
1074 error = ENOTTY;
1075 break;
1076 }
1077
1078 return (error);
1079 }
1080
/* The "vmmctl" device node; set in vmmdev_init(), cleared in vmmdev_cleanup(). */
static struct cdev *vmmctl_cdev;
/* Character device switch for the control device: open and ioctl only. */
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};
1088
1089 int
vmmdev_init(void)1090 vmmdev_init(void)
1091 {
1092 int error;
1093
1094 sx_xlock(&vmmdev_mtx);
1095 error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
1096 UID_ROOT, GID_WHEEL, 0600, "vmmctl");
1097 if (error == 0)
1098 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1099 "Allow use of vmm in a jail.");
1100 sx_xunlock(&vmmdev_mtx);
1101
1102 return (error);
1103 }
1104
1105 int
vmmdev_cleanup(void)1106 vmmdev_cleanup(void)
1107 {
1108 sx_xlock(&vmmdev_mtx);
1109 if (!SLIST_EMPTY(&head)) {
1110 sx_xunlock(&vmmdev_mtx);
1111 return (EBUSY);
1112 }
1113 if (vmmctl_cdev != NULL) {
1114 destroy_dev(vmmctl_cdev);
1115 vmmctl_cdev = NULL;
1116 }
1117 sx_xunlock(&vmmdev_mtx);
1118
1119 return (0);
1120 }
1121
1122 static int
devmem_mmap_single(struct cdev * cdev,vm_ooffset_t * offset,vm_size_t len,struct vm_object ** objp,int nprot)1123 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1124 struct vm_object **objp, int nprot)
1125 {
1126 struct devmem_softc *dsc;
1127 vm_ooffset_t first, last;
1128 size_t seglen;
1129 int error;
1130 bool sysmem;
1131
1132 dsc = cdev->si_drv1;
1133 if (dsc == NULL) {
1134 /* 'cdev' has been created but is not ready for use */
1135 return (ENXIO);
1136 }
1137
1138 first = *offset;
1139 last = *offset + len;
1140 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1141 return (EINVAL);
1142
1143 vm_slock_memsegs(dsc->sc->vm);
1144
1145 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1146 KASSERT(error == 0 && !sysmem && *objp != NULL,
1147 ("%s: invalid devmem segment %d", __func__, dsc->segid));
1148
1149 if (seglen >= last)
1150 vm_object_reference(*objp);
1151 else
1152 error = EINVAL;
1153
1154 vm_unlock_memsegs(dsc->sc->vm);
1155 return (error);
1156 }
1157
/*
 * Character device switch for the "vmm.io/<vm>.<name>" devmem nodes created
 * by devmem_create_cdev(); only mmap is provided.
 */
static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};
1163
1164 static int
devmem_create_cdev(struct vmmdev_softc * sc,int segid,char * devname)1165 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
1166 {
1167 struct make_dev_args mda;
1168 struct devmem_softc *dsc;
1169 int error;
1170
1171 sx_xlock(&vmmdev_mtx);
1172
1173 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1174 dsc->segid = segid;
1175 dsc->name = devname;
1176 dsc->sc = sc;
1177 SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1178
1179 make_dev_args_init(&mda);
1180 mda.mda_devsw = &devmemsw;
1181 mda.mda_cr = sc->ucred;
1182 mda.mda_uid = UID_ROOT;
1183 mda.mda_gid = GID_WHEEL;
1184 mda.mda_mode = 0600;
1185 mda.mda_si_drv1 = dsc;
1186 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
1187 error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
1188 devname);
1189 if (error != 0) {
1190 SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
1191 free(dsc->name, M_VMMDEV);
1192 free(dsc, M_VMMDEV);
1193 }
1194
1195 sx_xunlock(&vmmdev_mtx);
1196
1197 return (error);
1198 }
1199
1200 static void
devmem_destroy(void * arg)1201 devmem_destroy(void *arg)
1202 {
1203 struct devmem_softc *dsc = arg;
1204
1205 destroy_dev(dsc->cdev);
1206 dsc->cdev = NULL;
1207 dsc->sc = NULL;
1208 }
1209