/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
#include <dev/vmm/vmm_vm.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	LIST_ENTRY(vmmdev_softc) priv_link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

struct vmmctl_priv {
	LIST_HEAD(, vmmdev_softc) softcs;
};

static bool vmm_initialized = false;

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

u_int vm_maxvmms;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
    &vm_maxvmms, 0, "Maximum number of VMM instances per user");

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
static void vmmdev_destroy(struct vmmdev_softc *sc);

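/*
 * Returns EPERM when the calling credential belongs to a jail that has not
 * been granted vmm access via the "allow.vmm" permission.
 */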
static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

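/*
 * Freeze a single vCPU so an ioctl handler can safely examine or modify its
 * state; vcpu_unlock_one() returns it to the idle state.
 */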
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

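/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies guest physical memory
 * to or from userspace one page at a time, substituting zeroes for reads of
 * holes in the guest physical address space below the top of system memory.
 */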
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

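/*
 * Copy the attributes of memory segment 'mseg->segid' out to userspace.
 * Device memory segments also report the name under which their devmem cdev
 * was created; system memory segments report an empty name.
 */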
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

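/*
 * Allocate a new memory segment.  A named segment is backed by device memory
 * and gets a /dev/vmm.io/<vm>.<name> cdev; an unnamed segment is treated as
 * system memory.  'domainset' optionally constrains the NUMA domains from
 * which the segment is allocated.
 */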
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem,
	    domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

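/*
 * Helpers for the VM_GET_REGISTER_SET and VM_SET_REGISTER_SET ioctls: get or
 * set a caller-supplied list of registers, stopping at the first register
 * that fails.
 */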
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

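/*
 * Main ioctl handler for /dev/vmm/<name>.  The vmmdev_ioctls table above (and
 * the machine-dependent table consulted as a fallback) describes the locking
 * each command needs: memory segment read/write locks and/or freezing one or
 * all vCPUs before the command-specific code runs.
 */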
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
		error = priv_check(td, PRIV_DRIVER);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
		    mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask,
			    mseg->ds_policy, mseg->ds_mask_size);
			free(mask, M_VMMDEV);
			if (error)
				break;
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				break;
			}
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

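/*
 * mmap(2) handler for /dev/vmm/<name>: the requested range must fall entirely
 * within a single mapping of a system memory segment, whose backing VM object
 * is returned for the caller to map.
 */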
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

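/*
 * Tear down a VM instance: destroy its devmem cdevs, freeze vCPU state,
 * destroy the VM itself, drop the per-user VM accounting and credential
 * reference, and finally unlink and free the softc.  Callers must have
 * already cleared and destroyed 'sc->cdev'.
 */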
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
	crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_REMOVE(sc, priv_link);
	sx_xunlock(&vmmdev_mtx);
	wakeup(sc);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_open		= vmmdev_open,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

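/*
 * Create a new VM and its /dev/vmm/<name> cdev.  The VM is charged against
 * the creator's per-user limit (hw.vmm.maxvmms) and, when
 * VMMCTL_CREATE_DESTROY_ON_CLOSE is set, is linked to the caller's
 * /dev/vmmctl descriptor so that it is destroyed when that descriptor is
 * closed.
 */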
static int
vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv;
	struct vm *vm;
	int error;

	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
		return (EINVAL);

	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
		return (EINVAL);
	error = devfs_get_cdevpriv((void **)&priv);
	if (error)
		return (error);

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);
	sc->flags = flags;
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
		sx_xunlock(&vmmdev_mtx);
		destroy_dev(cdev);
		vmmdev_destroy(sc);
		return (ENOMEM);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	if (!vmm_initialized)
		return (ENXIO);

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, 0, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static void
vmmctl_dtor(void *arg)
{
	struct cdev *sc_cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv = arg;

	/*
	 * Scan the softc list for any VMs associated with
	 * the current descriptor and destroy them.
	 */
	sx_xlock(&vmmdev_mtx);
	while (!LIST_EMPTY(&priv->softcs)) {
		sc = LIST_FIRST(&priv->softcs);
		sc_cdev = sc->cdev;
		if (sc_cdev != NULL) {
			sc->cdev = NULL;
		} else {
			/*
			 * Another thread has already
			 * started the removal process.
			 * Sleep until 'vmmdev_destroy' notifies us
			 * that the removal has finished.
			 */
			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
			continue;
		}
		/*
		 * Temporarily drop the lock to allow vmmdev_destroy to run.
		 */
		sx_xunlock(&vmmdev_mtx);
		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
		destroy_dev(sc_cdev);
		/* vmmdev_destroy will unlink the 'priv_link' entry. */
		vmmdev_destroy(sc);
		sx_xlock(&vmmdev_mtx);
	}
	sx_xunlock(&vmmdev_mtx);

	free(priv, M_VMMDEV);
}

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;
	struct vmmctl_priv *priv;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
	LIST_INIT(&priv->softcs);
	error = devfs_set_cdevpriv(priv, vmmctl_dtor);
	if (error != 0) {
		free(priv, M_VMMDEV);
		return (error);
	}

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name		= "vmmctl",
	.d_version	= D_VERSION,
	.d_open		= vmmctl_open,
	.d_ioctl	= vmmctl_ioctl,
};

static int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;

		vm_maxcpu = mp_ncpus;
		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
		if (vm_maxcpu > VM_MAXCPU) {
			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
			vm_maxcpu = VM_MAXCPU;
		}
		if (vm_maxcpu == 0)
			vm_maxcpu = 1;
		vm_maxvmms = 4 * mp_ncpus;
		error = vmm_modinit();
		if (error == 0)
			vmm_initialized = true;
		else {
			int error1 __diagused;

			error1 = vmmdev_cleanup();
			KASSERT(error1 == 0,
			    ("%s: vmmdev_cleanup failed: %d", __func__, error1));
		}
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmm_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - Initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

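/*
 * mmap(2) handler for /dev/vmm.io/<vm>.<name>: returns the VM object backing
 * the devmem segment, provided the requested range fits within it.
 */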
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

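/*
 * Create the /dev/vmm.io/<vm>.<name> cdev for a device memory segment and
 * link its softc into the owning VM's devmem list.  On failure the entry is
 * unlinked and 'devname' is freed.
 */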
static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}