/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_fbsd12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_fbsd12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_FBSD12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_fbsd12)
#define	VM_GET_MEMSEG_FBSD12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_fbsd12)
#endif

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED	0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}
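
/*
 * Illustrative note (a sketch, not part of the driver proper): the check
 * above denies jailed processes unless their jail carries the "allow.vmm"
 * permission registered by vmmdev_init().  A jail intended to run bhyve
 * might therefore be created roughly as follows; the jail name and path
 * are hypothetical:
 *
 *	jail -c name=bhyvejail path=/ allow.vmm=1 persist
 */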

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
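
/*
 * Illustrative sketch (not part of the driver): the read/write interface
 * above lets guest memory be inspected from the host with ordinary tools.
 * Assuming a VM named "guest0", dumping the 4KB page at guest physical
 * address 0x100000 might look like:
 *
 *	dd if=/dev/vmm/guest0 bs=4096 skip=256 count=1 | hexdump -C
 *
 * Since the device does not support lseek(2), dd(1) reaches the requested
 * offset by reading and discarding the preceding blocks.
 */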

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
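
/*
 * Illustrative note: each entry above pairs an ioctl command with the
 * locking that vmmdev_ioctl() must establish before dispatching it.  A
 * hypothetical new ioctl operating on a single vcpu's state would use the
 * one-vcpu lock flag, e.g.:
 *
 *	VMMDEV_IOCTL(VM_EXAMPLE_OP, VMMDEV_IOCTL_LOCK_ONE_VCPU),
 *
 * whereas one that modifies the guest memory map would pair the exclusive
 * memsegs lock with the all-vcpus lock, as VM_ALLOC_MEMSEG does.
 */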

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	error = vmm_priv_check(td->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}
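
/*
 * Illustrative sketch (not part of the driver): userland reaches the
 * handler above via ioctl(2) on the VM's control device.  Assuming a VM
 * named "guest0" exists, reading a register of vcpu 0 might look roughly
 * like this; error handling is omitted and the field names are indicative
 * only (the authoritative layout of struct vm_register lives in the vmm
 * headers):
 *
 *	int fd = open("/dev/vmm/guest0", O_RDWR);
 *	struct vm_register vmreg;
 *
 *	memset(&vmreg, 0, sizeof(vmreg));
 *	vmreg.cpuid = 0;		// leading int selects the vcpu
 *	vmreg.regnum = ...;		// one of the VM_REG_* constants
 *	ioctl(fd, VM_GET_REGISTER, &vmreg);
 *	// on success, vmreg.regval holds the register contents
 */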

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
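
/*
 * Illustrative sketch (not part of the driver): system memory segments are
 * mapped into a host process by mmap(2)ing the VM's control device, with
 * the guest physical address serving as the file offset.  For a
 * hypothetical VM "guest0" whose lowmem segment starts at guest physical
 * address 0:
 *
 *	int fd = open("/dev/vmm/guest0", O_RDWR);
 *	void *gpa0 = mmap(NULL, 64 * 1024 * 1024, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *
 * PROT_EXEC mappings are rejected by the handler above, and only sysmem
 * segments may be mapped this way; devmem segments go through their own
 * cdevs (see devmem_mmap_single() below).
 */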

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);
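
/*
 * Illustrative usage (a sketch): writing a VM name to the sysctl above
 * tears that VM down, e.g.:
 *
 *	sysctl hw.vmm.destroy=guest0
 *
 * The name is resolved under the caller's credentials, so a jailed caller
 * can only destroy VMs that are visible to it.
 */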

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	struct vm *vm;
	int error;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(name, &vm);
	if (error != 0)
		return (error);

	sc = vmmdev_alloc(vm, cred);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(name, cred);
	if (sc2 != NULL) {
		mtx_unlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (EEXIST);
	}
	sc->flags |= VSC_LINKED;
	SLIST_INSERT_HEAD(&head, sc, link);
	mtx_unlock(&vmmdev_mtx);

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", name);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);
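
/*
 * Illustrative usage (a sketch): a VM instance is created by writing its
 * name to the sysctl above, after which the control device appears under
 * /dev/vmm, e.g.:
 *
 *	sysctl hw.vmm.create=guest0
 *	ls -l /dev/vmm/guest0
 */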

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct cdev *cdev;
	const char *vmname;
	int error;

	vmname = vm_name(sc->vm);

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}
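
/*
 * Illustrative sketch (not part of the driver): device memory segments are
 * exposed through their own cdevs under /dev/vmm.io and are mapped with
 * mmap(2) using an offset relative to the start of the segment rather than
 * a guest physical address.  For a hypothetical 16MB segment named
 * "framebuffer" belonging to VM "guest0":
 *
 *	int fd = open("/dev/vmm.io/guest0.framebuffer", O_RDWR);
 *	void *fb = mmap(NULL, 16 * 1024 * 1024, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 */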

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}