/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_fbsd12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_fbsd12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_FBSD12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_fbsd12)
#define	VM_GET_MEMSEG_FBSD12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_fbsd12)
#endif

static int devmem_create_cdev(const char *vmname, int id, char *devmem);

struct devmem_softc {
	int		segid;
	char		*name;
	struct cdev	*cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);

/*
 * A jailed process may use vmm only if its prison has the "vmm" allow
 * flag set.
 */
static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

/*
 * Freeze all vcpus of the VM; on failure, unfreeze any vcpus that were
 * already locked before returning the error.
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	error = vmm_priv_check(td->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->ucred = crhold(curthread->td_ucred);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}