/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

static int devmem_create_cdev(const char *vmname, int id, char *devmem);

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED	0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}
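/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies data between the
 * caller's buffer and guest physical memory one page at a time, using the
 * uio offset as the guest physical address.  Reads of unbacked addresses
 * below the top of guest system memory return zeroes so that tools such as
 * dd(1) can scan past holes in the guest physical address space.
 */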
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}
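/*
 * Allocate a memory segment for the VM.  An unnamed segment is ordinary
 * system memory; a named segment is device memory that is additionally
 * exposed to userspace via a /dev/vmm.io/<vmname>.<segname> cdev.
 */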
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
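/*
 * ioctl handler for /dev/vmm/<name>.  The command is looked up first in the
 * generic vmmdev_ioctls[] table and then in the machine-dependent table;
 * the ioctl's flags dictate which locks are taken before dispatch (shared
 * or exclusive memsegs lock, a single frozen vcpu, or all vcpus) and
 * released again afterwards.
 */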
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	error = vmm_priv_check(td->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}
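/*
 * Handler for the hw.vmm.destroy sysctl: looks up the named VM, clears
 * 'sc->cdev' to mark it as scheduled for destruction, destroys the devmem
 * cdevs followed by the VM's own cdev, and finally tears down the softc.
 */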
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};
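/*
 * Handler for the hw.vmm.create sysctl: creates a new VM with the given
 * name and a corresponding /dev/vmm/<name> cdev, failing with EEXIST if a
 * VM with that name already exists.
 */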
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->ucred = crhold(curthread->td_ucred);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}