1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 6 * All rights reserved. 7 */ 8 9 #include <sys/param.h> 10 #include <sys/conf.h> 11 #include <sys/fcntl.h> 12 #include <sys/ioccom.h> 13 #include <sys/jail.h> 14 #include <sys/kernel.h> 15 #include <sys/malloc.h> 16 #include <sys/mman.h> 17 #include <sys/proc.h> 18 #include <sys/queue.h> 19 #include <sys/sx.h> 20 #include <sys/sysctl.h> 21 #include <sys/ucred.h> 22 #include <sys/uio.h> 23 24 #include <machine/vmm.h> 25 26 #include <vm/vm.h> 27 #include <vm/vm_object.h> 28 29 #include <dev/vmm/vmm_dev.h> 30 #include <dev/vmm/vmm_stat.h> 31 32 #if defined(__amd64__) && defined(COMPAT_FREEBSD12) 33 struct vm_memseg_12 { 34 int segid; 35 size_t len; 36 char name[64]; 37 }; 38 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI"); 39 40 #define VM_ALLOC_MEMSEG_12 \ 41 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12) 42 #define VM_GET_MEMSEG_12 \ 43 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12) 44 #endif 45 46 struct devmem_softc { 47 int segid; 48 char *name; 49 struct cdev *cdev; 50 struct vmmdev_softc *sc; 51 SLIST_ENTRY(devmem_softc) link; 52 }; 53 54 struct vmmdev_softc { 55 struct vm *vm; /* vm instance cookie */ 56 struct cdev *cdev; 57 struct ucred *ucred; 58 SLIST_ENTRY(vmmdev_softc) link; 59 SLIST_HEAD(, devmem_softc) devmem; 60 int flags; 61 }; 62 63 static SLIST_HEAD(, vmmdev_softc) head; 64 65 static unsigned pr_allow_flag; 66 static struct sx vmmdev_mtx; 67 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex"); 68 69 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 70 71 SYSCTL_DECL(_hw_vmm); 72 73 static void devmem_destroy(void *arg); 74 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); 75 76 static int 77 vmm_priv_check(struct ucred *ucred) 78 { 79 if (jailed(ucred) && 80 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 81 return (EPERM); 82 83 return (0); 84 } 85 86 static int 87 vcpu_lock_one(struct vcpu *vcpu) 88 { 89 return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); 90 } 91 92 static void 93 vcpu_unlock_one(struct vcpu *vcpu) 94 { 95 enum vcpu_state state; 96 97 state = vcpu_get_state(vcpu, NULL); 98 if (state != VCPU_FROZEN) { 99 panic("vcpu %s(%d) has invalid state %d", 100 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); 101 } 102 103 vcpu_set_state(vcpu, VCPU_IDLE, false); 104 } 105 106 static int 107 vcpu_lock_all(struct vmmdev_softc *sc) 108 { 109 struct vcpu *vcpu; 110 int error; 111 uint16_t i, j, maxcpus; 112 113 error = 0; 114 vm_slock_vcpus(sc->vm); 115 maxcpus = vm_get_maxcpus(sc->vm); 116 for (i = 0; i < maxcpus; i++) { 117 vcpu = vm_vcpu(sc->vm, i); 118 if (vcpu == NULL) 119 continue; 120 error = vcpu_lock_one(vcpu); 121 if (error) 122 break; 123 } 124 125 if (error) { 126 for (j = 0; j < i; j++) { 127 vcpu = vm_vcpu(sc->vm, j); 128 if (vcpu == NULL) 129 continue; 130 vcpu_unlock_one(vcpu); 131 } 132 vm_unlock_vcpus(sc->vm); 133 } 134 135 return (error); 136 } 137 138 static void 139 vcpu_unlock_all(struct vmmdev_softc *sc) 140 { 141 struct vcpu *vcpu; 142 uint16_t i, maxcpus; 143 144 maxcpus = vm_get_maxcpus(sc->vm); 145 for (i = 0; i < maxcpus; i++) { 146 vcpu = vm_vcpu(sc->vm, i); 147 if (vcpu == NULL) 148 continue; 149 vcpu_unlock_one(vcpu); 150 } 151 vm_unlock_vcpus(sc->vm); 152 } 153 154 static struct vmmdev_softc * 155 vmmdev_lookup(const char *name, struct ucred *cred) 156 { 157 struct vmmdev_softc *sc; 158 159 sx_assert(&vmmdev_mtx, SA_XLOCKED); 160 161 SLIST_FOREACH(sc, &head, link) { 162 if (strcmp(name, vm_name(sc->vm)) == 0) 163 break; 164 } 165 166 if (sc == NULL) 167 return (NULL); 168 169 if (cr_cansee(cred, sc->ucred)) 170 return (NULL); 171 172 return (sc); 173 } 174 175 static struct vmmdev_softc * 176 vmmdev_lookup2(struct cdev *cdev) 177 { 178 return (cdev->si_drv1); 179 } 180 181 static int 182 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 183 { 184 int error, off, c, prot; 185 vm_paddr_t gpa, maxaddr; 186 void *hpa, *cookie; 187 struct vmmdev_softc *sc; 188 189 sc = vmmdev_lookup2(cdev); 190 if (sc == NULL) 191 return (ENXIO); 192 193 /* 194 * Get a read lock on the guest memory map. 195 */ 196 vm_slock_memsegs(sc->vm); 197 198 error = 0; 199 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 200 maxaddr = vmm_sysmem_maxaddr(sc->vm); 201 while (uio->uio_resid > 0 && error == 0) { 202 gpa = uio->uio_offset; 203 off = gpa & PAGE_MASK; 204 c = min(uio->uio_resid, PAGE_SIZE - off); 205 206 /* 207 * The VM has a hole in its physical memory map. If we want to 208 * use 'dd' to inspect memory beyond the hole we need to 209 * provide bogus data for memory that lies in the hole. 210 * 211 * Since this device does not support lseek(2), dd(1) will 212 * read(2) blocks of data to simulate the lseek(2). 213 */ 214 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); 215 if (hpa == NULL) { 216 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 217 error = uiomove(__DECONST(void *, zero_region), 218 c, uio); 219 else 220 error = EFAULT; 221 } else { 222 error = uiomove(hpa, c, uio); 223 vm_gpa_release(cookie); 224 } 225 } 226 vm_unlock_memsegs(sc->vm); 227 return (error); 228 } 229 230 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); 231 232 static int 233 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) 234 { 235 struct devmem_softc *dsc; 236 int error; 237 bool sysmem; 238 239 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 240 if (error || mseg->len == 0) 241 return (error); 242 243 if (!sysmem) { 244 SLIST_FOREACH(dsc, &sc->devmem, link) { 245 if (dsc->segid == mseg->segid) 246 break; 247 } 248 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 249 __func__, mseg->segid)); 250 error = copystr(dsc->name, mseg->name, len, NULL); 251 } else { 252 bzero(mseg->name, len); 253 } 254 255 return (error); 256 } 257 258 static int 259 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) 260 { 261 char *name; 262 int error; 263 bool sysmem; 264 265 error = 0; 266 name = NULL; 267 sysmem = true; 268 269 /* 270 * The allocation is lengthened by 1 to hold a terminating NUL. It'll 271 * by stripped off when devfs processes the full string. 272 */ 273 if (VM_MEMSEG_NAME(mseg)) { 274 sysmem = false; 275 name = malloc(len, M_VMMDEV, M_WAITOK); 276 error = copystr(mseg->name, name, len, NULL); 277 if (error) 278 goto done; 279 } 280 281 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 282 if (error) 283 goto done; 284 285 if (VM_MEMSEG_NAME(mseg)) { 286 error = devmem_create_cdev(sc, mseg->segid, name); 287 if (error) 288 vm_free_memseg(sc->vm, mseg->segid); 289 else 290 name = NULL; /* freed when 'cdev' is destroyed */ 291 } 292 done: 293 free(name, M_VMMDEV); 294 return (error); 295 } 296 297 static int 298 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 299 uint64_t *regval) 300 { 301 int error, i; 302 303 error = 0; 304 for (i = 0; i < count; i++) { 305 error = vm_get_register(vcpu, regnum[i], ®val[i]); 306 if (error) 307 break; 308 } 309 return (error); 310 } 311 312 static int 313 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 314 uint64_t *regval) 315 { 316 int error, i; 317 318 error = 0; 319 for (i = 0; i < count; i++) { 320 error = vm_set_register(vcpu, regnum[i], regval[i]); 321 if (error) 322 break; 323 } 324 return (error); 325 } 326 327 static int 328 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) 329 { 330 int error; 331 332 /* 333 * A jail without vmm access shouldn't be able to access vmm device 334 * files at all, but check here just to be thorough. 335 */ 336 error = vmm_priv_check(td->td_ucred); 337 if (error != 0) 338 return (error); 339 340 return (0); 341 } 342 343 static const struct vmmdev_ioctl vmmdev_ioctls[] = { 344 VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), 345 VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), 346 VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), 347 VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), 348 VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), 349 VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), 350 VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU), 351 VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), 352 VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), 353 354 #if defined(__amd64__) && defined(COMPAT_FREEBSD12) 355 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, 356 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 357 #endif 358 VMMDEV_IOCTL(VM_ALLOC_MEMSEG, 359 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 360 VMMDEV_IOCTL(VM_MMAP_MEMSEG, 361 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 362 VMMDEV_IOCTL(VM_MUNMAP_MEMSEG, 363 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 364 VMMDEV_IOCTL(VM_REINIT, 365 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 366 367 #if defined(__amd64__) && defined(COMPAT_FREEBSD12) 368 VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS), 369 #endif 370 VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), 371 VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), 372 373 VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), 374 VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), 375 376 VMMDEV_IOCTL(VM_SUSPEND, 0), 377 VMMDEV_IOCTL(VM_GET_CPUS, 0), 378 VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0), 379 VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0), 380 }; 381 382 static int 383 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 384 struct thread *td) 385 { 386 struct vmmdev_softc *sc; 387 struct vcpu *vcpu; 388 const struct vmmdev_ioctl *ioctl; 389 int error, vcpuid; 390 391 sc = vmmdev_lookup2(cdev); 392 if (sc == NULL) 393 return (ENXIO); 394 395 ioctl = NULL; 396 for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) { 397 if (vmmdev_ioctls[i].cmd == cmd) { 398 ioctl = &vmmdev_ioctls[i]; 399 break; 400 } 401 } 402 if (ioctl == NULL) { 403 for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) { 404 if (vmmdev_machdep_ioctls[i].cmd == cmd) { 405 ioctl = &vmmdev_machdep_ioctls[i]; 406 break; 407 } 408 } 409 } 410 if (ioctl == NULL) 411 return (ENOTTY); 412 413 if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0) 414 vm_xlock_memsegs(sc->vm); 415 else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0) 416 vm_slock_memsegs(sc->vm); 417 418 vcpu = NULL; 419 vcpuid = -1; 420 if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU | 421 VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) { 422 vcpuid = *(int *)data; 423 if (vcpuid == -1) { 424 if ((ioctl->flags & 425 VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) { 426 error = EINVAL; 427 goto lockfail; 428 } 429 } else { 430 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 431 if (vcpu == NULL) { 432 error = EINVAL; 433 goto lockfail; 434 } 435 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) { 436 error = vcpu_lock_one(vcpu); 437 if (error) 438 goto lockfail; 439 } 440 } 441 } 442 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) { 443 error = vcpu_lock_all(sc); 444 if (error) 445 goto lockfail; 446 } 447 448 switch (cmd) { 449 case VM_SUSPEND: { 450 struct vm_suspend *vmsuspend; 451 452 vmsuspend = (struct vm_suspend *)data; 453 error = vm_suspend(sc->vm, vmsuspend->how); 454 break; 455 } 456 case VM_REINIT: 457 error = vm_reinit(sc->vm); 458 break; 459 case VM_STAT_DESC: { 460 struct vm_stat_desc *statdesc; 461 462 statdesc = (struct vm_stat_desc *)data; 463 error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, 464 sizeof(statdesc->desc)); 465 break; 466 } 467 case VM_STATS: { 468 struct vm_stats *vmstats; 469 470 vmstats = (struct vm_stats *)data; 471 getmicrotime(&vmstats->tv); 472 error = vmm_stat_copy(vcpu, vmstats->index, 473 nitems(vmstats->statbuf), &vmstats->num_entries, 474 vmstats->statbuf); 475 break; 476 } 477 case VM_MMAP_GETNEXT: { 478 struct vm_memmap *mm; 479 480 mm = (struct vm_memmap *)data; 481 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 482 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 483 break; 484 } 485 case VM_MMAP_MEMSEG: { 486 struct vm_memmap *mm; 487 488 mm = (struct vm_memmap *)data; 489 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 490 mm->len, mm->prot, mm->flags); 491 break; 492 } 493 case VM_MUNMAP_MEMSEG: { 494 struct vm_munmap *mu; 495 496 mu = (struct vm_munmap *)data; 497 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 498 break; 499 } 500 #if defined(__amd64__) && defined(COMPAT_FREEBSD12) 501 case VM_ALLOC_MEMSEG_12: 502 error = alloc_memseg(sc, (struct vm_memseg *)data, 503 sizeof(((struct vm_memseg_12 *)0)->name)); 504 break; 505 case VM_GET_MEMSEG_12: 506 error = get_memseg(sc, (struct vm_memseg *)data, 507 sizeof(((struct vm_memseg_12 *)0)->name)); 508 break; 509 #endif 510 case VM_ALLOC_MEMSEG: 511 error = alloc_memseg(sc, (struct vm_memseg *)data, 512 sizeof(((struct vm_memseg *)0)->name)); 513 break; 514 case VM_GET_MEMSEG: 515 error = get_memseg(sc, (struct vm_memseg *)data, 516 sizeof(((struct vm_memseg *)0)->name)); 517 break; 518 case VM_GET_REGISTER: { 519 struct vm_register *vmreg; 520 521 vmreg = (struct vm_register *)data; 522 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 523 break; 524 } 525 case VM_SET_REGISTER: { 526 struct vm_register *vmreg; 527 528 vmreg = (struct vm_register *)data; 529 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 530 break; 531 } 532 case VM_GET_REGISTER_SET: { 533 struct vm_register_set *vmregset; 534 uint64_t *regvals; 535 int *regnums; 536 537 vmregset = (struct vm_register_set *)data; 538 if (vmregset->count > VM_REG_LAST) { 539 error = EINVAL; 540 break; 541 } 542 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 543 M_WAITOK); 544 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 545 M_WAITOK); 546 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 547 vmregset->count); 548 if (error == 0) 549 error = vm_get_register_set(vcpu, 550 vmregset->count, regnums, regvals); 551 if (error == 0) 552 error = copyout(regvals, vmregset->regvals, 553 sizeof(regvals[0]) * vmregset->count); 554 free(regvals, M_VMMDEV); 555 free(regnums, M_VMMDEV); 556 break; 557 } 558 case VM_SET_REGISTER_SET: { 559 struct vm_register_set *vmregset; 560 uint64_t *regvals; 561 int *regnums; 562 563 vmregset = (struct vm_register_set *)data; 564 if (vmregset->count > VM_REG_LAST) { 565 error = EINVAL; 566 break; 567 } 568 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 569 M_WAITOK); 570 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 571 M_WAITOK); 572 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 573 vmregset->count); 574 if (error == 0) 575 error = copyin(vmregset->regvals, regvals, 576 sizeof(regvals[0]) * vmregset->count); 577 if (error == 0) 578 error = vm_set_register_set(vcpu, 579 vmregset->count, regnums, regvals); 580 free(regvals, M_VMMDEV); 581 free(regnums, M_VMMDEV); 582 break; 583 } 584 case VM_GET_CAPABILITY: { 585 struct vm_capability *vmcap; 586 587 vmcap = (struct vm_capability *)data; 588 error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval); 589 break; 590 } 591 case VM_SET_CAPABILITY: { 592 struct vm_capability *vmcap; 593 594 vmcap = (struct vm_capability *)data; 595 error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval); 596 break; 597 } 598 case VM_ACTIVATE_CPU: 599 error = vm_activate_cpu(vcpu); 600 break; 601 case VM_GET_CPUS: { 602 struct vm_cpuset *vm_cpuset; 603 cpuset_t *cpuset; 604 int size; 605 606 error = 0; 607 vm_cpuset = (struct vm_cpuset *)data; 608 size = vm_cpuset->cpusetsize; 609 if (size < 1 || size > CPU_MAXSIZE / NBBY) { 610 error = ERANGE; 611 break; 612 } 613 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP, 614 M_WAITOK | M_ZERO); 615 if (vm_cpuset->which == VM_ACTIVE_CPUS) 616 *cpuset = vm_active_cpus(sc->vm); 617 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 618 *cpuset = vm_suspended_cpus(sc->vm); 619 else if (vm_cpuset->which == VM_DEBUG_CPUS) 620 *cpuset = vm_debug_cpus(sc->vm); 621 else 622 error = EINVAL; 623 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY)) 624 error = ERANGE; 625 if (error == 0) 626 error = copyout(cpuset, vm_cpuset->cpus, size); 627 free(cpuset, M_TEMP); 628 break; 629 } 630 case VM_SUSPEND_CPU: 631 error = vm_suspend_cpu(sc->vm, vcpu); 632 break; 633 case VM_RESUME_CPU: 634 error = vm_resume_cpu(sc->vm, vcpu); 635 break; 636 case VM_SET_TOPOLOGY: { 637 struct vm_cpu_topology *topology; 638 639 topology = (struct vm_cpu_topology *)data; 640 error = vm_set_topology(sc->vm, topology->sockets, 641 topology->cores, topology->threads, topology->maxcpus); 642 break; 643 } 644 case VM_GET_TOPOLOGY: { 645 struct vm_cpu_topology *topology; 646 647 topology = (struct vm_cpu_topology *)data; 648 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 649 &topology->threads, &topology->maxcpus); 650 error = 0; 651 break; 652 } 653 default: 654 error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag, 655 td); 656 break; 657 } 658 659 if ((ioctl->flags & 660 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0) 661 vm_unlock_memsegs(sc->vm); 662 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) 663 vcpu_unlock_all(sc); 664 else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) 665 vcpu_unlock_one(vcpu); 666 667 /* 668 * Make sure that no handler returns a kernel-internal 669 * error value to userspace. 670 */ 671 KASSERT(error == ERESTART || error >= 0, 672 ("vmmdev_ioctl: invalid error return %d", error)); 673 return (error); 674 675 lockfail: 676 if ((ioctl->flags & 677 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0) 678 vm_unlock_memsegs(sc->vm); 679 return (error); 680 } 681 682 static int 683 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 684 struct vm_object **objp, int nprot) 685 { 686 struct vmmdev_softc *sc; 687 vm_paddr_t gpa; 688 size_t len; 689 vm_ooffset_t segoff, first, last; 690 int error, found, segid; 691 bool sysmem; 692 693 first = *offset; 694 last = first + mapsize; 695 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 696 return (EINVAL); 697 698 sc = vmmdev_lookup2(cdev); 699 if (sc == NULL) { 700 /* virtual machine is in the process of being created */ 701 return (EINVAL); 702 } 703 704 /* 705 * Get a read lock on the guest memory map. 706 */ 707 vm_slock_memsegs(sc->vm); 708 709 gpa = 0; 710 found = 0; 711 while (!found) { 712 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 713 NULL, NULL); 714 if (error) 715 break; 716 717 if (first >= gpa && last <= gpa + len) 718 found = 1; 719 else 720 gpa += len; 721 } 722 723 if (found) { 724 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 725 KASSERT(error == 0 && *objp != NULL, 726 ("%s: invalid memory segment %d", __func__, segid)); 727 if (sysmem) { 728 vm_object_reference(*objp); 729 *offset = segoff + (first - gpa); 730 } else { 731 error = EINVAL; 732 } 733 } 734 vm_unlock_memsegs(sc->vm); 735 return (error); 736 } 737 738 static void 739 vmmdev_destroy(struct vmmdev_softc *sc) 740 { 741 struct devmem_softc *dsc; 742 int error __diagused; 743 744 KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__)); 745 746 /* 747 * Destroy all cdevs: 748 * 749 * - any new operations on the 'cdev' will return an error (ENXIO). 750 * 751 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 752 */ 753 SLIST_FOREACH(dsc, &sc->devmem, link) { 754 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 755 devmem_destroy(dsc); 756 } 757 758 vm_disable_vcpu_creation(sc->vm); 759 error = vcpu_lock_all(sc); 760 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 761 vm_unlock_vcpus(sc->vm); 762 763 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 764 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 765 SLIST_REMOVE_HEAD(&sc->devmem, link); 766 free(dsc->name, M_VMMDEV); 767 free(dsc, M_VMMDEV); 768 } 769 770 if (sc->vm != NULL) 771 vm_destroy(sc->vm); 772 773 if (sc->ucred != NULL) 774 crfree(sc->ucred); 775 776 sx_xlock(&vmmdev_mtx); 777 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 778 sx_xunlock(&vmmdev_mtx); 779 free(sc, M_VMMDEV); 780 } 781 782 static int 783 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred) 784 { 785 struct cdev *cdev; 786 struct vmmdev_softc *sc; 787 788 sx_xlock(&vmmdev_mtx); 789 sc = vmmdev_lookup(name, cred); 790 if (sc == NULL || sc->cdev == NULL) { 791 sx_xunlock(&vmmdev_mtx); 792 return (EINVAL); 793 } 794 795 /* 796 * Setting 'sc->cdev' to NULL is used to indicate that the VM 797 * is scheduled for destruction. 798 */ 799 cdev = sc->cdev; 800 sc->cdev = NULL; 801 sx_xunlock(&vmmdev_mtx); 802 803 destroy_dev(cdev); 804 vmmdev_destroy(sc); 805 806 return (0); 807 } 808 809 static int 810 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 811 { 812 char *buf; 813 int error, buflen; 814 815 error = vmm_priv_check(req->td->td_ucred); 816 if (error) 817 return (error); 818 819 buflen = VM_MAX_NAMELEN + 1; 820 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 821 strlcpy(buf, "beavis", buflen); 822 error = sysctl_handle_string(oidp, buf, buflen, req); 823 if (error == 0 && req->newptr != NULL) 824 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred); 825 free(buf, M_VMMDEV); 826 return (error); 827 } 828 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 829 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 830 NULL, 0, sysctl_vmm_destroy, "A", 831 NULL); 832 833 static struct cdevsw vmmdevsw = { 834 .d_name = "vmmdev", 835 .d_version = D_VERSION, 836 .d_open = vmmdev_open, 837 .d_ioctl = vmmdev_ioctl, 838 .d_mmap_single = vmmdev_mmap_single, 839 .d_read = vmmdev_rw, 840 .d_write = vmmdev_rw, 841 }; 842 843 static struct vmmdev_softc * 844 vmmdev_alloc(struct vm *vm, struct ucred *cred) 845 { 846 struct vmmdev_softc *sc; 847 848 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO); 849 SLIST_INIT(&sc->devmem); 850 sc->vm = vm; 851 sc->ucred = crhold(cred); 852 return (sc); 853 } 854 855 static int 856 vmmdev_create(const char *name, struct ucred *cred) 857 { 858 struct make_dev_args mda; 859 struct cdev *cdev; 860 struct vmmdev_softc *sc; 861 struct vm *vm; 862 int error; 863 864 sx_xlock(&vmmdev_mtx); 865 sc = vmmdev_lookup(name, cred); 866 if (sc != NULL) { 867 sx_xunlock(&vmmdev_mtx); 868 return (EEXIST); 869 } 870 871 error = vm_create(name, &vm); 872 if (error != 0) { 873 sx_xunlock(&vmmdev_mtx); 874 return (error); 875 } 876 sc = vmmdev_alloc(vm, cred); 877 SLIST_INSERT_HEAD(&head, sc, link); 878 879 make_dev_args_init(&mda); 880 mda.mda_devsw = &vmmdevsw; 881 mda.mda_cr = sc->ucred; 882 mda.mda_uid = UID_ROOT; 883 mda.mda_gid = GID_WHEEL; 884 mda.mda_mode = 0600; 885 mda.mda_si_drv1 = sc; 886 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 887 error = make_dev_s(&mda, &cdev, "vmm/%s", name); 888 if (error != 0) { 889 sx_xunlock(&vmmdev_mtx); 890 vmmdev_destroy(sc); 891 return (error); 892 } 893 sc->cdev = cdev; 894 sx_xunlock(&vmmdev_mtx); 895 return (0); 896 } 897 898 static int 899 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 900 { 901 char *buf; 902 int error, buflen; 903 904 error = vmm_priv_check(req->td->td_ucred); 905 if (error != 0) 906 return (error); 907 908 buflen = VM_MAX_NAMELEN + 1; 909 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 910 strlcpy(buf, "beavis", buflen); 911 error = sysctl_handle_string(oidp, buf, buflen, req); 912 if (error == 0 && req->newptr != NULL) 913 error = vmmdev_create(buf, req->td->td_ucred); 914 free(buf, M_VMMDEV); 915 return (error); 916 } 917 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 918 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 919 NULL, 0, sysctl_vmm_create, "A", 920 NULL); 921 922 static int 923 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td) 924 { 925 int error; 926 927 error = vmm_priv_check(td->td_ucred); 928 if (error != 0) 929 return (error); 930 931 if ((flags & FWRITE) == 0) 932 return (EPERM); 933 934 return (0); 935 } 936 937 static int 938 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 939 struct thread *td) 940 { 941 int error; 942 943 switch (cmd) { 944 case VMMCTL_VM_CREATE: { 945 struct vmmctl_vm_create *vmc; 946 947 vmc = (struct vmmctl_vm_create *)data; 948 vmc->name[VM_MAX_NAMELEN] = '\0'; 949 for (size_t i = 0; i < nitems(vmc->reserved); i++) { 950 if (vmc->reserved[i] != 0) { 951 error = EINVAL; 952 return (error); 953 } 954 } 955 956 error = vmmdev_create(vmc->name, td->td_ucred); 957 break; 958 } 959 case VMMCTL_VM_DESTROY: { 960 struct vmmctl_vm_destroy *vmd; 961 962 vmd = (struct vmmctl_vm_destroy *)data; 963 vmd->name[VM_MAX_NAMELEN] = '\0'; 964 for (size_t i = 0; i < nitems(vmd->reserved); i++) { 965 if (vmd->reserved[i] != 0) { 966 error = EINVAL; 967 return (error); 968 } 969 } 970 971 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred); 972 break; 973 } 974 default: 975 error = ENOTTY; 976 break; 977 } 978 979 return (error); 980 } 981 982 static struct cdev *vmmctl_cdev; 983 static struct cdevsw vmmctlsw = { 984 .d_name = "vmmctl", 985 .d_version = D_VERSION, 986 .d_open = vmmctl_open, 987 .d_ioctl = vmmctl_ioctl, 988 }; 989 990 int 991 vmmdev_init(void) 992 { 993 int error; 994 995 sx_xlock(&vmmdev_mtx); 996 error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL, 997 UID_ROOT, GID_WHEEL, 0600, "vmmctl"); 998 if (error == 0) 999 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 1000 "Allow use of vmm in a jail."); 1001 sx_xunlock(&vmmdev_mtx); 1002 1003 return (error); 1004 } 1005 1006 int 1007 vmmdev_cleanup(void) 1008 { 1009 sx_xlock(&vmmdev_mtx); 1010 if (!SLIST_EMPTY(&head)) { 1011 sx_xunlock(&vmmdev_mtx); 1012 return (EBUSY); 1013 } 1014 if (vmmctl_cdev != NULL) { 1015 destroy_dev(vmmctl_cdev); 1016 vmmctl_cdev = NULL; 1017 } 1018 sx_xunlock(&vmmdev_mtx); 1019 1020 return (0); 1021 } 1022 1023 static int 1024 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1025 struct vm_object **objp, int nprot) 1026 { 1027 struct devmem_softc *dsc; 1028 vm_ooffset_t first, last; 1029 size_t seglen; 1030 int error; 1031 bool sysmem; 1032 1033 dsc = cdev->si_drv1; 1034 if (dsc == NULL) { 1035 /* 'cdev' has been created but is not ready for use */ 1036 return (ENXIO); 1037 } 1038 1039 first = *offset; 1040 last = *offset + len; 1041 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1042 return (EINVAL); 1043 1044 vm_slock_memsegs(dsc->sc->vm); 1045 1046 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1047 KASSERT(error == 0 && !sysmem && *objp != NULL, 1048 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1049 1050 if (seglen >= last) 1051 vm_object_reference(*objp); 1052 else 1053 error = EINVAL; 1054 1055 vm_unlock_memsegs(dsc->sc->vm); 1056 return (error); 1057 } 1058 1059 static struct cdevsw devmemsw = { 1060 .d_name = "devmem", 1061 .d_version = D_VERSION, 1062 .d_mmap_single = devmem_mmap_single, 1063 }; 1064 1065 static int 1066 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname) 1067 { 1068 struct make_dev_args mda; 1069 struct devmem_softc *dsc; 1070 int error; 1071 1072 sx_xlock(&vmmdev_mtx); 1073 1074 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1075 dsc->segid = segid; 1076 dsc->name = devname; 1077 dsc->sc = sc; 1078 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1079 1080 make_dev_args_init(&mda); 1081 mda.mda_devsw = &devmemsw; 1082 mda.mda_cr = sc->ucred; 1083 mda.mda_uid = UID_ROOT; 1084 mda.mda_gid = GID_WHEEL; 1085 mda.mda_mode = 0600; 1086 mda.mda_si_drv1 = dsc; 1087 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 1088 error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm), 1089 devname); 1090 if (error != 0) { 1091 SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link); 1092 free(dsc->name, M_VMMDEV); 1093 free(dsc, M_VMMDEV); 1094 } 1095 1096 sx_xunlock(&vmmdev_mtx); 1097 1098 return (error); 1099 } 1100 1101 static void 1102 devmem_destroy(void *arg) 1103 { 1104 struct devmem_softc *dsc = arg; 1105 1106 destroy_dev(dsc->cdev); 1107 dsc->cdev = NULL; 1108 dsc->sc = NULL; 1109 } 1110