/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif

struct devmem_softc {
	int segid;
	char *name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm *vm;		/* vm instance cookie */
	struct cdev *cdev;
	struct ucred *ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
	case VM_GET_MEMSEG_12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error)
		return (error);

	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");

	return (0);
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
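
/*
 * Illustrative only, not part of the driver: a minimal userspace sketch of
 * creating a VM through /dev/vmmctl via the VMMCTL_VM_CREATE ioctl handled
 * by vmmctl_ioctl() above.  The header path and error handling are
 * assumptions; vmmctl_open() requires the device to be opened for writing,
 * and the reserved[] fields must be zero or the ioctl fails with EINVAL.
 *
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <dev/vmm/vmm_dev.h>	// assumed install location
 *
 *	int
 *	example_create_vm(const char *name)
 *	{
 *		struct vmmctl_vm_create vmc;
 *		int fd;
 *
 *		fd = open("/dev/vmmctl", O_RDWR);
 *		if (fd < 0)
 *			return (-1);
 *		memset(&vmc, 0, sizeof(vmc));	// reserved[] must be zero
 *		strlcpy(vmc.name, name, sizeof(vmc.name));
 *		return (ioctl(fd, VMMCTL_VM_CREATE, &vmc));
 *	}
 */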