/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED	0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct vmmdev_softc *sc;
	int error;

	sc = vmmdev_lookup2(dev);
	KASSERT(sc != NULL, ("%s: device not found", __func__));

	/*
	 * A user can only access VMs that they themselves have created.
	 */
	if (td->td_ucred != sc->ucred)
		return (EPERM);

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
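
/*
 * Note on the table above: each entry pairs a generic ioctl command with the
 * locking that vmmdev_ioctl() below performs before dispatching to the
 * handler.  VMMDEV_IOCTL_XLOCK_MEMSEGS and VMMDEV_IOCTL_SLOCK_MEMSEGS take
 * the guest memory segment lock exclusively or shared.
 * VMMDEV_IOCTL_LOCK_ONE_VCPU freezes the vcpu identified by the leading int
 * of the ioctl argument, while VMMDEV_IOCTL_LOCK_ALL_VCPUS freezes every
 * vcpu of the VM.  VMMDEV_IOCTL_MAYBE_ALLOC_VCPU additionally accepts a
 * vcpuid of -1, in which case the handler is invoked with a NULL vcpu.
 * Commands not listed here fall through to the machine-dependent table,
 * vmmdev_machdep_ioctls[].
 */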

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
	case VM_GET_MEMSEG_12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_open		= vmmdev_open,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	struct vm *vm;
	int error;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(name, &vm);
	if (error != 0)
		return (error);

	sc = vmmdev_alloc(vm, cred);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(name, cred);
	if (sc2 != NULL) {
		mtx_unlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (EEXIST);
	}
	sc->flags |= VSC_LINKED;
	SLIST_INSERT_HEAD(&head, sc, link);
	mtx_unlock(&vmmdev_mtx);

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", name);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct cdev *cdev;
	const char *vmname;
	int error;

	vmname = vm_name(sc->vm);

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}