/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}
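
/*
 * Usage sketch (illustrative, not taken from this file): pr_allow_flag is
 * registered in vmmdev_init() via prison_add_allow(NULL, "vmm", ...), so the
 * check above should correspond to the "allow.vmm" jail(8) parameter, e.g.
 * a jail allowed to use vmm would presumably be created with something like:
 *
 *	jail -c name=vmjail path=/ allow.vmm persist
 *
 * Only the pr_allow check itself comes from the code above; the jail(8)
 * invocation is an assumption.
 */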

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
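
/*
 * Usage sketch (illustrative, assuming a VM named "testvm"): since the device
 * implements read(2)/write(2) but not lseek(2), guest memory can be inspected
 * with dd(1) reading forward from offset 0, for example:
 *
 *	dd if=/dev/vmm/testvm bs=4096 skip=256 count=1 | hexdump -C
 *
 * dd(1) reads and discards the skipped blocks to simulate the seek, as noted
 * in the comment inside vmmdev_rw() above.
 */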

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}
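
/*
 * Each entry in the table below pairs an ioctl command with the locking that
 * vmmdev_ioctl() acquires before dispatching it: VMMDEV_IOCTL_LOCK_ONE_VCPU
 * freezes the vcpu named by the leading int of the ioctl argument,
 * VMMDEV_IOCTL_LOCK_ALL_VCPUS freezes every vcpu, and
 * VMMDEV_IOCTL_XLOCK_MEMSEGS / VMMDEV_IOCTL_SLOCK_MEMSEGS take the memory
 * segment lock exclusively or shared.  VMMDEV_IOCTL_MAYBE_ALLOC_VCPU permits
 * a vcpuid of -1, in which case no vcpu is locked and the handler operates on
 * all vcpus.
 */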

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
	case VM_GET_MEMSEG_12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}
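
/*
 * Userland sketch (illustrative; the field names of struct vm_register are
 * assumed from their use above, where the leading int selects the vcpu):
 *
 *	int fd = open("/dev/vmm/testvm", O_RDWR);
 *	struct vm_register vmreg = { .cpuid = 0, .regnum = <some VM_REG_*> };
 *	if (ioctl(fd, VM_GET_REGISTER, &vmreg) == 0)
 *		printf("reg = %#jx\n", (uintmax_t)vmreg.regval);
 *
 * VM_GET_REGISTER is tagged VMMDEV_IOCTL_LOCK_ONE_VCPU in the table above, so
 * vcpu 0 is frozen for the duration of the ioctl.
 */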

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);
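
/*
 * Usage sketch (illustrative): writing a VM name to this node destroys the
 * corresponding VM, e.g.:
 *
 *	sysctl hw.vmm.destroy=testvm
 *
 * A matching hw.vmm.create node is declared below.
 */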

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};
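
/*
 * Userland sketch (illustrative; only the "name" and "reserved" members of
 * struct vmmctl_vm_create are assumed here, from their use above):
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);	// FWRITE is required by vmmctl_open()
 *	struct vmmctl_vm_create vmc = { 0 };
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);	// creates /dev/vmm/testvm
 *
 * VMMCTL_VM_DESTROY works the same way with struct vmmctl_vm_destroy.
 */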

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
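
/*
 * Usage sketch (illustrative; the segment name "bootrom" is only an example):
 * a named (devmem) segment created through VM_ALLOC_MEMSEG shows up as
 * /dev/vmm.io/<vmname>.<segname> and can be mapped from offset 0 up to the
 * segment length, e.g.:
 *
 *	int fd = open("/dev/vmm.io/testvm.bootrom", O_RDWR);
 *	void *p = mmap(NULL, seglen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * devmem_mmap_single() rejects PROT_EXEC and mappings that extend past the
 * end of the segment.
 */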