/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_stat.h>

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_12 {
        int segid;
        size_t len;
        char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define VM_ALLOC_MEMSEG_12 \
        _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
        _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif

struct devmem_softc {
        int segid;
        char *name;
        struct cdev *cdev;
        struct vmmdev_softc *sc;
        SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
        struct vm *vm;          /* vm instance cookie */
        struct cdev *cdev;
        struct ucred *ucred;
        SLIST_ENTRY(vmmdev_softc) link;
        SLIST_HEAD(, devmem_softc) devmem;
        int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
        if (jailed(ucred) &&
            !(ucred->cr_prison->pr_allow & pr_allow_flag))
                return (EPERM);

        return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
        return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
        enum vcpu_state state;

        state = vcpu_get_state(vcpu, NULL);
        if (state != VCPU_FROZEN) {
                panic("vcpu %s(%d) has invalid state %d",
                    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
        }

        vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
        struct vcpu *vcpu;
        int error;
        uint16_t i, j, maxcpus;

        error = 0;
        vm_slock_vcpus(sc->vm);
        maxcpus = vm_get_maxcpus(sc->vm);
        for (i = 0; i < maxcpus; i++) {
                vcpu = vm_vcpu(sc->vm, i);
                if (vcpu == NULL)
                        continue;
                error = vcpu_lock_one(vcpu);
                if (error)
                        break;
        }

        if (error) {
                for (j = 0; j < i; j++) {
                        vcpu = vm_vcpu(sc->vm, j);
                        if (vcpu == NULL)
                                continue;
                        vcpu_unlock_one(vcpu);
                }
                vm_unlock_vcpus(sc->vm);
        }

        return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
        struct vcpu *vcpu;
        uint16_t i, maxcpus;

        maxcpus = vm_get_maxcpus(sc->vm);
        for (i = 0; i < maxcpus; i++) {
                vcpu = vm_vcpu(sc->vm, i);
                if (vcpu == NULL)
                        continue;
                vcpu_unlock_one(vcpu);
        }
        vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
        struct vmmdev_softc *sc;

        sx_assert(&vmmdev_mtx, SA_XLOCKED);

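        /*
         * Match on the VM name; the VM is only visible to the caller if
         * cr_cansee() permits access to the creator's credentials.
         */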
        SLIST_FOREACH(sc, &head, link) {
                if (strcmp(name, vm_name(sc->vm)) == 0)
                        break;
        }

        if (sc == NULL)
                return (NULL);

        if (cr_cansee(cred, sc->ucred))
                return (NULL);

        return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
        return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
        int error, off, c, prot;
        vm_paddr_t gpa, maxaddr;
        void *hpa, *cookie;
        struct vmmdev_softc *sc;

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        /*
         * Get a read lock on the guest memory map.
         */
        vm_slock_memsegs(sc->vm);

        error = 0;
        prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
        maxaddr = vmm_sysmem_maxaddr(sc->vm);
        while (uio->uio_resid > 0 && error == 0) {
                gpa = uio->uio_offset;
                off = gpa & PAGE_MASK;
                c = min(uio->uio_resid, PAGE_SIZE - off);

                /*
                 * The VM has a hole in its physical memory map. If we want to
                 * use 'dd' to inspect memory beyond the hole we need to
                 * provide bogus data for memory that lies in the hole.
                 *
                 * Since this device does not support lseek(2), dd(1) will
                 * read(2) blocks of data to simulate the lseek(2).
                 */
                hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
                if (hpa == NULL) {
                        if (uio->uio_rw == UIO_READ && gpa < maxaddr)
                                error = uiomove(__DECONST(void *, zero_region),
                                    c, uio);
                        else
                                error = EFAULT;
                } else {
                        error = uiomove(hpa, c, uio);
                        vm_gpa_release(cookie);
                }
        }
        vm_unlock_memsegs(sc->vm);
        return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
        struct devmem_softc *dsc;
        int error;
        bool sysmem;

        error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
        if (error || mseg->len == 0)
                return (error);

        if (!sysmem) {
                SLIST_FOREACH(dsc, &sc->devmem, link) {
                        if (dsc->segid == mseg->segid)
                                break;
                }
                KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
                    __func__, mseg->segid));
                error = copystr(dsc->name, mseg->name, len, NULL);
        } else {
                bzero(mseg->name, len);
        }

        return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
        char *name;
        int error;
        bool sysmem;

        error = 0;
        name = NULL;
        sysmem = true;

        /*
         * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
         * be stripped off when devfs processes the full string.
         */
        if (VM_MEMSEG_NAME(mseg)) {
                sysmem = false;
                name = malloc(len, M_VMMDEV, M_WAITOK);
                error = copystr(mseg->name, name, len, NULL);
                if (error)
                        goto done;
        }

        error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
        if (error)
                goto done;

        if (VM_MEMSEG_NAME(mseg)) {
                error = devmem_create_cdev(sc, mseg->segid, name);
                if (error)
                        vm_free_memseg(sc->vm, mseg->segid);
                else
                        name = NULL;    /* freed when 'cdev' is destroyed */
        }
done:
        free(name, M_VMMDEV);
        return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_get_register(vcpu, regnum[i], &regval[i]);
                if (error)
                        break;
        }
        return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_set_register(vcpu, regnum[i], regval[i]);
                if (error)
                        break;
        }
        return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
        struct vmmdev_softc *sc;
        int error;

        sc = vmmdev_lookup2(dev);
        KASSERT(sc != NULL, ("%s: device not found", __func__));

        /*
         * A user can only access VMs that they themselves have created.
         */
        if (td->td_ucred != sc->ucred)
                return (EPERM);

        /*
         * A jail without vmm access shouldn't be able to access vmm device
         * files at all, but check here just to be thorough.
         */
        error = vmm_priv_check(td->td_ucred);
        if (error != 0)
                return (error);

        return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
        VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
        VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
        VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
            VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
        VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
            VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
        VMMDEV_IOCTL(VM_MMAP_MEMSEG,
            VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
        VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
            VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
        VMMDEV_IOCTL(VM_REINIT,
            VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
        VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
        VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
        VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

        VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
        VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

        VMMDEV_IOCTL(VM_SUSPEND, 0),
        VMMDEV_IOCTL(VM_GET_CPUS, 0),
        VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
        VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
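
/*
 * Generic ioctl handler for VM device files.  The table above (and the
 * machine-dependent table consulted as a fallback) describes which locks a
 * command needs: shared or exclusive access to the memory segment list, a
 * single frozen vCPU, or all vCPUs frozen.  Those locks are acquired before
 * the command is dispatched and dropped once it completes.
 */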
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
        struct vmmdev_softc *sc;
        struct vcpu *vcpu;
        const struct vmmdev_ioctl *ioctl;
        int error, vcpuid;

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        ioctl = NULL;
        for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
                if (vmmdev_ioctls[i].cmd == cmd) {
                        ioctl = &vmmdev_ioctls[i];
                        break;
                }
        }
        if (ioctl == NULL) {
                for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
                        if (vmmdev_machdep_ioctls[i].cmd == cmd) {
                                ioctl = &vmmdev_machdep_ioctls[i];
                                break;
                        }
                }
        }
        if (ioctl == NULL)
                return (ENOTTY);

        if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
                vm_xlock_memsegs(sc->vm);
        else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
                vm_slock_memsegs(sc->vm);

        vcpu = NULL;
        vcpuid = -1;
        if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
            VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
                vcpuid = *(int *)data;
                if (vcpuid == -1) {
                        if ((ioctl->flags &
                            VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
                                error = EINVAL;
                                goto lockfail;
                        }
                } else {
                        vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
                        if (vcpu == NULL) {
                                error = EINVAL;
                                goto lockfail;
                        }
                        if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
                                error = vcpu_lock_one(vcpu);
                                if (error)
                                        goto lockfail;
                        }
                }
        }
        if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
                error = vcpu_lock_all(sc);
                if (error)
                        goto lockfail;
        }

        switch (cmd) {
        case VM_SUSPEND: {
                struct vm_suspend *vmsuspend;

                vmsuspend = (struct vm_suspend *)data;
                error = vm_suspend(sc->vm, vmsuspend->how);
                break;
        }
        case VM_REINIT:
                error = vm_reinit(sc->vm);
                break;
        case VM_STAT_DESC: {
                struct vm_stat_desc *statdesc;

                statdesc = (struct vm_stat_desc *)data;
                error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
                    sizeof(statdesc->desc));
                break;
        }
        case VM_STATS: {
                struct vm_stats *vmstats;

                vmstats = (struct vm_stats *)data;
                getmicrotime(&vmstats->tv);
                error = vmm_stat_copy(vcpu, vmstats->index,
                    nitems(vmstats->statbuf), &vmstats->num_entries,
                    vmstats->statbuf);
                break;
        }
        case VM_MMAP_GETNEXT: {
                struct vm_memmap *mm;

                mm = (struct vm_memmap *)data;
                error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
                    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
                break;
        }
        case VM_MMAP_MEMSEG: {
                struct vm_memmap *mm;

                mm = (struct vm_memmap *)data;
                error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
                    mm->len, mm->prot, mm->flags);
                break;
        }
        case VM_MUNMAP_MEMSEG: {
                struct vm_munmap *mu;

                mu = (struct vm_munmap *)data;
                error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
                break;
        }
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
        case VM_ALLOC_MEMSEG_12:
                error = alloc_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg_12 *)0)->name));
                break;
        case VM_GET_MEMSEG_12:
                error = get_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg_12 *)0)->name));
                break;
#endif
        case VM_ALLOC_MEMSEG:
                error = alloc_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg *)0)->name));
                break;
        case VM_GET_MEMSEG:
                error = get_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg *)0)->name));
                break;
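        /*
         * The register and capability commands below act on the single vCPU
         * frozen by the VMMDEV_IOCTL_LOCK_ONE_VCPU handling above.
         */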
        case VM_GET_REGISTER: {
                struct vm_register *vmreg;

                vmreg = (struct vm_register *)data;
                error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
                break;
        }
        case VM_SET_REGISTER: {
                struct vm_register *vmreg;

                vmreg = (struct vm_register *)data;
                error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
                break;
        }
        case VM_GET_REGISTER_SET: {
                struct vm_register_set *vmregset;
                uint64_t *regvals;
                int *regnums;

                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = vm_get_register_set(vcpu,
                            vmregset->count, regnums, regvals);
                if (error == 0)
                        error = copyout(regvals, vmregset->regvals,
                            sizeof(regvals[0]) * vmregset->count);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        }
        case VM_SET_REGISTER_SET: {
                struct vm_register_set *vmregset;
                uint64_t *regvals;
                int *regnums;

                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = copyin(vmregset->regvals, regvals,
                            sizeof(regvals[0]) * vmregset->count);
                if (error == 0)
                        error = vm_set_register_set(vcpu,
                            vmregset->count, regnums, regvals);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        }
        case VM_GET_CAPABILITY: {
                struct vm_capability *vmcap;

                vmcap = (struct vm_capability *)data;
                error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
                break;
        }
        case VM_SET_CAPABILITY: {
                struct vm_capability *vmcap;

                vmcap = (struct vm_capability *)data;
                error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
                break;
        }
        case VM_ACTIVATE_CPU:
                error = vm_activate_cpu(vcpu);
                break;
        case VM_GET_CPUS: {
                struct vm_cpuset *vm_cpuset;
                cpuset_t *cpuset;
                int size;

                error = 0;
                vm_cpuset = (struct vm_cpuset *)data;
                size = vm_cpuset->cpusetsize;
                if (size < 1 || size > CPU_MAXSIZE / NBBY) {
                        error = ERANGE;
                        break;
                }
                cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
                    M_WAITOK | M_ZERO);
                if (vm_cpuset->which == VM_ACTIVE_CPUS)
                        *cpuset = vm_active_cpus(sc->vm);
                else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
                        *cpuset = vm_suspended_cpus(sc->vm);
                else if (vm_cpuset->which == VM_DEBUG_CPUS)
                        *cpuset = vm_debug_cpus(sc->vm);
                else
                        error = EINVAL;
                if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
                        error = ERANGE;
                if (error == 0)
                        error = copyout(cpuset, vm_cpuset->cpus, size);
                free(cpuset, M_TEMP);
                break;
        }
        case VM_SUSPEND_CPU:
                error = vm_suspend_cpu(sc->vm, vcpu);
                break;
        case VM_RESUME_CPU:
                error = vm_resume_cpu(sc->vm, vcpu);
                break;
        case VM_SET_TOPOLOGY: {
                struct vm_cpu_topology *topology;

                topology = (struct vm_cpu_topology *)data;
                error = vm_set_topology(sc->vm, topology->sockets,
                    topology->cores, topology->threads, topology->maxcpus);
                break;
        }
        case VM_GET_TOPOLOGY: {
                struct vm_cpu_topology *topology;

                topology = (struct vm_cpu_topology *)data;
                vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
                    &topology->threads, &topology->maxcpus);
                error = 0;
                break;
        }
        default:
                error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
                    td);
                break;
        }

        if ((ioctl->flags &
            (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
                vm_unlock_memsegs(sc->vm);
        if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
                vcpu_unlock_all(sc);
        else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
                vcpu_unlock_one(vcpu);

        /*
         * Make sure that no handler returns a kernel-internal
         * error value to userspace.
         */
        KASSERT(error == ERESTART || error >= 0,
            ("vmmdev_ioctl: invalid error return %d", error));
        return (error);

lockfail:
        if ((ioctl->flags &
            (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
                vm_unlock_memsegs(sc->vm);
        return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
        struct vmmdev_softc *sc;
        vm_paddr_t gpa;
        size_t len;
        vm_ooffset_t segoff, first, last;
        int error, found, segid;
        bool sysmem;

        first = *offset;
        last = first + mapsize;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL) {
                /* virtual machine is in the process of being created */
                return (EINVAL);
        }

        /*
         * Get a read lock on the guest memory map.
         */
        vm_slock_memsegs(sc->vm);

        gpa = 0;
        found = 0;
        while (!found) {
                error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
                    NULL, NULL);
                if (error)
                        break;

                if (first >= gpa && last <= gpa + len)
                        found = 1;
                else
                        gpa += len;
        }

        if (found) {
                error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
                KASSERT(error == 0 && *objp != NULL,
                    ("%s: invalid memory segment %d", __func__, segid));
                if (sysmem) {
                        vm_object_reference(*objp);
                        *offset = segoff + (first - gpa);
                } else {
                        error = EINVAL;
                }
        }
        vm_unlock_memsegs(sc->vm);
        return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
        struct devmem_softc *dsc;
        int error __diagused;

        KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

        /*
         * Destroy all cdevs:
         *
         * - any new operations on the 'cdev' will return an error (ENXIO).
         *
         * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
         */
        SLIST_FOREACH(dsc, &sc->devmem, link) {
                KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
                devmem_destroy(dsc);
        }

        vm_disable_vcpu_creation(sc->vm);
        error = vcpu_lock_all(sc);
        KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
        vm_unlock_vcpus(sc->vm);

        while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
                KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
                SLIST_REMOVE_HEAD(&sc->devmem, link);
                free(dsc->name, M_VMMDEV);
                free(dsc, M_VMMDEV);
        }

        if (sc->vm != NULL)
                vm_destroy(sc->vm);

        if (sc->ucred != NULL)
                crfree(sc->ucred);

        sx_xlock(&vmmdev_mtx);
        SLIST_REMOVE(&head, sc, vmmdev_softc, link);
        sx_xunlock(&vmmdev_mtx);
        free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
        struct cdev *cdev;
        struct vmmdev_softc *sc;

        sx_xlock(&vmmdev_mtx);
        sc = vmmdev_lookup(name, cred);
        if (sc == NULL || sc->cdev == NULL) {
                sx_xunlock(&vmmdev_mtx);
                return (EINVAL);
        }

        /*
         * Setting 'sc->cdev' to NULL is used to indicate that the VM
         * is scheduled for destruction.
         */
        cdev = sc->cdev;
        sc->cdev = NULL;
        sx_xunlock(&vmmdev_mtx);

        destroy_dev(cdev);
        vmmdev_destroy(sc);

        return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
        char *buf;
        int error, buflen;

        error = vmm_priv_check(req->td->td_ucred);
        if (error)
                return (error);

        buflen = VM_MAX_NAMELEN + 1;
        buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
        strlcpy(buf, "beavis", buflen);
        error = sysctl_handle_string(oidp, buf, buflen, req);
        if (error == 0 && req->newptr != NULL)
                error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
        free(buf, M_VMMDEV);
        return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
        .d_name = "vmmdev",
        .d_version = D_VERSION,
        .d_open = vmmdev_open,
        .d_ioctl = vmmdev_ioctl,
        .d_mmap_single = vmmdev_mmap_single,
        .d_read = vmmdev_rw,
        .d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
        struct vmmdev_softc *sc;

        sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
        SLIST_INIT(&sc->devmem);
        sc->vm = vm;
        sc->ucred = crhold(cred);
        return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
        struct make_dev_args mda;
        struct cdev *cdev;
        struct vmmdev_softc *sc;
        struct vm *vm;
        int error;

        sx_xlock(&vmmdev_mtx);
        sc = vmmdev_lookup(name, cred);
        if (sc != NULL) {
                sx_xunlock(&vmmdev_mtx);
                return (EEXIST);
        }

        error = vm_create(name, &vm);
        if (error != 0) {
                sx_xunlock(&vmmdev_mtx);
                return (error);
        }
        sc = vmmdev_alloc(vm, cred);
        SLIST_INSERT_HEAD(&head, sc, link);

        make_dev_args_init(&mda);
        mda.mda_devsw = &vmmdevsw;
        mda.mda_cr = sc->ucred;
        mda.mda_uid = UID_ROOT;
        mda.mda_gid = GID_WHEEL;
        mda.mda_mode = 0600;
        mda.mda_si_drv1 = sc;
        mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
        error = make_dev_s(&mda, &cdev, "vmm/%s", name);
        if (error != 0) {
                sx_xunlock(&vmmdev_mtx);
                vmmdev_destroy(sc);
                return (error);
        }
        sc->cdev = cdev;
        sx_xunlock(&vmmdev_mtx);
        return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
        char *buf;
        int error, buflen;

        error = vmm_priv_check(req->td->td_ucred);
        if (error != 0)
                return (error);

        buflen = VM_MAX_NAMELEN + 1;
        buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
        strlcpy(buf, "beavis", buflen);
        error = sysctl_handle_string(oidp, buf, buflen, req);
        if (error == 0 && req->newptr != NULL)
                error = vmmdev_create(buf, req->td->td_ucred);
        free(buf, M_VMMDEV);
        return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

void
vmmdev_init(void)
{
        pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
            "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
        int error;

        if (SLIST_EMPTY(&head))
                error = 0;
        else
                error = EBUSY;

        return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
        struct devmem_softc *dsc;
        vm_ooffset_t first, last;
        size_t seglen;
        int error;
        bool sysmem;

        dsc = cdev->si_drv1;
        if (dsc == NULL) {
                /* 'cdev' has been created but is not ready for use */
                return (ENXIO);
        }

        first = *offset;
        last = *offset + len;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        vm_slock_memsegs(dsc->sc->vm);

        error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
        KASSERT(error == 0 && !sysmem && *objp != NULL,
            ("%s: invalid devmem segment %d", __func__, dsc->segid));

        if (seglen >= last)
                vm_object_reference(*objp);
        else
                error = EINVAL;

        vm_unlock_memsegs(dsc->sc->vm);
        return (error);
}

static struct cdevsw devmemsw = {
        .d_name = "devmem",
        .d_version = D_VERSION,
        .d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
        struct make_dev_args mda;
        struct devmem_softc *dsc;
        int error;

        sx_xlock(&vmmdev_mtx);

        dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
        dsc->segid = segid;
        dsc->name = devname;
        dsc->sc = sc;
        SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

        make_dev_args_init(&mda);
        mda.mda_devsw = &devmemsw;
        mda.mda_cr = sc->ucred;
        mda.mda_uid = UID_ROOT;
        mda.mda_gid = GID_WHEEL;
        mda.mda_mode = 0600;
        mda.mda_si_drv1 = dsc;
        mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
        error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
            devname);
        if (error != 0) {
                SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
                free(dsc->name, M_VMMDEV);
                free(dsc, M_VMMDEV);
        }

        sx_xunlock(&vmmdev_mtx);

        return (error);
}

static void
devmem_destroy(void *arg)
{
        struct devmem_softc *dsc = arg;

        destroy_dev(dsc->cdev);
        dsc->cdev = NULL;
        dsc->sc = NULL;
}