/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

#ifndef __amd64__
static int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
	}

	return (error);
}
#endif

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_open		= vmmdev_open,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name		= "vmmctl",
	.d_version	= D_VERSION,
	.d_open		= vmmctl_open,
	.d_ioctl	= vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
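
/*
 * Illustrative sketch (not part of the driver): a userspace consumer of the
 * /dev/vmmctl control device handled by vmmctl_ioctl() above might create
 * and destroy a VM roughly as follows.  The VM name "example" and the
 * surrounding error handling are assumptions made for this example; the
 * struct and ioctl names come from the handlers above.
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);
 *	struct vmmctl_vm_create create = { 0 };
 *	struct vmmctl_vm_destroy destroy = { 0 };
 *
 *	strlcpy(create.name, "example", sizeof(create.name));
 *	if (ioctl(fd, VMMCTL_VM_CREATE, &create) == 0) {
 *		// ... operate on /dev/vmm/example via the vmmdev ioctls ...
 *		strlcpy(destroy.name, "example", sizeof(destroy.name));
 *		ioctl(fd, VMMCTL_VM_DESTROY, &destroy);
 *	}
 *	close(fd);
 */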