/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int segid;
	size_t len;
	char name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define VM_ALLOC_MEMSEG_12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int segid;
	size_t len;
	char name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define VM_ALLOC_MEMSEG_14 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define VM_GET_MEMSEG_14 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int segid;
	char *name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm *vm;		/* vm instance cookie */
	struct cdev *cdev;
	struct ucred *ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

#ifndef __amd64__
static int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
	}

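	/*
	 * Either every present vCPU is now frozen, or the partial set locked
	 * above has been unwound, so the caller sees an all-or-nothing result.
	 */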
	return (error);
}
#endif

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
		error = priv_check(td, PRIV_DRIVER);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
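				/*
				 * A vcpuid of -1 (meaning "all vCPUs") is only
				 * accepted by ioctls flagged with
				 * VMMDEV_IOCTL_MAYBE_ALLOC_VCPU.
				 */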
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);

		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
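
/*
 * Illustrative sketch, not part of the driver: a userspace consumer would
 * typically create and destroy a VM through /dev/vmmctl roughly as below.
 * The header choice and error handling are simplified assumptions; the
 * ioctl names and structure fields match those handled in vmmctl_ioctl()
 * above.
 *
 *	#include <sys/ioctl.h>
 *	#include <dev/vmm/vmm_dev.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);	// vmmctl_open() requires FWRITE
 *
 *	struct vmmctl_vm_create vc = { 0 };	// reserved fields must be zero
 *	strlcpy(vc.name, "testvm", sizeof(vc.name));
 *	ioctl(fd, VMMCTL_VM_CREATE, &vc);	// creates /dev/vmm/testvm
 *
 *	struct vmmctl_vm_destroy vd = { 0 };
 *	strlcpy(vd.name, "testvm", sizeof(vd.name));
 *	ioctl(fd, VMMCTL_VM_DESTROY, &vd);	// tears the instance down
 */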