/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int		segid;
	char		*name;
	struct cdev	*cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
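/*
 * Illustrative use of the read interface above (a sketch, assuming a VM
 * named "testvm" already exists): reading /dev/vmm/testvm returns guest
 * physical memory, so something like
 *
 *	dd if=/dev/vmm/testvm bs=1m count=1 | hexdump -C
 *
 * dumps the first megabyte of the guest's memory, with zeroes substituted
 * for any holes below vmm_sysmem_maxaddr().
 */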
CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
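/*
 * vmmdev_ioctl() below first looks the command up in vmmdev_ioctls[] (and
 * then in the machine-dependent table), acquires the memory-segment and
 * vCPU locks requested by the entry's flags, and only then runs the
 * command-specific handler.
 *
 * Illustrative userspace call (a sketch, error handling omitted): a process
 * that has opened /dev/vmm/<name> as 'vmfd' might read a register with
 *
 *	struct vm_register vmreg = { .cpuid = 0, .regnum = regnum };
 *	error = ioctl(vmfd, VM_GET_REGISTER, &vmreg);
 *
 * which reaches the VM_GET_REGISTER case with vCPU 0 frozen, since the
 * table entry carries VMMDEV_IOCTL_LOCK_ONE_VCPU.
 */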
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask, mseg->ds_policy,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				free(mask, M_VMMDEV);
				break;
			}
			free(mask, M_VMMDEV);
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);

		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
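/*
 * In other words, mmap(2) on /dev/vmm/<name> maps guest system memory by
 * guest-physical offset: the request must fall entirely within a single
 * mapping of a sysmem segment and may not ask for PROT_EXEC.  Device memory
 * segments are not mappable here; they are reached through their
 * /dev/vmm.io/<vmname>.<segname> nodes instead (see devmem_mmap_single()
 * below).
 */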
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	sx_xunlock(&vmmdev_mtx);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_open		= vmmdev_open,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vm *vm;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");
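/*
 * Illustrative use of the legacy sysctl interface above (assuming sufficient
 * privilege and a loaded vmm(4)):
 *
 *	sysctl hw.vmm.create=testvm	# creates /dev/vmm/testvm
 *	sysctl hw.vmm.destroy=testvm	# tears it down again
 *
 * The /dev/vmmctl ioctls below provide the non-legacy equivalent.
 */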
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
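/*
 * Illustrative use of the vmmctl interface above (a sketch, error handling
 * omitted; the header providing struct vmmctl_vm_create is assumed to be
 * <dev/vmm/vmm_dev.h>):
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);
 *	struct vmmctl_vm_create vmc = { 0 };
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);
 *
 * The zeroed 'reserved' fields matter: the handler rejects any non-zero
 * value with EINVAL.
 */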
static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name		= "vmmctl",
	.d_version	= D_VERSION,
	.d_open		= vmmctl_open,
	.d_ioctl	= vmmctl_ioctl,
};

int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
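/*
 * Named (non-sysmem) segments created by VM_ALLOC_MEMSEG show up as
 * /dev/vmm.io/<vmname>.<segname> and are only accessible via mmap(2);
 * devmem_mmap_single() above refuses PROT_EXEC and any mapping that would
 * extend past the end of the segment.
 */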