/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int		segid;
	char		*name;
	struct cdev	*cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	LIST_ENTRY(vmmdev_softc) priv_link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

struct vmmctl_priv {
	LIST_HEAD(, vmmdev_softc) softcs;
};

static bool vmm_initialized = false;

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

u_int vm_maxvmms;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
    &vm_maxvmms, 0, "Maximum number of VMM instances per user");

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
static void vmmdev_destroy(struct vmmdev_softc *sc);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

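	/* Return the frozen vCPU to the idle state so it can be locked again. */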
	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

#ifndef __amd64__
static int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
	}

	return (error);
}
#endif

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
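		 *
		 * Reads that fall within such a hole, below the top of guest
		 * system memory, therefore return zeroes instead of failing.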
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem,
	    domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) {
		error = priv_check(td, PRIV_DRIVER);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
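				/*
				 * Reject a vcpuid of -1 ("all vCPUs") for
				 * ioctls that require a specific vCPU.
				 */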
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
		    mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask,
			    mseg->ds_policy, mseg->ds_mask_size);
			free(mask, M_VMMDEV);
			if (error)
				break;
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				break;
			}
		}
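		/*
		 * 'domainset' remains NULL when the caller did not supply a
		 * NUMA allocation policy.
		 */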
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
	crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_REMOVE(sc, priv_link);
	sx_xunlock(&vmmdev_mtx);
	wakeup(sc);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_open		= vmmdev_open,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv;
	struct vm *vm;
	int error;

	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
		return (EINVAL);

	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
		return (EINVAL);
	error = devfs_get_cdevpriv((void **)&priv);
	if (error)
		return (error);

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);
	sc->flags = flags;
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
		sx_xunlock(&vmmdev_mtx);
		destroy_dev(cdev);
		vmmdev_destroy(sc);
		return (ENOMEM);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	if (!vmm_initialized)
		return (ENXIO);

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, 0, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static void
vmmctl_dtor(void *arg)
{
	struct cdev *sc_cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv = arg;

	/*
	 * Scan the softc list for any VMs associated with
	 * the current descriptor and destroy them.
	 */
	sx_xlock(&vmmdev_mtx);
	while (!LIST_EMPTY(&priv->softcs)) {
		sc = LIST_FIRST(&priv->softcs);
		sc_cdev = sc->cdev;
		if (sc_cdev != NULL) {
			sc->cdev = NULL;
		} else {
			/*
			 * Another thread has already
			 * started the removal process.
			 * Sleep until 'vmmdev_destroy' notifies us
			 * that the removal has finished.
			 */
			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
			continue;
		}
		/*
		 * Temporarily drop the lock to allow vmmdev_destroy to run.
		 */
		sx_xunlock(&vmmdev_mtx);
		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
		destroy_dev(sc_cdev);
		/* vmmdev_destroy will unlink the 'priv_link' entry. */
		vmmdev_destroy(sc);
		sx_xlock(&vmmdev_mtx);
	}
	sx_xunlock(&vmmdev_mtx);

	free(priv, M_VMMDEV);
}

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;
	struct vmmctl_priv *priv;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
	LIST_INIT(&priv->softcs);
	error = devfs_set_cdevpriv(priv, vmmctl_dtor);
	if (error != 0) {
		free(priv, M_VMMDEV);
		return (error);
	}

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name		= "vmmctl",
	.d_version	= D_VERSION,
	.d_open		= vmmctl_open,
	.d_ioctl	= vmmctl_ioctl,
};

static int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
	if (error == 0)
		pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail.");
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;

		vm_maxcpu = mp_ncpus;
		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
		if (vm_maxcpu > VM_MAXCPU) {
			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
			vm_maxcpu = VM_MAXCPU;
		}
		if (vm_maxcpu == 0)
			vm_maxcpu = 1;
		vm_maxvmms = 4 * mp_ncpus;
		error = vmm_modinit();
		if (error == 0)
			vmm_initialized = true;
		else {
			error = vmmdev_cleanup();
			KASSERT(error == 0,
			    ("%s: vmmdev_cleanup failed: %d", __func__, error));
		}
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmm_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created.
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - Initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}