1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 6 * All rights reserved. 7 */ 8 9 #include <sys/param.h> 10 #include <sys/conf.h> 11 #include <sys/fcntl.h> 12 #include <sys/ioccom.h> 13 #include <sys/jail.h> 14 #include <sys/kernel.h> 15 #include <sys/malloc.h> 16 #include <sys/mman.h> 17 #include <sys/module.h> 18 #include <sys/priv.h> 19 #include <sys/proc.h> 20 #include <sys/queue.h> 21 #include <sys/resourcevar.h> 22 #include <sys/smp.h> 23 #include <sys/sx.h> 24 #include <sys/sysctl.h> 25 #include <sys/ucred.h> 26 #include <sys/uio.h> 27 28 #include <machine/vmm.h> 29 30 #include <vm/vm.h> 31 #include <vm/vm_object.h> 32 33 #include <dev/vmm/vmm_dev.h> 34 #include <dev/vmm/vmm_mem.h> 35 #include <dev/vmm/vmm_stat.h> 36 #include <dev/vmm/vmm_vm.h> 37 38 #ifdef __amd64__ 39 #ifdef COMPAT_FREEBSD12 40 struct vm_memseg_12 { 41 int segid; 42 size_t len; 43 char name[64]; 44 }; 45 _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI"); 46 47 #define VM_ALLOC_MEMSEG_12 \ 48 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12) 49 #define VM_GET_MEMSEG_12 \ 50 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12) 51 #endif /* COMPAT_FREEBSD12 */ 52 #ifdef COMPAT_FREEBSD14 53 struct vm_memseg_14 { 54 int segid; 55 size_t len; 56 char name[VM_MAX_SUFFIXLEN + 1]; 57 }; 58 _Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16), 59 "COMPAT_FREEBSD14 ABI"); 60 61 #define VM_ALLOC_MEMSEG_14 \ 62 _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14) 63 #define VM_GET_MEMSEG_14 \ 64 _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14) 65 #endif /* COMPAT_FREEBSD14 */ 66 #endif /* __amd64__ */ 67 68 struct devmem_softc { 69 int segid; 70 char *name; 71 struct cdev *cdev; 72 struct vmmdev_softc *sc; 73 SLIST_ENTRY(devmem_softc) link; 74 }; 75 76 struct vmmdev_softc { 77 struct vm *vm; /* vm instance cookie */ 78 struct cdev *cdev; 79 struct ucred *ucred; 80 SLIST_ENTRY(vmmdev_softc) link; 81 LIST_ENTRY(vmmdev_softc) priv_link; 82 SLIST_HEAD(, devmem_softc) devmem; 83 int flags; 84 }; 85 86 struct vmmctl_priv { 87 LIST_HEAD(, vmmdev_softc) softcs; 88 }; 89 90 static bool vmm_initialized = false; 91 92 static SLIST_HEAD(, vmmdev_softc) head; 93 94 static unsigned int pr_allow_vmm_flag, pr_allow_vmm_ppt_flag; 95 static struct sx vmmdev_mtx; 96 SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex"); 97 98 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 99 100 SYSCTL_DECL(_hw_vmm); 101 102 u_int vm_maxcpu; 103 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 104 &vm_maxcpu, 0, "Maximum number of vCPUs"); 105 106 u_int vm_maxvmms; 107 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN, 108 &vm_maxvmms, 0, "Maximum number of VMM instances per user"); 109 110 static void devmem_destroy(void *arg); 111 static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); 112 static void vmmdev_destroy(struct vmmdev_softc *sc); 113 114 static int 115 vmm_priv_check(struct ucred *ucred) 116 { 117 if (jailed(ucred) && 118 (ucred->cr_prison->pr_allow & pr_allow_vmm_flag) == 0) 119 return (EPERM); 120 121 return (0); 122 } 123 124 static int 125 vcpu_lock_one(struct vcpu *vcpu) 126 { 127 return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); 128 } 129 130 static void 131 vcpu_unlock_one(struct vcpu *vcpu) 132 { 133 enum vcpu_state state; 134 135 state = vcpu_get_state(vcpu, NULL); 136 if (state != VCPU_FROZEN) { 137 panic("vcpu %s(%d) has invalid state %d", 138 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); 139 } 140 141 vcpu_set_state(vcpu, VCPU_IDLE, false); 142 } 143 144 static int 145 vcpu_lock_all(struct vmmdev_softc *sc) 146 { 147 int error; 148 149 /* 150 * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked 151 * in a consistent order so we need to serialize to avoid deadlocks. 152 */ 153 vm_lock_vcpus(sc->vm); 154 error = vcpu_set_state_all(sc->vm, VCPU_FROZEN); 155 if (error != 0) 156 vm_unlock_vcpus(sc->vm); 157 return (error); 158 } 159 160 static void 161 vcpu_unlock_all(struct vmmdev_softc *sc) 162 { 163 struct vcpu *vcpu; 164 uint16_t i, maxcpus; 165 166 maxcpus = vm_get_maxcpus(sc->vm); 167 for (i = 0; i < maxcpus; i++) { 168 vcpu = vm_vcpu(sc->vm, i); 169 if (vcpu == NULL) 170 continue; 171 vcpu_unlock_one(vcpu); 172 } 173 vm_unlock_vcpus(sc->vm); 174 } 175 176 static struct vmmdev_softc * 177 vmmdev_lookup(const char *name, struct ucred *cred) 178 { 179 struct vmmdev_softc *sc; 180 181 sx_assert(&vmmdev_mtx, SA_XLOCKED); 182 183 SLIST_FOREACH(sc, &head, link) { 184 if (strcmp(name, vm_name(sc->vm)) == 0) 185 break; 186 } 187 188 if (sc == NULL) 189 return (NULL); 190 191 if (cr_cansee(cred, sc->ucred)) 192 return (NULL); 193 194 return (sc); 195 } 196 197 static struct vmmdev_softc * 198 vmmdev_lookup2(struct cdev *cdev) 199 { 200 return (cdev->si_drv1); 201 } 202 203 static int 204 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 205 { 206 int error, off, c, prot; 207 vm_paddr_t gpa, maxaddr; 208 void *hpa, *cookie; 209 struct vmmdev_softc *sc; 210 211 sc = vmmdev_lookup2(cdev); 212 if (sc == NULL) 213 return (ENXIO); 214 215 /* 216 * Get a read lock on the guest memory map. 217 */ 218 vm_slock_memsegs(sc->vm); 219 220 error = 0; 221 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 222 maxaddr = vmm_sysmem_maxaddr(sc->vm); 223 while (uio->uio_resid > 0 && error == 0) { 224 gpa = uio->uio_offset; 225 off = gpa & PAGE_MASK; 226 c = min(uio->uio_resid, PAGE_SIZE - off); 227 228 /* 229 * The VM has a hole in its physical memory map. If we want to 230 * use 'dd' to inspect memory beyond the hole we need to 231 * provide bogus data for memory that lies in the hole. 232 * 233 * Since this device does not support lseek(2), dd(1) will 234 * read(2) blocks of data to simulate the lseek(2). 235 */ 236 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); 237 if (hpa == NULL) { 238 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 239 error = uiomove(__DECONST(void *, zero_region), 240 c, uio); 241 else 242 error = EFAULT; 243 } else { 244 error = uiomove(hpa, c, uio); 245 vm_gpa_release(cookie); 246 } 247 } 248 vm_unlock_memsegs(sc->vm); 249 return (error); 250 } 251 252 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); 253 254 static int 255 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) 256 { 257 struct devmem_softc *dsc; 258 int error; 259 bool sysmem; 260 261 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 262 if (error || mseg->len == 0) 263 return (error); 264 265 if (!sysmem) { 266 SLIST_FOREACH(dsc, &sc->devmem, link) { 267 if (dsc->segid == mseg->segid) 268 break; 269 } 270 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 271 __func__, mseg->segid)); 272 error = copystr(dsc->name, mseg->name, len, NULL); 273 } else { 274 bzero(mseg->name, len); 275 } 276 277 return (error); 278 } 279 280 static int 281 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len, 282 struct domainset *domainset) 283 { 284 char *name; 285 int error; 286 bool sysmem; 287 288 error = 0; 289 name = NULL; 290 sysmem = true; 291 292 /* 293 * The allocation is lengthened by 1 to hold a terminating NUL. It'll 294 * by stripped off when devfs processes the full string. 295 */ 296 if (VM_MEMSEG_NAME(mseg)) { 297 sysmem = false; 298 name = malloc(len, M_VMMDEV, M_WAITOK); 299 error = copystr(mseg->name, name, len, NULL); 300 if (error) 301 goto done; 302 } 303 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset); 304 if (error) 305 goto done; 306 307 if (VM_MEMSEG_NAME(mseg)) { 308 error = devmem_create_cdev(sc, mseg->segid, name); 309 if (error) 310 vm_free_memseg(sc->vm, mseg->segid); 311 else 312 name = NULL; /* freed when 'cdev' is destroyed */ 313 } 314 done: 315 free(name, M_VMMDEV); 316 return (error); 317 } 318 319 #if defined(__amd64__) && \ 320 (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12)) 321 /* 322 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts. 323 */ 324 static void 325 adjust_segid(struct vm_memseg *mseg) 326 { 327 if (mseg->segid != VM_SYSMEM) { 328 mseg->segid += (VM_BOOTROM - 1); 329 } 330 } 331 #endif 332 333 static int 334 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 335 uint64_t *regval) 336 { 337 int error, i; 338 339 error = 0; 340 for (i = 0; i < count; i++) { 341 error = vm_get_register(vcpu, regnum[i], ®val[i]); 342 if (error) 343 break; 344 } 345 return (error); 346 } 347 348 static int 349 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 350 uint64_t *regval) 351 { 352 int error, i; 353 354 error = 0; 355 for (i = 0; i < count; i++) { 356 error = vm_set_register(vcpu, regnum[i], regval[i]); 357 if (error) 358 break; 359 } 360 return (error); 361 } 362 363 static int 364 vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) 365 { 366 int error; 367 368 /* 369 * A jail without vmm access shouldn't be able to access vmm device 370 * files at all, but check here just to be thorough. 371 */ 372 error = vmm_priv_check(td->td_ucred); 373 if (error != 0) 374 return (error); 375 376 return (0); 377 } 378 379 static const struct vmmdev_ioctl vmmdev_ioctls[] = { 380 VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), 381 VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), 382 VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), 383 VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), 384 VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), 385 VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), 386 VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU), 387 VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), 388 VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), 389 VMMDEV_IOCTL(VM_STAT_DESC, 0), 390 391 #ifdef __amd64__ 392 #ifdef COMPAT_FREEBSD12 393 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, 394 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 395 #endif 396 #ifdef COMPAT_FREEBSD14 397 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14, 398 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 399 #endif 400 #endif /* __amd64__ */ 401 VMMDEV_IOCTL(VM_ALLOC_MEMSEG, 402 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 403 VMMDEV_IOCTL(VM_MMAP_MEMSEG, 404 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 405 VMMDEV_IOCTL(VM_MUNMAP_MEMSEG, 406 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 407 VMMDEV_IOCTL(VM_REINIT, 408 VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), 409 410 #ifdef __amd64__ 411 #if defined(COMPAT_FREEBSD12) 412 VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS), 413 #endif 414 #ifdef COMPAT_FREEBSD14 415 VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS), 416 #endif 417 #endif /* __amd64__ */ 418 VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), 419 VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), 420 421 VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), 422 VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), 423 424 VMMDEV_IOCTL(VM_SUSPEND, 0), 425 VMMDEV_IOCTL(VM_GET_CPUS, 0), 426 VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0), 427 VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0), 428 }; 429 430 static int 431 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 432 struct thread *td) 433 { 434 struct vmmdev_softc *sc; 435 struct vcpu *vcpu; 436 const struct vmmdev_ioctl *ioctl; 437 struct vm_memseg *mseg; 438 int error, vcpuid; 439 440 sc = vmmdev_lookup2(cdev); 441 if (sc == NULL) 442 return (ENXIO); 443 444 ioctl = NULL; 445 for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) { 446 if (vmmdev_ioctls[i].cmd == cmd) { 447 ioctl = &vmmdev_ioctls[i]; 448 break; 449 } 450 } 451 if (ioctl == NULL) { 452 for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) { 453 if (vmmdev_machdep_ioctls[i].cmd == cmd) { 454 ioctl = &vmmdev_machdep_ioctls[i]; 455 break; 456 } 457 } 458 } 459 if (ioctl == NULL) 460 return (ENOTTY); 461 462 if ((ioctl->flags & VMMDEV_IOCTL_PPT) != 0) { 463 if (jailed(td->td_ucred) && (td->td_ucred->cr_prison->pr_allow & 464 pr_allow_vmm_ppt_flag) == 0) 465 return (EPERM); 466 error = priv_check(td, PRIV_VMM_PPTDEV); 467 if (error != 0) 468 return (error); 469 } 470 471 if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0) 472 vm_xlock_memsegs(sc->vm); 473 else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0) 474 vm_slock_memsegs(sc->vm); 475 476 vcpu = NULL; 477 vcpuid = -1; 478 if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU | 479 VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) { 480 vcpuid = *(int *)data; 481 if (vcpuid == -1) { 482 if ((ioctl->flags & 483 VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) { 484 error = EINVAL; 485 goto lockfail; 486 } 487 } else { 488 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 489 if (vcpu == NULL) { 490 error = EINVAL; 491 goto lockfail; 492 } 493 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) { 494 error = vcpu_lock_one(vcpu); 495 if (error) 496 goto lockfail; 497 } 498 } 499 } 500 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) { 501 error = vcpu_lock_all(sc); 502 if (error) 503 goto lockfail; 504 } 505 506 switch (cmd) { 507 case VM_SUSPEND: { 508 struct vm_suspend *vmsuspend; 509 510 vmsuspend = (struct vm_suspend *)data; 511 error = vm_suspend(sc->vm, vmsuspend->how); 512 break; 513 } 514 case VM_REINIT: 515 error = vm_reinit(sc->vm); 516 break; 517 case VM_STAT_DESC: { 518 struct vm_stat_desc *statdesc; 519 520 statdesc = (struct vm_stat_desc *)data; 521 error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, 522 sizeof(statdesc->desc)); 523 break; 524 } 525 case VM_STATS: { 526 struct vm_stats *vmstats; 527 528 vmstats = (struct vm_stats *)data; 529 getmicrotime(&vmstats->tv); 530 error = vmm_stat_copy(vcpu, vmstats->index, 531 nitems(vmstats->statbuf), &vmstats->num_entries, 532 vmstats->statbuf); 533 break; 534 } 535 case VM_MMAP_GETNEXT: { 536 struct vm_memmap *mm; 537 538 mm = (struct vm_memmap *)data; 539 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 540 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 541 break; 542 } 543 case VM_MMAP_MEMSEG: { 544 struct vm_memmap *mm; 545 546 mm = (struct vm_memmap *)data; 547 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 548 mm->len, mm->prot, mm->flags); 549 break; 550 } 551 case VM_MUNMAP_MEMSEG: { 552 struct vm_munmap *mu; 553 554 mu = (struct vm_munmap *)data; 555 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 556 break; 557 } 558 #ifdef __amd64__ 559 #ifdef COMPAT_FREEBSD12 560 case VM_ALLOC_MEMSEG_12: 561 mseg = (struct vm_memseg *)data; 562 563 adjust_segid(mseg); 564 error = alloc_memseg(sc, mseg, 565 sizeof(((struct vm_memseg_12 *)0)->name), NULL); 566 break; 567 case VM_GET_MEMSEG_12: 568 mseg = (struct vm_memseg *)data; 569 570 adjust_segid(mseg); 571 error = get_memseg(sc, mseg, 572 sizeof(((struct vm_memseg_12 *)0)->name)); 573 break; 574 #endif /* COMPAT_FREEBSD12 */ 575 #ifdef COMPAT_FREEBSD14 576 case VM_ALLOC_MEMSEG_14: 577 mseg = (struct vm_memseg *)data; 578 579 adjust_segid(mseg); 580 error = alloc_memseg(sc, mseg, 581 sizeof(((struct vm_memseg_14 *)0)->name), NULL); 582 break; 583 case VM_GET_MEMSEG_14: 584 mseg = (struct vm_memseg *)data; 585 586 adjust_segid(mseg); 587 error = get_memseg(sc, mseg, 588 sizeof(((struct vm_memseg_14 *)0)->name)); 589 break; 590 #endif /* COMPAT_FREEBSD14 */ 591 #endif /* __amd64__ */ 592 case VM_ALLOC_MEMSEG: { 593 domainset_t *mask; 594 struct domainset *domainset, domain; 595 596 domainset = NULL; 597 mseg = (struct vm_memseg *)data; 598 if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) { 599 if (mseg->ds_mask_size < sizeof(domainset_t) || 600 mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) { 601 error = ERANGE; 602 break; 603 } 604 memset(&domain, 0, sizeof(domain)); 605 mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK); 606 error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size); 607 if (error) { 608 free(mask, M_VMMDEV); 609 break; 610 } 611 error = domainset_populate(&domain, mask, mseg->ds_policy, 612 mseg->ds_mask_size); 613 free(mask, M_VMMDEV); 614 if (error) 615 break; 616 domainset = domainset_create(&domain); 617 if (domainset == NULL) { 618 error = EINVAL; 619 break; 620 } 621 } 622 error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); 623 break; 624 } 625 case VM_GET_MEMSEG: 626 error = get_memseg(sc, (struct vm_memseg *)data, 627 sizeof(((struct vm_memseg *)0)->name)); 628 break; 629 case VM_GET_REGISTER: { 630 struct vm_register *vmreg; 631 632 vmreg = (struct vm_register *)data; 633 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 634 break; 635 } 636 case VM_SET_REGISTER: { 637 struct vm_register *vmreg; 638 639 vmreg = (struct vm_register *)data; 640 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 641 break; 642 } 643 case VM_GET_REGISTER_SET: { 644 struct vm_register_set *vmregset; 645 uint64_t *regvals; 646 int *regnums; 647 648 vmregset = (struct vm_register_set *)data; 649 if (vmregset->count > VM_REG_LAST) { 650 error = EINVAL; 651 break; 652 } 653 regvals = mallocarray(vmregset->count, sizeof(regvals[0]), 654 M_VMMDEV, M_WAITOK); 655 regnums = mallocarray(vmregset->count, sizeof(regnums[0]), 656 M_VMMDEV, M_WAITOK); 657 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 658 vmregset->count); 659 if (error == 0) 660 error = vm_get_register_set(vcpu, 661 vmregset->count, regnums, regvals); 662 if (error == 0) 663 error = copyout(regvals, vmregset->regvals, 664 sizeof(regvals[0]) * vmregset->count); 665 free(regvals, M_VMMDEV); 666 free(regnums, M_VMMDEV); 667 break; 668 } 669 case VM_SET_REGISTER_SET: { 670 struct vm_register_set *vmregset; 671 uint64_t *regvals; 672 int *regnums; 673 674 vmregset = (struct vm_register_set *)data; 675 if (vmregset->count > VM_REG_LAST) { 676 error = EINVAL; 677 break; 678 } 679 regvals = mallocarray(vmregset->count, sizeof(regvals[0]), 680 M_VMMDEV, M_WAITOK); 681 regnums = mallocarray(vmregset->count, sizeof(regnums[0]), 682 M_VMMDEV, M_WAITOK); 683 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 684 vmregset->count); 685 if (error == 0) 686 error = copyin(vmregset->regvals, regvals, 687 sizeof(regvals[0]) * vmregset->count); 688 if (error == 0) 689 error = vm_set_register_set(vcpu, 690 vmregset->count, regnums, regvals); 691 free(regvals, M_VMMDEV); 692 free(regnums, M_VMMDEV); 693 break; 694 } 695 case VM_GET_CAPABILITY: { 696 struct vm_capability *vmcap; 697 698 vmcap = (struct vm_capability *)data; 699 error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval); 700 break; 701 } 702 case VM_SET_CAPABILITY: { 703 struct vm_capability *vmcap; 704 705 vmcap = (struct vm_capability *)data; 706 error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval); 707 break; 708 } 709 case VM_ACTIVATE_CPU: 710 error = vm_activate_cpu(vcpu); 711 break; 712 case VM_GET_CPUS: { 713 struct vm_cpuset *vm_cpuset; 714 cpuset_t *cpuset; 715 int size; 716 717 error = 0; 718 vm_cpuset = (struct vm_cpuset *)data; 719 size = vm_cpuset->cpusetsize; 720 if (size < 1 || size > CPU_MAXSIZE / NBBY) { 721 error = ERANGE; 722 break; 723 } 724 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP, 725 M_WAITOK | M_ZERO); 726 if (vm_cpuset->which == VM_ACTIVE_CPUS) 727 *cpuset = vm_active_cpus(sc->vm); 728 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 729 *cpuset = vm_suspended_cpus(sc->vm); 730 else if (vm_cpuset->which == VM_DEBUG_CPUS) 731 *cpuset = vm_debug_cpus(sc->vm); 732 else 733 error = EINVAL; 734 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY)) 735 error = ERANGE; 736 if (error == 0) 737 error = copyout(cpuset, vm_cpuset->cpus, size); 738 free(cpuset, M_TEMP); 739 break; 740 } 741 case VM_SUSPEND_CPU: 742 error = vm_suspend_cpu(sc->vm, vcpu); 743 break; 744 case VM_RESUME_CPU: 745 error = vm_resume_cpu(sc->vm, vcpu); 746 break; 747 case VM_SET_TOPOLOGY: { 748 struct vm_cpu_topology *topology; 749 750 topology = (struct vm_cpu_topology *)data; 751 error = vm_set_topology(sc->vm, topology->sockets, 752 topology->cores, topology->threads, topology->maxcpus); 753 break; 754 } 755 case VM_GET_TOPOLOGY: { 756 struct vm_cpu_topology *topology; 757 758 topology = (struct vm_cpu_topology *)data; 759 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 760 &topology->threads, &topology->maxcpus); 761 error = 0; 762 break; 763 } 764 default: 765 error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag, 766 td); 767 break; 768 } 769 770 if ((ioctl->flags & 771 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0) 772 vm_unlock_memsegs(sc->vm); 773 if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) 774 vcpu_unlock_all(sc); 775 else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) 776 vcpu_unlock_one(vcpu); 777 778 /* 779 * Make sure that no handler returns a kernel-internal 780 * error value to userspace. 781 */ 782 KASSERT(error == ERESTART || error >= 0, 783 ("vmmdev_ioctl: invalid error return %d", error)); 784 return (error); 785 786 lockfail: 787 if ((ioctl->flags & 788 (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0) 789 vm_unlock_memsegs(sc->vm); 790 return (error); 791 } 792 793 static int 794 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 795 struct vm_object **objp, int nprot) 796 { 797 struct vmmdev_softc *sc; 798 vm_paddr_t gpa; 799 size_t len; 800 vm_ooffset_t segoff, first, last; 801 int error, found, segid; 802 bool sysmem; 803 804 first = *offset; 805 last = first + mapsize; 806 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 807 return (EINVAL); 808 809 sc = vmmdev_lookup2(cdev); 810 if (sc == NULL) { 811 /* virtual machine is in the process of being created */ 812 return (EINVAL); 813 } 814 815 /* 816 * Get a read lock on the guest memory map. 817 */ 818 vm_slock_memsegs(sc->vm); 819 820 gpa = 0; 821 found = 0; 822 while (!found) { 823 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 824 NULL, NULL); 825 if (error) 826 break; 827 828 if (first >= gpa && last <= gpa + len) 829 found = 1; 830 else 831 gpa += len; 832 } 833 834 if (found) { 835 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 836 KASSERT(error == 0 && *objp != NULL, 837 ("%s: invalid memory segment %d", __func__, segid)); 838 if (sysmem) { 839 vm_object_reference(*objp); 840 *offset = segoff + (first - gpa); 841 } else { 842 error = EINVAL; 843 } 844 } 845 vm_unlock_memsegs(sc->vm); 846 return (error); 847 } 848 849 static void 850 vmmdev_destroy(struct vmmdev_softc *sc) 851 { 852 struct devmem_softc *dsc; 853 int error __diagused; 854 855 KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__)); 856 KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__)); 857 858 /* 859 * Destroy all cdevs: 860 * 861 * - any new operations on the 'cdev' will return an error (ENXIO). 862 * 863 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 864 */ 865 SLIST_FOREACH(dsc, &sc->devmem, link) { 866 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 867 devmem_destroy(dsc); 868 } 869 870 vm_disable_vcpu_creation(sc->vm); 871 error = vcpu_lock_all(sc); 872 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 873 vm_unlock_vcpus(sc->vm); 874 875 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 876 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 877 SLIST_REMOVE_HEAD(&sc->devmem, link); 878 free(dsc->name, M_VMMDEV); 879 free(dsc, M_VMMDEV); 880 } 881 882 vm_destroy(sc->vm); 883 884 chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0); 885 crfree(sc->ucred); 886 887 sx_xlock(&vmmdev_mtx); 888 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 889 if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0) 890 LIST_REMOVE(sc, priv_link); 891 sx_xunlock(&vmmdev_mtx); 892 wakeup(sc); 893 free(sc, M_VMMDEV); 894 } 895 896 static int 897 vmmdev_lookup_and_destroy(const char *name, struct ucred *cred) 898 { 899 struct cdev *cdev; 900 struct vmmdev_softc *sc; 901 int error; 902 903 sx_xlock(&vmmdev_mtx); 904 sc = vmmdev_lookup(name, cred); 905 if (sc == NULL || sc->cdev == NULL) { 906 sx_xunlock(&vmmdev_mtx); 907 return (EINVAL); 908 } 909 910 /* 911 * Only the creator of a VM or a privileged user can destroy it. 912 */ 913 if ((cred->cr_uid != sc->ucred->cr_uid || 914 cred->cr_prison != sc->ucred->cr_prison) && 915 (error = priv_check_cred(cred, PRIV_VMM_DESTROY)) != 0) { 916 sx_xunlock(&vmmdev_mtx); 917 return (error); 918 } 919 920 /* 921 * Setting 'sc->cdev' to NULL is used to indicate that the VM 922 * is scheduled for destruction. 923 */ 924 cdev = sc->cdev; 925 sc->cdev = NULL; 926 sx_xunlock(&vmmdev_mtx); 927 928 (void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY); 929 destroy_dev(cdev); 930 vmmdev_destroy(sc); 931 932 return (0); 933 } 934 935 static int 936 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 937 { 938 char *buf; 939 int error, buflen; 940 941 error = vmm_priv_check(req->td->td_ucred); 942 if (error) 943 return (error); 944 945 buflen = VM_MAX_NAMELEN + 1; 946 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 947 error = sysctl_handle_string(oidp, buf, buflen, req); 948 if (error == 0 && req->newptr != NULL) 949 error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred); 950 free(buf, M_VMMDEV); 951 return (error); 952 } 953 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 954 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 955 NULL, 0, sysctl_vmm_destroy, "A", 956 "Destroy a vmm(4) instance (legacy interface)"); 957 958 static struct cdevsw vmmdevsw = { 959 .d_name = "vmmdev", 960 .d_version = D_VERSION, 961 .d_open = vmmdev_open, 962 .d_ioctl = vmmdev_ioctl, 963 .d_mmap_single = vmmdev_mmap_single, 964 .d_read = vmmdev_rw, 965 .d_write = vmmdev_rw, 966 }; 967 968 static struct vmmdev_softc * 969 vmmdev_alloc(struct vm *vm, struct ucred *cred) 970 { 971 struct vmmdev_softc *sc; 972 973 sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO); 974 SLIST_INIT(&sc->devmem); 975 sc->vm = vm; 976 sc->ucred = crhold(cred); 977 return (sc); 978 } 979 980 static int 981 vmmdev_create(const char *name, uint32_t flags, struct ucred *cred) 982 { 983 struct make_dev_args mda; 984 struct cdev *cdev; 985 struct vmmdev_softc *sc; 986 struct vmmctl_priv *priv; 987 struct vm *vm; 988 int error; 989 990 if (name == NULL || strlen(name) > VM_MAX_NAMELEN) 991 return (EINVAL); 992 993 if ((flags & ~VMMCTL_FLAGS_MASK) != 0) 994 return (EINVAL); 995 error = devfs_get_cdevpriv((void **)&priv); 996 if (error) 997 return (error); 998 999 sx_xlock(&vmmdev_mtx); 1000 sc = vmmdev_lookup(name, cred); 1001 if (sc != NULL) { 1002 sx_xunlock(&vmmdev_mtx); 1003 return (EEXIST); 1004 } 1005 1006 /* 1007 * Unprivileged users can only create VMs that will be automatically 1008 * destroyed when the creating descriptor is closed. 1009 */ 1010 if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) == 0 && 1011 (error = priv_check_cred(cred, PRIV_VMM_CREATE)) != 0) { 1012 sx_xunlock(&vmmdev_mtx); 1013 return (error); 1014 } 1015 1016 if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) { 1017 sx_xunlock(&vmmdev_mtx); 1018 return (ENOMEM); 1019 } 1020 1021 error = vm_create(name, &vm); 1022 if (error != 0) { 1023 sx_xunlock(&vmmdev_mtx); 1024 (void)chgvmmcnt(cred->cr_ruidinfo, -1, 0); 1025 return (error); 1026 } 1027 sc = vmmdev_alloc(vm, cred); 1028 SLIST_INSERT_HEAD(&head, sc, link); 1029 sc->flags = flags; 1030 if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0) 1031 LIST_INSERT_HEAD(&priv->softcs, sc, priv_link); 1032 1033 make_dev_args_init(&mda); 1034 mda.mda_devsw = &vmmdevsw; 1035 mda.mda_cr = sc->ucred; 1036 mda.mda_uid = cred->cr_uid; 1037 mda.mda_gid = GID_VMM; 1038 mda.mda_mode = 0600; 1039 mda.mda_si_drv1 = sc; 1040 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 1041 error = make_dev_s(&mda, &cdev, "vmm/%s", name); 1042 if (error != 0) { 1043 sx_xunlock(&vmmdev_mtx); 1044 vmmdev_destroy(sc); 1045 return (error); 1046 } 1047 sc->cdev = cdev; 1048 sx_xunlock(&vmmdev_mtx); 1049 return (0); 1050 } 1051 1052 static int 1053 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 1054 { 1055 char *buf; 1056 int error, buflen; 1057 1058 if (!vmm_initialized) 1059 return (ENXIO); 1060 1061 error = vmm_priv_check(req->td->td_ucred); 1062 if (error != 0) 1063 return (error); 1064 1065 buflen = VM_MAX_NAMELEN + 1; 1066 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 1067 error = sysctl_handle_string(oidp, buf, buflen, req); 1068 if (error == 0 && req->newptr != NULL) 1069 error = vmmdev_create(buf, 0, req->td->td_ucred); 1070 free(buf, M_VMMDEV); 1071 return (error); 1072 } 1073 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 1074 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 1075 NULL, 0, sysctl_vmm_create, "A", 1076 "Create a vmm(4) instance (legacy interface)"); 1077 1078 static void 1079 vmmctl_dtor(void *arg) 1080 { 1081 struct cdev *sc_cdev; 1082 struct vmmdev_softc *sc; 1083 struct vmmctl_priv *priv = arg; 1084 1085 /* 1086 * Scan the softc list for any VMs associated with 1087 * the current descriptor and destroy them. 1088 */ 1089 sx_xlock(&vmmdev_mtx); 1090 while (!LIST_EMPTY(&priv->softcs)) { 1091 sc = LIST_FIRST(&priv->softcs); 1092 sc_cdev = sc->cdev; 1093 if (sc_cdev != NULL) { 1094 sc->cdev = NULL; 1095 } else { 1096 /* 1097 * Another thread has already 1098 * started the removal process. 1099 * Sleep until 'vmmdev_destroy' notifies us 1100 * that the removal has finished. 1101 */ 1102 sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0); 1103 continue; 1104 } 1105 /* 1106 * Temporarily drop the lock to allow vmmdev_destroy to run. 1107 */ 1108 sx_xunlock(&vmmdev_mtx); 1109 (void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY); 1110 destroy_dev(sc_cdev); 1111 /* vmmdev_destroy will unlink the 'priv_link' entry. */ 1112 vmmdev_destroy(sc); 1113 sx_xlock(&vmmdev_mtx); 1114 } 1115 sx_xunlock(&vmmdev_mtx); 1116 1117 free(priv, M_VMMDEV); 1118 } 1119 1120 static int 1121 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td) 1122 { 1123 int error; 1124 struct vmmctl_priv *priv; 1125 1126 error = vmm_priv_check(td->td_ucred); 1127 if (error != 0) 1128 return (error); 1129 1130 if ((flags & FWRITE) == 0) 1131 return (EPERM); 1132 1133 priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO); 1134 LIST_INIT(&priv->softcs); 1135 error = devfs_set_cdevpriv(priv, vmmctl_dtor); 1136 if (error != 0) { 1137 free(priv, M_VMMDEV); 1138 return (error); 1139 } 1140 1141 return (0); 1142 } 1143 1144 static int 1145 vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 1146 struct thread *td) 1147 { 1148 int error; 1149 1150 switch (cmd) { 1151 case VMMCTL_VM_CREATE: { 1152 struct vmmctl_vm_create *vmc; 1153 1154 vmc = (struct vmmctl_vm_create *)data; 1155 vmc->name[VM_MAX_NAMELEN] = '\0'; 1156 for (size_t i = 0; i < nitems(vmc->reserved); i++) { 1157 if (vmc->reserved[i] != 0) { 1158 error = EINVAL; 1159 return (error); 1160 } 1161 } 1162 1163 error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred); 1164 break; 1165 } 1166 case VMMCTL_VM_DESTROY: { 1167 struct vmmctl_vm_destroy *vmd; 1168 1169 vmd = (struct vmmctl_vm_destroy *)data; 1170 vmd->name[VM_MAX_NAMELEN] = '\0'; 1171 for (size_t i = 0; i < nitems(vmd->reserved); i++) { 1172 if (vmd->reserved[i] != 0) { 1173 error = EINVAL; 1174 return (error); 1175 } 1176 } 1177 1178 error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred); 1179 break; 1180 } 1181 default: 1182 error = ENOTTY; 1183 break; 1184 } 1185 1186 return (error); 1187 } 1188 1189 static struct cdev *vmmctl_cdev; 1190 static struct cdevsw vmmctlsw = { 1191 .d_name = "vmmctl", 1192 .d_version = D_VERSION, 1193 .d_open = vmmctl_open, 1194 .d_ioctl = vmmctl_ioctl, 1195 }; 1196 1197 static int 1198 vmmdev_init(void) 1199 { 1200 int error; 1201 1202 sx_xlock(&vmmdev_mtx); 1203 error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL, 1204 UID_ROOT, GID_VMM, 0660, "vmmctl"); 1205 if (error == 0) { 1206 pr_allow_vmm_flag = prison_add_allow(NULL, "vmm", NULL, 1207 "Allow use of vmm in a jail"); 1208 pr_allow_vmm_ppt_flag = prison_add_allow(NULL, "vmm_ppt", NULL, 1209 "Allow use of vmm with ppt devices in a jail"); 1210 } 1211 sx_xunlock(&vmmdev_mtx); 1212 1213 return (error); 1214 } 1215 1216 static int 1217 vmmdev_cleanup(void) 1218 { 1219 sx_xlock(&vmmdev_mtx); 1220 if (!SLIST_EMPTY(&head)) { 1221 sx_xunlock(&vmmdev_mtx); 1222 return (EBUSY); 1223 } 1224 if (vmmctl_cdev != NULL) { 1225 destroy_dev(vmmctl_cdev); 1226 vmmctl_cdev = NULL; 1227 } 1228 sx_xunlock(&vmmdev_mtx); 1229 1230 return (0); 1231 } 1232 1233 static int 1234 vmm_handler(module_t mod, int what, void *arg) 1235 { 1236 int error; 1237 1238 switch (what) { 1239 case MOD_LOAD: 1240 error = vmmdev_init(); 1241 if (error != 0) 1242 break; 1243 1244 vm_maxcpu = mp_ncpus; 1245 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 1246 if (vm_maxcpu > VM_MAXCPU) { 1247 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 1248 vm_maxcpu = VM_MAXCPU; 1249 } 1250 if (vm_maxcpu == 0) 1251 vm_maxcpu = 1; 1252 vm_maxvmms = 4 * mp_ncpus; 1253 error = vmm_modinit(); 1254 if (error == 0) 1255 vmm_initialized = true; 1256 else { 1257 int error1 __diagused; 1258 1259 error1 = vmmdev_cleanup(); 1260 KASSERT(error1 == 0, 1261 ("%s: vmmdev_cleanup failed: %d", __func__, error1)); 1262 } 1263 break; 1264 case MOD_UNLOAD: 1265 error = vmmdev_cleanup(); 1266 if (error == 0 && vmm_initialized) { 1267 error = vmm_modcleanup(); 1268 if (error) { 1269 /* 1270 * Something bad happened - prevent new 1271 * VMs from being created 1272 */ 1273 vmm_initialized = false; 1274 } 1275 } 1276 break; 1277 default: 1278 error = 0; 1279 break; 1280 } 1281 return (error); 1282 } 1283 1284 static moduledata_t vmm_kmod = { 1285 "vmm", 1286 vmm_handler, 1287 NULL 1288 }; 1289 1290 /* 1291 * vmm initialization has the following dependencies: 1292 * 1293 * - Initialization requires smp_rendezvous() and therefore must happen 1294 * after SMP is fully functional (after SI_SUB_SMP). 1295 * - vmm device initialization requires an initialized devfs. 1296 */ 1297 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); 1298 MODULE_VERSION(vmm, 1); 1299 1300 static int 1301 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1302 struct vm_object **objp, int nprot) 1303 { 1304 struct devmem_softc *dsc; 1305 vm_ooffset_t first, last; 1306 size_t seglen; 1307 int error; 1308 bool sysmem; 1309 1310 dsc = cdev->si_drv1; 1311 if (dsc == NULL) { 1312 /* 'cdev' has been created but is not ready for use */ 1313 return (ENXIO); 1314 } 1315 1316 first = *offset; 1317 last = *offset + len; 1318 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1319 return (EINVAL); 1320 1321 vm_slock_memsegs(dsc->sc->vm); 1322 1323 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1324 KASSERT(error == 0 && !sysmem && *objp != NULL, 1325 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1326 1327 if (seglen >= last) 1328 vm_object_reference(*objp); 1329 else 1330 error = EINVAL; 1331 1332 vm_unlock_memsegs(dsc->sc->vm); 1333 return (error); 1334 } 1335 1336 static struct cdevsw devmemsw = { 1337 .d_name = "devmem", 1338 .d_version = D_VERSION, 1339 .d_mmap_single = devmem_mmap_single, 1340 }; 1341 1342 static int 1343 devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname) 1344 { 1345 struct make_dev_args mda; 1346 struct devmem_softc *dsc; 1347 int error; 1348 1349 sx_xlock(&vmmdev_mtx); 1350 1351 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1352 dsc->segid = segid; 1353 dsc->name = devname; 1354 dsc->sc = sc; 1355 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1356 1357 make_dev_args_init(&mda); 1358 mda.mda_devsw = &devmemsw; 1359 mda.mda_cr = sc->ucred; 1360 mda.mda_uid = sc->ucred->cr_uid; 1361 mda.mda_gid = GID_VMM; 1362 mda.mda_mode = 0600; 1363 mda.mda_si_drv1 = dsc; 1364 mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 1365 error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm), 1366 devname); 1367 if (error != 0) { 1368 SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link); 1369 free(dsc->name, M_VMMDEV); 1370 free(dsc, M_VMMDEV); 1371 } 1372 1373 sx_xunlock(&vmmdev_mtx); 1374 1375 return (error); 1376 } 1377 1378 static void 1379 devmem_destroy(void *arg) 1380 { 1381 struct devmem_softc *dsc = arg; 1382 1383 destroy_dev(dsc->cdev); 1384 dsc->cdev = NULL; 1385 dsc->sc = NULL; 1386 } 1387