/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#define EXTERR_CATEGORY EXTERR_CAT_VMM
#include <sys/exterrvar.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
#include <dev/vmm/vmm_vm.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int	segid;
	size_t	len;
	char	name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int	segid;
	size_t	len;
	char	name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	LIST_ENTRY(vmmdev_softc) priv_link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

struct vmmctl_priv {
	LIST_HEAD(, vmmdev_softc) softcs;
};

static bool vmm_initialized = false;

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned int pr_allow_vmm_flag, pr_allow_vmm_ppt_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

u_int vm_maxvmms;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
    &vm_maxvmms, 0, "Maximum number of VMM instances per user");

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
static void vmmdev_destroy(struct vmmdev_softc *sc);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    (ucred->cr_prison->pr_allow & pr_allow_vmm_flag) == 0)
		return (EPERM);

	return (0);
}

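/*
 * A vCPU is "locked" by moving it into the FROZEN state and unlocked by
 * returning it to IDLE; a frozen vCPU cannot re-enter the guest, which is
 * what makes the ioctl handlers below safe against concurrent execution.
 */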
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

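/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  The byte offset into the
 * device is interpreted as a guest physical address, so guest memory can be
 * inspected with ordinary tools, e.g. (illustrative):
 *
 *	dd if=/dev/vmm/testvm bs=4k skip=256 count=1 | hexdump -C
 *
 * dumps the 4KB guest page at GPA 0x100000.
 */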
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map.  If we want
		 * to use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem,
	    domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

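/*
 * Table of generic ioctls and the locking each one requires, consulted by
 * vmmdev_ioctl() before dispatch: LOCK_ONE_VCPU freezes the target vCPU,
 * LOCK_ALL_VCPUS freezes every vCPU, and the XLOCK/SLOCK_MEMSEGS flags take
 * the guest memory map lock exclusively or shared, respectively.
 * Machine-dependent ioctls are looked up in vmmdev_machdep_ioctls[] instead.
 */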
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PPT) != 0) {
		if (jailed(td->td_ucred) &&
		    (td->td_ucred->cr_prison->pr_allow &
		    pr_allow_vmm_ppt_flag) == 0)
			return (EPERM);
		error = priv_check(td, PRIV_VMM_PPTDEV);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
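		/*
		 * vCPU ioctls pass the target vcpuid as the first field of
		 * the argument structure; -1 selects all vCPUs and is only
		 * accepted by MAYBE_ALLOC_VCPU ioctls.
		 */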
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
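	/*
	 * VM_ALLOC_MEMSEG optionally carries a NUMA policy: 'ds_mask' points
	 * to a userspace domain bitmask which is validated and converted into
	 * a struct domainset constraining the backing allocation.
	 */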
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
		    mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask,
			    mseg->ds_policy, mseg->ds_mask_size);
			free(mask, M_VMMDEV);
			if (error)
				break;
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				break;
			}
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype,
		    &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
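	/*
	 * VM_GET_CPUS copies out a cpuset whose size is chosen by the caller;
	 * ERANGE is returned if the caller's buffer cannot hold the highest
	 * CPU set in the result.
	 */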
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

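/*
 * Tear down a VM instance.  By the time this runs the vm cdev has already
 * been destroyed (sc->cdev is NULL), so the remaining work is to destroy the
 * devmem cdevs, freeze the vCPUs, and free the VM and the softc itself.
 */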
static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine
	 *   'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	vm_destroy(sc->vm);

	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
	crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_REMOVE(sc, priv_link);
	sx_xunlock(&vmmdev_mtx);
	wakeup(sc);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Only the creator of a VM or a privileged user can destroy it.
	 */
	if ((cred->cr_uid != sc->ucred->cr_uid ||
	    cred->cr_prison != sc->ucred->cr_prison) &&
	    (error = priv_check_cred(cred, PRIV_VMM_DESTROY)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

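/*
 * Legacy string-valued sysctl create/destroy interface, e.g.
 * (illustrative):
 *
 *	sysctl hw.vmm.destroy=testvm
 *
 * The VMMCTL_VM_CREATE/VMMCTL_VM_DESTROY ioctls on /dev/vmmctl are the
 * preferred interface.
 */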
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv;
	struct vm *vm;
	int error;

	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
		return (EINVAL);

	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
		return (EINVAL);

	error = devfs_get_cdevpriv((void **)&priv);
	if (error)
		return (error);

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	/*
	 * Unprivileged users can only create VMs that will be automatically
	 * destroyed when the creating descriptor is closed.
	 */
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) == 0 &&
	    (error = priv_check_cred(cred, PRIV_VMM_CREATE)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (EXTERROR(error,
		    "An unprivileged user must run VMs in monitor mode"));
	}

	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
		sx_xunlock(&vmmdev_mtx);
		return (ENOMEM);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		(void)chgvmmcnt(cred->cr_ruidinfo, -1, 0);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);
	sc->flags = flags;
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = cred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	if (!vmm_initialized)
		return (ENXIO);

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, 0, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static void
vmmctl_dtor(void *arg)
{
	struct cdev *sc_cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv = arg;

	/*
	 * Scan the softc list for any VMs associated with
	 * the current descriptor and destroy them.
	 */
	sx_xlock(&vmmdev_mtx);
	while (!LIST_EMPTY(&priv->softcs)) {
		sc = LIST_FIRST(&priv->softcs);
		sc_cdev = sc->cdev;
		if (sc_cdev != NULL) {
			sc->cdev = NULL;
		} else {
			/*
			 * Another thread has already started the removal
			 * process.  Sleep until 'vmmdev_destroy' notifies us
			 * that the removal has finished.
			 */
			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
			continue;
		}
		/*
		 * Temporarily drop the lock to allow vmmdev_destroy to run.
		 */
		sx_xunlock(&vmmdev_mtx);
		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
		destroy_dev(sc_cdev);
		/* vmmdev_destroy will unlink the 'priv_link' entry. */
		vmmdev_destroy(sc);
		sx_xlock(&vmmdev_mtx);
	}
	sx_xunlock(&vmmdev_mtx);

	free(priv, M_VMMDEV);
}

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;
	struct vmmctl_priv *priv;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
	LIST_INIT(&priv->softcs);
	error = devfs_set_cdevpriv(priv, vmmctl_dtor);
	if (error != 0) {
		free(priv, M_VMMDEV);
		return (error);
	}

	return (0);
}

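/*
 * Handler for /dev/vmmctl ioctls.  Illustrative userland usage (a sketch,
 * error handling omitted):
 *
 *	int fd = open("/dev/vmmctl", O_RDWR);
 *	struct vmmctl_vm_create vmc = { .name = "testvm" };
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);
 *
 * On success /dev/vmm/testvm exists and can be opened by the creator.
 */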
static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};

static int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_VMM, 0660, "vmmctl");
	if (error == 0) {
		pr_allow_vmm_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail");
		pr_allow_vmm_ppt_flag = prison_add_allow(NULL, "vmm_ppt", NULL,
		    "Allow use of vmm with ppt devices in a jail");
	}
	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;

		vm_maxcpu = mp_ncpus;
		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
		if (vm_maxcpu > VM_MAXCPU) {
			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
			vm_maxcpu = VM_MAXCPU;
		}
		if (vm_maxcpu == 0)
			vm_maxcpu = 1;
		vm_maxvmms = 4 * mp_ncpus;
		error = vmm_modinit();
		if (error == 0)
			vmm_initialized = true;
		else {
			int error1 __diagused;

			error1 = vmmdev_cleanup();
			KASSERT(error1 == 0,
			    ("%s: vmmdev_cleanup failed: %d", __func__,
			    error1));
		}
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmm_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - Initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

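/*
 * Device memory segments (e.g. a bootrom) are exposed to userspace as
 * separate /dev/vmm.io/<vmname>.<segname> cdevs; a mapping request must lie
 * entirely within the segment.
 */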
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = sc->ucred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}