1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */ 12 13 /* 14 * Copyright 2015 Pluribus Networks Inc. 15 * Copyright 2019 Joyent, Inc. 16 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 17 * Copyright 2023 Oxide Computer Company 18 */ 19 20 #include <sys/types.h> 21 #include <sys/conf.h> 22 #include <sys/cpuvar.h> 23 #include <sys/ioccom.h> 24 #include <sys/stat.h> 25 #include <sys/vmsystm.h> 26 #include <sys/ddi.h> 27 #include <sys/mkdev.h> 28 #include <sys/sunddi.h> 29 #include <sys/fs/dv_node.h> 30 #include <sys/cpuset.h> 31 #include <sys/id_space.h> 32 #include <sys/fs/sdev_plugin.h> 33 #include <sys/smt.h> 34 #include <sys/kstat.h> 35 36 #include <sys/kernel.h> 37 #include <sys/hma.h> 38 #include <sys/x86_archext.h> 39 #include <x86/apicreg.h> 40 41 #include <sys/vmm.h> 42 #include <sys/vmm_kernel.h> 43 #include <sys/vmm_instruction_emul.h> 44 #include <sys/vmm_dev.h> 45 #include <sys/vmm_impl.h> 46 #include <sys/vmm_drv.h> 47 #include <sys/vmm_vm.h> 48 #include <sys/vmm_reservoir.h> 49 50 #include <vm/seg_dev.h> 51 52 #include "io/ppt.h" 53 #include "io/vatpic.h" 54 #include "io/vioapic.h" 55 #include "io/vrtc.h" 56 #include "io/vhpet.h" 57 #include "io/vpmtmr.h" 58 #include "vmm_lapic.h" 59 #include "vmm_stat.h" 60 #include "vmm_util.h" 61 62 /* 63 * Locking details: 64 * 65 * Driver-wide data (vmmdev_*) , including HMA and sdev registration, is 66 * protected by vmmdev_mtx. The list of vmm_softc_t instances and related data 67 * (vmm_*) are protected by vmm_mtx. Actions requiring both locks must acquire 68 * vmmdev_mtx before vmm_mtx. The sdev plugin functions must not attempt to 69 * acquire vmmdev_mtx, as they could deadlock with plugin unregistration. 70 */ 71 72 static kmutex_t vmmdev_mtx; 73 static dev_info_t *vmmdev_dip; 74 static hma_reg_t *vmmdev_hma_reg; 75 static uint_t vmmdev_hma_ref; 76 static sdev_plugin_hdl_t vmmdev_sdev_hdl; 77 78 static kmutex_t vmm_mtx; 79 static list_t vmm_list; 80 static id_space_t *vmm_minors; 81 static void *vmm_statep; 82 83 /* 84 * Until device emulation in bhyve had been adequately scrutinized and tested, 85 * there was (justified) concern that unusual or corrupt device state payloads 86 * could crash the host when loaded via the vmm-data interface. 87 * 88 * Now that those concerns have been mitigated, this protection is loosened to 89 * default-allow, but the switch is left in place, in case there is a need to 90 * once again clamp down on vmm-data writes. 
91 */ 92 int vmm_allow_state_writes = 1; 93 94 static const char *vmmdev_hvm_name = "bhyve"; 95 96 /* For sdev plugin (/dev) */ 97 #define VMM_SDEV_ROOT "/dev/vmm" 98 99 /* From uts/intel/io/vmm/intel/vmx.c */ 100 extern int vmx_x86_supported(const char **); 101 102 /* Holds and hooks from drivers external to vmm */ 103 struct vmm_hold { 104 list_node_t vmh_node; 105 vmm_softc_t *vmh_sc; 106 boolean_t vmh_release_req; 107 uint_t vmh_ioport_hook_cnt; 108 }; 109 110 struct vmm_lease { 111 list_node_t vml_node; 112 struct vm *vml_vm; 113 vm_client_t *vml_vmclient; 114 boolean_t vml_expired; 115 boolean_t vml_break_deferred; 116 boolean_t (*vml_expire_func)(void *); 117 void *vml_expire_arg; 118 struct vmm_hold *vml_hold; 119 }; 120 121 /* Options for vmm_destroy_locked */ 122 typedef enum vmm_destroy_opts { 123 VDO_DEFAULT = 0, 124 /* 125 * Indicate that zone-specific-data associated with this VM not be 126 * cleaned up as part of the destroy. Skipping ZSD clean-up is 127 * necessary when VM is being destroyed as part of zone destruction, 128 * when said ZSD is already being cleaned up. 129 */ 130 VDO_NO_CLEAN_ZSD = (1 << 0), 131 /* 132 * Attempt to wait for VM destruction to complete. This is opt-in, 133 * since there are many normal conditions which could lead to 134 * destruction being stalled pending other clean-up. 135 */ 136 VDO_ATTEMPT_WAIT = (1 << 1), 137 } vmm_destroy_opts_t; 138 139 static void vmm_hma_release(void); 140 static int vmm_destroy_locked(vmm_softc_t *, vmm_destroy_opts_t, bool *); 141 static int vmm_drv_block_hook(vmm_softc_t *, boolean_t); 142 static void vmm_lease_block(vmm_softc_t *); 143 static void vmm_lease_unblock(vmm_softc_t *); 144 static int vmm_kstat_alloc(vmm_softc_t *, minor_t, const cred_t *); 145 static void vmm_kstat_init(vmm_softc_t *); 146 static void vmm_kstat_fini(vmm_softc_t *); 147 148 /* 149 * The 'devmem' hack: 150 * 151 * On native FreeBSD, bhyve consumers are allowed to create 'devmem' segments 152 * in the vm which appear with their own name related to the vm under /dev. 153 * Since this would be a hassle from an sdev perspective and would require a 154 * new cdev interface (or complicate the existing one), we choose to implement 155 * this in a different manner. Direct access to the underlying vm memory 156 * segments is exposed by placing them in a range of offsets beyond the normal 157 * guest memory space. Userspace can query the appropriate offset to mmap() 158 * for a given segment-id with the VM_DEVMEM_GETOFFSET ioctl. 
159 */ 160 161 static vmm_devmem_entry_t * 162 vmmdev_devmem_find(vmm_softc_t *sc, int segid) 163 { 164 vmm_devmem_entry_t *ent = NULL; 165 list_t *dl = &sc->vmm_devmem_list; 166 167 for (ent = list_head(dl); ent != NULL; ent = list_next(dl, ent)) { 168 if (ent->vde_segid == segid) { 169 return (ent); 170 } 171 } 172 return (NULL); 173 } 174 175 static int 176 vmmdev_get_memseg(vmm_softc_t *sc, struct vm_memseg *mseg) 177 { 178 int error; 179 bool sysmem; 180 181 error = vm_get_memseg(sc->vmm_vm, mseg->segid, &mseg->len, &sysmem, 182 NULL); 183 if (error || mseg->len == 0) 184 return (error); 185 186 if (!sysmem) { 187 vmm_devmem_entry_t *de; 188 189 de = vmmdev_devmem_find(sc, mseg->segid); 190 if (de != NULL) { 191 (void) strlcpy(mseg->name, de->vde_name, 192 sizeof (mseg->name)); 193 } 194 } else { 195 bzero(mseg->name, sizeof (mseg->name)); 196 } 197 198 return (error); 199 } 200 201 static int 202 vmmdev_devmem_create(vmm_softc_t *sc, struct vm_memseg *mseg, const char *name) 203 { 204 off_t map_offset; 205 vmm_devmem_entry_t *entry; 206 207 if (list_is_empty(&sc->vmm_devmem_list)) { 208 map_offset = VM_DEVMEM_START; 209 } else { 210 entry = list_tail(&sc->vmm_devmem_list); 211 map_offset = entry->vde_off + entry->vde_len; 212 if (map_offset < entry->vde_off) { 213 /* Do not tolerate overflow */ 214 return (ERANGE); 215 } 216 /* 217 * XXXJOY: We could choose to search the list for duplicate 218 * names and toss an error. Since we're using the offset 219 * method for now, it does not make much of a difference. 220 */ 221 } 222 223 entry = kmem_zalloc(sizeof (*entry), KM_SLEEP); 224 entry->vde_segid = mseg->segid; 225 entry->vde_len = mseg->len; 226 entry->vde_off = map_offset; 227 (void) strlcpy(entry->vde_name, name, sizeof (entry->vde_name)); 228 list_insert_tail(&sc->vmm_devmem_list, entry); 229 230 return (0); 231 } 232 233 static boolean_t 234 vmmdev_devmem_segid(vmm_softc_t *sc, off_t off, off_t len, int *segidp, 235 off_t *map_offp) 236 { 237 list_t *dl = &sc->vmm_devmem_list; 238 vmm_devmem_entry_t *de = NULL; 239 const off_t map_end = off + len; 240 241 VERIFY(off >= VM_DEVMEM_START); 242 243 if (map_end < off) { 244 /* No match on overflow */ 245 return (B_FALSE); 246 } 247 248 for (de = list_head(dl); de != NULL; de = list_next(dl, de)) { 249 const off_t item_end = de->vde_off + de->vde_len; 250 251 if (de->vde_off <= off && item_end >= map_end) { 252 *segidp = de->vde_segid; 253 *map_offp = off - de->vde_off; 254 return (B_TRUE); 255 } 256 } 257 return (B_FALSE); 258 } 259 260 /* 261 * When an instance is being destroyed, the devmem list of named memory objects 262 * can be torn down, as no new mappings are allowed. 263 */ 264 static void 265 vmmdev_devmem_purge(vmm_softc_t *sc) 266 { 267 vmm_devmem_entry_t *entry; 268 269 while ((entry = list_remove_head(&sc->vmm_devmem_list)) != NULL) { 270 kmem_free(entry, sizeof (*entry)); 271 } 272 } 273 274 static int 275 vmmdev_alloc_memseg(vmm_softc_t *sc, struct vm_memseg *mseg) 276 { 277 int error; 278 bool sysmem = true; 279 280 if (VM_MEMSEG_NAME(mseg)) { 281 sysmem = false; 282 } 283 error = vm_alloc_memseg(sc->vmm_vm, mseg->segid, mseg->len, sysmem); 284 285 if (error == 0) { 286 /* 287 * Rather than create a whole fresh device from which userspace 288 * can mmap this segment, instead make it available at an 289 * offset above where the main guest memory resides. 
290 */ 291 error = vmmdev_devmem_create(sc, mseg, mseg->name); 292 if (error != 0) { 293 vm_free_memseg(sc->vmm_vm, mseg->segid); 294 } 295 } 296 return (error); 297 } 298 299 /* 300 * Resource Locking and Exclusion 301 * 302 * Much of bhyve depends on key portions of VM state, such as the guest memory 303 * map, to remain unchanged while the guest is running. As ported from 304 * FreeBSD, the initial strategy for this resource exclusion hinged on gating 305 * access to the instance vCPUs. Threads acting on a single vCPU, like those 306 * performing the work of actually running the guest in VMX/SVM, would lock 307 * only that vCPU during ioctl() entry. For ioctls which would change VM-wide 308 * state, all of the vCPUs would be first locked, ensuring that the 309 * operation(s) could complete without any other threads stumbling into 310 * intermediate states. 311 * 312 * This approach is largely effective for bhyve. Common operations, such as 313 * running the vCPUs, steer clear of lock contention. The model begins to 314 * break down for operations which do not occur in the context of a specific 315 * vCPU. LAPIC MSI delivery, for example, may be initiated from a worker 316 * thread in the bhyve process. In order to properly protect those vCPU-less 317 * operations from encountering invalid states, additional locking is required. 318 * This was solved by forcing those operations to lock the VM_MAXCPU-1 vCPU. 319 * It does mean that class of operations will be serialized on locking the 320 * specific vCPU and that instances sized at VM_MAXCPU will potentially see 321 * undue contention on the VM_MAXCPU-1 vCPU. 322 * 323 * In order to address the shortcomings of this model, the concept of a 324 * read/write lock has been added to bhyve. Operations which change 325 * fundamental aspects of a VM (such as the memory map) must acquire the write 326 * lock, which also implies locking all of the vCPUs and waiting for all read 327 * lock holders to release. While it increases the cost and waiting time for 328 * those few operations, it allows most hot-path operations on the VM (which 329 * depend on its configuration remaining stable) to occur with minimal locking. 330 * 331 * Consumers of the Driver API (see below) are a special case when it comes to 332 * this locking, since they may hold a read lock via the drv_lease mechanism 333 * for an extended period of time. Rather than forcing those consumers to 334 * continuously poll for a write lock attempt, the lease system forces them to 335 * provide a release callback to trigger their clean-up (and potential later 336 * reacquisition) of the read lock. 337 */ 338 339 static void 340 vcpu_lock_one(vmm_softc_t *sc, int vcpu) 341 { 342 ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU); 343 344 /* 345 * Since this state transition is utilizing from_idle=true, it should 346 * not fail, but rather block until it can be successful. 
347 */ 348 VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_FROZEN, true)); 349 } 350 351 static void 352 vcpu_unlock_one(vmm_softc_t *sc, int vcpu) 353 { 354 ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU); 355 356 VERIFY3U(vcpu_get_state(sc->vmm_vm, vcpu, NULL), ==, VCPU_FROZEN); 357 VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_IDLE, false)); 358 } 359 360 static void 361 vmm_read_lock(vmm_softc_t *sc) 362 { 363 rw_enter(&sc->vmm_rwlock, RW_READER); 364 } 365 366 static void 367 vmm_read_unlock(vmm_softc_t *sc) 368 { 369 rw_exit(&sc->vmm_rwlock); 370 } 371 372 static void 373 vmm_write_lock(vmm_softc_t *sc) 374 { 375 int maxcpus; 376 377 /* First lock all the vCPUs */ 378 maxcpus = vm_get_maxcpus(sc->vmm_vm); 379 for (int vcpu = 0; vcpu < maxcpus; vcpu++) { 380 vcpu_lock_one(sc, vcpu); 381 } 382 383 /* 384 * Block vmm_drv leases from being acquired or held while the VM write 385 * lock is held. 386 */ 387 vmm_lease_block(sc); 388 389 rw_enter(&sc->vmm_rwlock, RW_WRITER); 390 /* 391 * For now, the 'maxcpus' value for an instance is fixed at the 392 * compile-time constant of VM_MAXCPU at creation. If this changes in 393 * the future, allowing for dynamic vCPU resource sizing, acquisition 394 * of the write lock will need to be wary of such changes. 395 */ 396 VERIFY(maxcpus == vm_get_maxcpus(sc->vmm_vm)); 397 } 398 399 static void 400 vmm_write_unlock(vmm_softc_t *sc) 401 { 402 int maxcpus; 403 404 /* Allow vmm_drv leases to be acquired once write lock is dropped */ 405 vmm_lease_unblock(sc); 406 407 /* 408 * The VM write lock _must_ be released from the same thread it was 409 * acquired in, unlike the read lock. 410 */ 411 VERIFY(rw_write_held(&sc->vmm_rwlock)); 412 rw_exit(&sc->vmm_rwlock); 413 414 /* Unlock all the vCPUs */ 415 maxcpus = vm_get_maxcpus(sc->vmm_vm); 416 for (int vcpu = 0; vcpu < maxcpus; vcpu++) { 417 vcpu_unlock_one(sc, vcpu); 418 } 419 } 420 421 static int 422 vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, 423 cred_t *credp, int *rvalp) 424 { 425 int error = 0, vcpu = -1; 426 void *datap = (void *)arg; 427 enum vm_lock_type { 428 LOCK_NONE = 0, 429 LOCK_VCPU, 430 LOCK_READ_HOLD, 431 LOCK_WRITE_HOLD 432 } lock_type = LOCK_NONE; 433 434 /* Acquire any exclusion resources needed for the operation. */ 435 switch (cmd) { 436 case VM_RUN: 437 case VM_GET_REGISTER: 438 case VM_SET_REGISTER: 439 case VM_GET_SEGMENT_DESCRIPTOR: 440 case VM_SET_SEGMENT_DESCRIPTOR: 441 case VM_GET_REGISTER_SET: 442 case VM_SET_REGISTER_SET: 443 case VM_INJECT_EXCEPTION: 444 case VM_GET_CAPABILITY: 445 case VM_SET_CAPABILITY: 446 case VM_PPTDEV_MSI: 447 case VM_PPTDEV_MSIX: 448 case VM_SET_X2APIC_STATE: 449 case VM_GLA2GPA: 450 case VM_GLA2GPA_NOFAULT: 451 case VM_ACTIVATE_CPU: 452 case VM_SET_INTINFO: 453 case VM_GET_INTINFO: 454 case VM_RESTART_INSTRUCTION: 455 case VM_SET_KERNEMU_DEV: 456 case VM_GET_KERNEMU_DEV: 457 case VM_RESET_CPU: 458 case VM_GET_RUN_STATE: 459 case VM_SET_RUN_STATE: 460 case VM_GET_FPU: 461 case VM_SET_FPU: 462 case VM_GET_CPUID: 463 case VM_SET_CPUID: 464 case VM_LEGACY_CPUID: 465 /* 466 * Copy in the ID of the vCPU chosen for this operation. 467 * Since a nefarious caller could update their struct between 468 * this locking and when the rest of the ioctl data is copied 469 * in, it is _critical_ that this local 'vcpu' variable be used 470 * rather than the in-struct one when performing the ioctl. 
471 */ 472 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) { 473 return (EFAULT); 474 } 475 if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) { 476 return (EINVAL); 477 } 478 vcpu_lock_one(sc, vcpu); 479 lock_type = LOCK_VCPU; 480 break; 481 482 case VM_REINIT: 483 case VM_BIND_PPTDEV: 484 case VM_UNBIND_PPTDEV: 485 case VM_MAP_PPTDEV_MMIO: 486 case VM_UNMAP_PPTDEV_MMIO: 487 case VM_ALLOC_MEMSEG: 488 case VM_MMAP_MEMSEG: 489 case VM_MUNMAP_MEMSEG: 490 case VM_WRLOCK_CYCLE: 491 case VM_PMTMR_LOCATE: 492 case VM_PAUSE: 493 case VM_RESUME: 494 vmm_write_lock(sc); 495 lock_type = LOCK_WRITE_HOLD; 496 break; 497 498 case VM_GET_MEMSEG: 499 case VM_MMAP_GETNEXT: 500 case VM_LAPIC_IRQ: 501 case VM_INJECT_NMI: 502 case VM_IOAPIC_ASSERT_IRQ: 503 case VM_IOAPIC_DEASSERT_IRQ: 504 case VM_IOAPIC_PULSE_IRQ: 505 case VM_LAPIC_MSI: 506 case VM_LAPIC_LOCAL_IRQ: 507 case VM_GET_X2APIC_STATE: 508 case VM_RTC_READ: 509 case VM_RTC_WRITE: 510 case VM_RTC_SETTIME: 511 case VM_RTC_GETTIME: 512 case VM_PPTDEV_DISABLE_MSIX: 513 case VM_DEVMEM_GETOFFSET: 514 case VM_TRACK_DIRTY_PAGES: 515 case VM_NPT_OPERATION: 516 vmm_read_lock(sc); 517 lock_type = LOCK_READ_HOLD; 518 break; 519 520 case VM_DATA_READ: 521 case VM_DATA_WRITE: 522 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) { 523 return (EFAULT); 524 } 525 if (vcpu == -1) { 526 /* Access data for VM-wide devices */ 527 vmm_write_lock(sc); 528 lock_type = LOCK_WRITE_HOLD; 529 } else if (vcpu >= 0 && vcpu < vm_get_maxcpus(sc->vmm_vm)) { 530 /* Access data associated with a specific vCPU */ 531 vcpu_lock_one(sc, vcpu); 532 lock_type = LOCK_VCPU; 533 } else { 534 return (EINVAL); 535 } 536 break; 537 538 case VM_GET_GPA_PMAP: 539 case VM_IOAPIC_PINCOUNT: 540 case VM_SUSPEND: 541 case VM_DESC_FPU_AREA: 542 case VM_SET_AUTODESTRUCT: 543 case VM_DESTROY_SELF: 544 case VM_DESTROY_PENDING: 545 case VM_VCPU_BARRIER: 546 default: 547 break; 548 } 549 550 /* Execute the primary logic for the ioctl. */ 551 switch (cmd) { 552 case VM_RUN: { 553 struct vm_entry entry; 554 555 if (ddi_copyin(datap, &entry, sizeof (entry), md)) { 556 error = EFAULT; 557 break; 558 } 559 560 if (!(curthread->t_schedflag & TS_VCPU)) 561 smt_mark_as_vcpu(); 562 563 error = vm_run(sc->vmm_vm, vcpu, &entry); 564 565 /* 566 * Unexpected states in vm_run() are expressed through positive 567 * errno-oriented return values. VM states which expect further 568 * processing in userspace (necessary context via exitinfo) are 569 * expressed through negative return values. For the time being 570 * a return value of 0 is not expected from vm_run(). 571 */ 572 ASSERT(error != 0); 573 if (error < 0) { 574 const struct vm_exit *vme; 575 void *outp = entry.exit_data; 576 577 error = 0; 578 vme = vm_exitinfo(sc->vmm_vm, vcpu); 579 if (ddi_copyout(vme, outp, sizeof (*vme), md)) { 580 error = EFAULT; 581 } 582 } 583 break; 584 } 585 case VM_SUSPEND: { 586 struct vm_suspend vmsuspend; 587 588 if (ddi_copyin(datap, &vmsuspend, sizeof (vmsuspend), md)) { 589 error = EFAULT; 590 break; 591 } 592 error = vm_suspend(sc->vmm_vm, vmsuspend.how, vmsuspend.source); 593 break; 594 } 595 case VM_REINIT: { 596 struct vm_reinit reinit; 597 598 if (ddi_copyin(datap, &reinit, sizeof (reinit), md)) { 599 error = EFAULT; 600 break; 601 } 602 if ((error = vmm_drv_block_hook(sc, B_TRUE)) != 0) { 603 /* 604 * The VM instance should be free of driver-attached 605 * hooks during the reinitialization process. 
606 */ 607 break; 608 } 609 error = vm_reinit(sc->vmm_vm, reinit.flags); 610 (void) vmm_drv_block_hook(sc, B_FALSE); 611 break; 612 } 613 case VM_STAT_DESC: { 614 struct vm_stat_desc statdesc; 615 616 if (ddi_copyin(datap, &statdesc, sizeof (statdesc), md)) { 617 error = EFAULT; 618 break; 619 } 620 error = vmm_stat_desc_copy(statdesc.index, statdesc.desc, 621 sizeof (statdesc.desc)); 622 if (error == 0 && 623 ddi_copyout(&statdesc, datap, sizeof (statdesc), md)) { 624 error = EFAULT; 625 break; 626 } 627 break; 628 } 629 case VM_STATS_IOC: { 630 struct vm_stats vmstats; 631 632 if (ddi_copyin(datap, &vmstats, sizeof (vmstats), md)) { 633 error = EFAULT; 634 break; 635 } 636 hrt2tv(gethrtime(), &vmstats.tv); 637 error = vmm_stat_copy(sc->vmm_vm, vmstats.cpuid, vmstats.index, 638 nitems(vmstats.statbuf), 639 &vmstats.num_entries, vmstats.statbuf); 640 if (error == 0 && 641 ddi_copyout(&vmstats, datap, sizeof (vmstats), md)) { 642 error = EFAULT; 643 break; 644 } 645 break; 646 } 647 648 case VM_PPTDEV_MSI: { 649 struct vm_pptdev_msi pptmsi; 650 651 if (ddi_copyin(datap, &pptmsi, sizeof (pptmsi), md)) { 652 error = EFAULT; 653 break; 654 } 655 error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd, 656 pptmsi.addr, pptmsi.msg, pptmsi.numvec); 657 break; 658 } 659 case VM_PPTDEV_MSIX: { 660 struct vm_pptdev_msix pptmsix; 661 662 if (ddi_copyin(datap, &pptmsix, sizeof (pptmsix), md)) { 663 error = EFAULT; 664 break; 665 } 666 error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd, 667 pptmsix.idx, pptmsix.addr, pptmsix.msg, 668 pptmsix.vector_control); 669 break; 670 } 671 case VM_PPTDEV_DISABLE_MSIX: { 672 struct vm_pptdev pptdev; 673 674 if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) { 675 error = EFAULT; 676 break; 677 } 678 error = ppt_disable_msix(sc->vmm_vm, pptdev.pptfd); 679 break; 680 } 681 case VM_MAP_PPTDEV_MMIO: { 682 struct vm_pptdev_mmio pptmmio; 683 684 if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) { 685 error = EFAULT; 686 break; 687 } 688 error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa, 689 pptmmio.len, pptmmio.hpa); 690 break; 691 } 692 case VM_UNMAP_PPTDEV_MMIO: { 693 struct vm_pptdev_mmio pptmmio; 694 695 if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) { 696 error = EFAULT; 697 break; 698 } 699 error = ppt_unmap_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa, 700 pptmmio.len); 701 break; 702 } 703 case VM_BIND_PPTDEV: { 704 struct vm_pptdev pptdev; 705 706 if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) { 707 error = EFAULT; 708 break; 709 } 710 error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd); 711 break; 712 } 713 case VM_UNBIND_PPTDEV: { 714 struct vm_pptdev pptdev; 715 716 if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) { 717 error = EFAULT; 718 break; 719 } 720 error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd); 721 break; 722 } 723 case VM_GET_PPTDEV_LIMITS: { 724 struct vm_pptdev_limits pptlimits; 725 726 if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) { 727 error = EFAULT; 728 break; 729 } 730 error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd, 731 &pptlimits.msi_limit, &pptlimits.msix_limit); 732 if (error == 0 && 733 ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) { 734 error = EFAULT; 735 break; 736 } 737 break; 738 } 739 case VM_INJECT_EXCEPTION: { 740 struct vm_exception vmexc; 741 if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) { 742 error = EFAULT; 743 break; 744 } 745 error = vm_inject_exception(sc->vmm_vm, vcpu, vmexc.vector, 746 vmexc.error_code_valid != 0, vmexc.error_code, 747 
vmexc.restart_instruction != 0); 748 break; 749 } 750 case VM_INJECT_NMI: { 751 struct vm_nmi vmnmi; 752 753 if (ddi_copyin(datap, &vmnmi, sizeof (vmnmi), md)) { 754 error = EFAULT; 755 break; 756 } 757 error = vm_inject_nmi(sc->vmm_vm, vmnmi.cpuid); 758 break; 759 } 760 case VM_LAPIC_IRQ: { 761 struct vm_lapic_irq vmirq; 762 763 if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) { 764 error = EFAULT; 765 break; 766 } 767 error = lapic_intr_edge(sc->vmm_vm, vmirq.cpuid, vmirq.vector); 768 break; 769 } 770 case VM_LAPIC_LOCAL_IRQ: { 771 struct vm_lapic_irq vmirq; 772 773 if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) { 774 error = EFAULT; 775 break; 776 } 777 error = lapic_set_local_intr(sc->vmm_vm, vmirq.cpuid, 778 vmirq.vector); 779 break; 780 } 781 case VM_LAPIC_MSI: { 782 struct vm_lapic_msi vmmsi; 783 784 if (ddi_copyin(datap, &vmmsi, sizeof (vmmsi), md)) { 785 error = EFAULT; 786 break; 787 } 788 error = lapic_intr_msi(sc->vmm_vm, vmmsi.addr, vmmsi.msg); 789 break; 790 } 791 792 case VM_IOAPIC_ASSERT_IRQ: { 793 struct vm_ioapic_irq ioapic_irq; 794 795 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) { 796 error = EFAULT; 797 break; 798 } 799 error = vioapic_assert_irq(sc->vmm_vm, ioapic_irq.irq); 800 break; 801 } 802 case VM_IOAPIC_DEASSERT_IRQ: { 803 struct vm_ioapic_irq ioapic_irq; 804 805 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) { 806 error = EFAULT; 807 break; 808 } 809 error = vioapic_deassert_irq(sc->vmm_vm, ioapic_irq.irq); 810 break; 811 } 812 case VM_IOAPIC_PULSE_IRQ: { 813 struct vm_ioapic_irq ioapic_irq; 814 815 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) { 816 error = EFAULT; 817 break; 818 } 819 error = vioapic_pulse_irq(sc->vmm_vm, ioapic_irq.irq); 820 break; 821 } 822 case VM_IOAPIC_PINCOUNT: { 823 int pincount; 824 825 pincount = vioapic_pincount(sc->vmm_vm); 826 if (ddi_copyout(&pincount, datap, sizeof (int), md)) { 827 error = EFAULT; 828 break; 829 } 830 break; 831 } 832 case VM_DESC_FPU_AREA: { 833 struct vm_fpu_desc desc; 834 void *buf = NULL; 835 836 if (ddi_copyin(datap, &desc, sizeof (desc), md)) { 837 error = EFAULT; 838 break; 839 } 840 if (desc.vfd_num_entries > 64) { 841 error = EINVAL; 842 break; 843 } 844 const size_t buf_sz = sizeof (struct vm_fpu_desc_entry) * 845 desc.vfd_num_entries; 846 if (buf_sz != 0) { 847 buf = kmem_zalloc(buf_sz, KM_SLEEP); 848 } 849 850 /* 851 * For now, we are depending on vm_fpu_desc_entry and 852 * hma_xsave_state_desc_t having the same format. 853 */ 854 CTASSERT(sizeof (struct vm_fpu_desc_entry) == 855 sizeof (hma_xsave_state_desc_t)); 856 857 size_t req_size; 858 const uint_t max_entries = hma_fpu_describe_xsave_state( 859 (hma_xsave_state_desc_t *)buf, 860 desc.vfd_num_entries, 861 &req_size); 862 863 desc.vfd_req_size = req_size; 864 desc.vfd_num_entries = max_entries; 865 if (buf_sz != 0) { 866 if (ddi_copyout(buf, desc.vfd_entry_data, buf_sz, md)) { 867 error = EFAULT; 868 } 869 kmem_free(buf, buf_sz); 870 } 871 872 if (error == 0) { 873 if (ddi_copyout(&desc, datap, sizeof (desc), md)) { 874 error = EFAULT; 875 } 876 } 877 break; 878 } 879 case VM_SET_AUTODESTRUCT: { 880 /* 881 * Since this has to do with controlling the lifetime of the 882 * greater vmm_softc_t, the flag is protected by vmm_mtx, rather 883 * than the vcpu-centric or rwlock exclusion mechanisms. 
884 */ 885 mutex_enter(&vmm_mtx); 886 if (arg != 0) { 887 sc->vmm_flags |= VMM_AUTODESTROY; 888 } else { 889 sc->vmm_flags &= ~VMM_AUTODESTROY; 890 } 891 mutex_exit(&vmm_mtx); 892 break; 893 } 894 case VM_DESTROY_SELF: { 895 bool hma_release = false; 896 897 /* 898 * Just like VMM_DESTROY_VM, but on the instance file descriptor 899 * itself, rather than having to perform a racy name lookup as 900 * part of the destroy process. 901 * 902 * Since vmm_destroy_locked() performs vCPU lock acquisition in 903 * order to kick the vCPUs out of guest context as part of any 904 * destruction, we do not need to worry about it ourself using 905 * the `lock_type` logic here. 906 */ 907 mutex_enter(&vmm_mtx); 908 VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release)); 909 mutex_exit(&vmm_mtx); 910 if (hma_release) { 911 vmm_hma_release(); 912 } 913 break; 914 } 915 case VM_DESTROY_PENDING: { 916 /* 917 * If we have made it this far, then destruction of the instance 918 * has not been initiated. 919 */ 920 *rvalp = 0; 921 break; 922 } 923 924 case VM_ISA_ASSERT_IRQ: { 925 struct vm_isa_irq isa_irq; 926 927 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) { 928 error = EFAULT; 929 break; 930 } 931 error = vatpic_assert_irq(sc->vmm_vm, isa_irq.atpic_irq); 932 if (error == 0 && isa_irq.ioapic_irq != -1) { 933 error = vioapic_assert_irq(sc->vmm_vm, 934 isa_irq.ioapic_irq); 935 } 936 break; 937 } 938 case VM_ISA_DEASSERT_IRQ: { 939 struct vm_isa_irq isa_irq; 940 941 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) { 942 error = EFAULT; 943 break; 944 } 945 error = vatpic_deassert_irq(sc->vmm_vm, isa_irq.atpic_irq); 946 if (error == 0 && isa_irq.ioapic_irq != -1) { 947 error = vioapic_deassert_irq(sc->vmm_vm, 948 isa_irq.ioapic_irq); 949 } 950 break; 951 } 952 case VM_ISA_PULSE_IRQ: { 953 struct vm_isa_irq isa_irq; 954 955 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) { 956 error = EFAULT; 957 break; 958 } 959 error = vatpic_pulse_irq(sc->vmm_vm, isa_irq.atpic_irq); 960 if (error == 0 && isa_irq.ioapic_irq != -1) { 961 error = vioapic_pulse_irq(sc->vmm_vm, 962 isa_irq.ioapic_irq); 963 } 964 break; 965 } 966 case VM_ISA_SET_IRQ_TRIGGER: { 967 struct vm_isa_irq_trigger isa_irq_trigger; 968 969 if (ddi_copyin(datap, &isa_irq_trigger, 970 sizeof (isa_irq_trigger), md)) { 971 error = EFAULT; 972 break; 973 } 974 error = vatpic_set_irq_trigger(sc->vmm_vm, 975 isa_irq_trigger.atpic_irq, isa_irq_trigger.trigger); 976 break; 977 } 978 979 case VM_MMAP_GETNEXT: { 980 struct vm_memmap mm; 981 982 if (ddi_copyin(datap, &mm, sizeof (mm), md)) { 983 error = EFAULT; 984 break; 985 } 986 error = vm_mmap_getnext(sc->vmm_vm, &mm.gpa, &mm.segid, 987 &mm.segoff, &mm.len, &mm.prot, &mm.flags); 988 if (error == 0 && ddi_copyout(&mm, datap, sizeof (mm), md)) { 989 error = EFAULT; 990 break; 991 } 992 break; 993 } 994 case VM_MMAP_MEMSEG: { 995 struct vm_memmap mm; 996 997 if (ddi_copyin(datap, &mm, sizeof (mm), md)) { 998 error = EFAULT; 999 break; 1000 } 1001 error = vm_mmap_memseg(sc->vmm_vm, mm.gpa, mm.segid, mm.segoff, 1002 mm.len, mm.prot, mm.flags); 1003 break; 1004 } 1005 case VM_MUNMAP_MEMSEG: { 1006 struct vm_munmap mu; 1007 1008 if (ddi_copyin(datap, &mu, sizeof (mu), md)) { 1009 error = EFAULT; 1010 break; 1011 } 1012 error = vm_munmap_memseg(sc->vmm_vm, mu.gpa, mu.len); 1013 break; 1014 } 1015 case VM_ALLOC_MEMSEG: { 1016 struct vm_memseg vmseg; 1017 1018 if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) { 1019 error = EFAULT; 1020 break; 1021 } 1022 error = vmmdev_alloc_memseg(sc, &vmseg); 1023 
break; 1024 } 1025 case VM_GET_MEMSEG: { 1026 struct vm_memseg vmseg; 1027 1028 if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) { 1029 error = EFAULT; 1030 break; 1031 } 1032 error = vmmdev_get_memseg(sc, &vmseg); 1033 if (error == 0 && 1034 ddi_copyout(&vmseg, datap, sizeof (vmseg), md)) { 1035 error = EFAULT; 1036 break; 1037 } 1038 break; 1039 } 1040 case VM_GET_REGISTER: { 1041 struct vm_register vmreg; 1042 1043 if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) { 1044 error = EFAULT; 1045 break; 1046 } 1047 error = vm_get_register(sc->vmm_vm, vcpu, vmreg.regnum, 1048 &vmreg.regval); 1049 if (error == 0 && 1050 ddi_copyout(&vmreg, datap, sizeof (vmreg), md)) { 1051 error = EFAULT; 1052 break; 1053 } 1054 break; 1055 } 1056 case VM_SET_REGISTER: { 1057 struct vm_register vmreg; 1058 1059 if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) { 1060 error = EFAULT; 1061 break; 1062 } 1063 error = vm_set_register(sc->vmm_vm, vcpu, vmreg.regnum, 1064 vmreg.regval); 1065 break; 1066 } 1067 case VM_SET_SEGMENT_DESCRIPTOR: { 1068 struct vm_seg_desc vmsegd; 1069 1070 if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) { 1071 error = EFAULT; 1072 break; 1073 } 1074 error = vm_set_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum, 1075 &vmsegd.desc); 1076 break; 1077 } 1078 case VM_GET_SEGMENT_DESCRIPTOR: { 1079 struct vm_seg_desc vmsegd; 1080 1081 if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) { 1082 error = EFAULT; 1083 break; 1084 } 1085 error = vm_get_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum, 1086 &vmsegd.desc); 1087 if (error == 0 && 1088 ddi_copyout(&vmsegd, datap, sizeof (vmsegd), md)) { 1089 error = EFAULT; 1090 break; 1091 } 1092 break; 1093 } 1094 case VM_GET_REGISTER_SET: { 1095 struct vm_register_set vrs; 1096 int regnums[VM_REG_LAST]; 1097 uint64_t regvals[VM_REG_LAST]; 1098 1099 if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) { 1100 error = EFAULT; 1101 break; 1102 } 1103 if (vrs.count > VM_REG_LAST || vrs.count == 0) { 1104 error = EINVAL; 1105 break; 1106 } 1107 if (ddi_copyin(vrs.regnums, regnums, 1108 sizeof (int) * vrs.count, md)) { 1109 error = EFAULT; 1110 break; 1111 } 1112 1113 error = 0; 1114 for (uint_t i = 0; i < vrs.count && error == 0; i++) { 1115 if (regnums[i] < 0) { 1116 error = EINVAL; 1117 break; 1118 } 1119 error = vm_get_register(sc->vmm_vm, vcpu, regnums[i], 1120 ®vals[i]); 1121 } 1122 if (error == 0 && ddi_copyout(regvals, vrs.regvals, 1123 sizeof (uint64_t) * vrs.count, md)) { 1124 error = EFAULT; 1125 } 1126 break; 1127 } 1128 case VM_SET_REGISTER_SET: { 1129 struct vm_register_set vrs; 1130 int regnums[VM_REG_LAST]; 1131 uint64_t regvals[VM_REG_LAST]; 1132 1133 if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) { 1134 error = EFAULT; 1135 break; 1136 } 1137 if (vrs.count > VM_REG_LAST || vrs.count == 0) { 1138 error = EINVAL; 1139 break; 1140 } 1141 if (ddi_copyin(vrs.regnums, regnums, 1142 sizeof (int) * vrs.count, md)) { 1143 error = EFAULT; 1144 break; 1145 } 1146 if (ddi_copyin(vrs.regvals, regvals, 1147 sizeof (uint64_t) * vrs.count, md)) { 1148 error = EFAULT; 1149 break; 1150 } 1151 1152 error = 0; 1153 for (uint_t i = 0; i < vrs.count && error == 0; i++) { 1154 /* 1155 * Setting registers in a set is not atomic, since a 1156 * failure in the middle of the set will cause a 1157 * bail-out and inconsistent register state. Callers 1158 * should be wary of this. 
1159 */ 1160 if (regnums[i] < 0) { 1161 error = EINVAL; 1162 break; 1163 } 1164 error = vm_set_register(sc->vmm_vm, vcpu, regnums[i], 1165 regvals[i]); 1166 } 1167 break; 1168 } 1169 case VM_RESET_CPU: { 1170 struct vm_vcpu_reset vvr; 1171 1172 if (ddi_copyin(datap, &vvr, sizeof (vvr), md)) { 1173 error = EFAULT; 1174 break; 1175 } 1176 if (vvr.kind != VRK_RESET && vvr.kind != VRK_INIT) { 1177 error = EINVAL; 1178 } 1179 1180 error = vcpu_arch_reset(sc->vmm_vm, vcpu, vvr.kind == VRK_INIT); 1181 break; 1182 } 1183 case VM_GET_RUN_STATE: { 1184 struct vm_run_state vrs; 1185 1186 bzero(&vrs, sizeof (vrs)); 1187 error = vm_get_run_state(sc->vmm_vm, vcpu, &vrs.state, 1188 &vrs.sipi_vector); 1189 if (error == 0) { 1190 if (ddi_copyout(&vrs, datap, sizeof (vrs), md)) { 1191 error = EFAULT; 1192 break; 1193 } 1194 } 1195 break; 1196 } 1197 case VM_SET_RUN_STATE: { 1198 struct vm_run_state vrs; 1199 1200 if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) { 1201 error = EFAULT; 1202 break; 1203 } 1204 error = vm_set_run_state(sc->vmm_vm, vcpu, vrs.state, 1205 vrs.sipi_vector); 1206 break; 1207 } 1208 case VM_GET_FPU: { 1209 struct vm_fpu_state req; 1210 const size_t max_len = (PAGESIZE * 2); 1211 void *kbuf; 1212 1213 if (ddi_copyin(datap, &req, sizeof (req), md)) { 1214 error = EFAULT; 1215 break; 1216 } 1217 if (req.len > max_len || req.len == 0) { 1218 error = EINVAL; 1219 break; 1220 } 1221 kbuf = kmem_zalloc(req.len, KM_SLEEP); 1222 error = vm_get_fpu(sc->vmm_vm, vcpu, kbuf, req.len); 1223 if (error == 0) { 1224 if (ddi_copyout(kbuf, req.buf, req.len, md)) { 1225 error = EFAULT; 1226 } 1227 } 1228 kmem_free(kbuf, req.len); 1229 break; 1230 } 1231 case VM_SET_FPU: { 1232 struct vm_fpu_state req; 1233 const size_t max_len = (PAGESIZE * 2); 1234 void *kbuf; 1235 1236 if (ddi_copyin(datap, &req, sizeof (req), md)) { 1237 error = EFAULT; 1238 break; 1239 } 1240 if (req.len > max_len || req.len == 0) { 1241 error = EINVAL; 1242 break; 1243 } 1244 kbuf = kmem_alloc(req.len, KM_SLEEP); 1245 if (ddi_copyin(req.buf, kbuf, req.len, md)) { 1246 error = EFAULT; 1247 } else { 1248 error = vm_set_fpu(sc->vmm_vm, vcpu, kbuf, req.len); 1249 } 1250 kmem_free(kbuf, req.len); 1251 break; 1252 } 1253 case VM_GET_CPUID: { 1254 struct vm_vcpu_cpuid_config cfg; 1255 struct vcpu_cpuid_entry *entries = NULL; 1256 1257 if (ddi_copyin(datap, &cfg, sizeof (cfg), md)) { 1258 error = EFAULT; 1259 break; 1260 } 1261 if (cfg.vvcc_nent > VMM_MAX_CPUID_ENTRIES) { 1262 error = EINVAL; 1263 break; 1264 } 1265 1266 const size_t entries_size = 1267 cfg.vvcc_nent * sizeof (struct vcpu_cpuid_entry); 1268 if (entries_size != 0) { 1269 entries = kmem_zalloc(entries_size, KM_SLEEP); 1270 } 1271 1272 vcpu_cpuid_config_t vm_cfg = { 1273 .vcc_nent = cfg.vvcc_nent, 1274 .vcc_entries = entries, 1275 }; 1276 error = vm_get_cpuid(sc->vmm_vm, vcpu, &vm_cfg); 1277 1278 /* 1279 * Only attempt to copy out the resultant entries if we were 1280 * able to query them from the instance. The flags and number 1281 * of entries are emitted regardless. 
1282 */ 1283 cfg.vvcc_flags = vm_cfg.vcc_flags; 1284 cfg.vvcc_nent = vm_cfg.vcc_nent; 1285 if (entries != NULL) { 1286 if (error == 0 && ddi_copyout(entries, cfg.vvcc_entries, 1287 entries_size, md) != 0) { 1288 error = EFAULT; 1289 } 1290 1291 kmem_free(entries, entries_size); 1292 } 1293 1294 if (ddi_copyout(&cfg, datap, sizeof (cfg), md) != 0) { 1295 error = EFAULT; 1296 } 1297 break; 1298 } 1299 case VM_SET_CPUID: { 1300 struct vm_vcpu_cpuid_config cfg; 1301 struct vcpu_cpuid_entry *entries = NULL; 1302 size_t entries_size = 0; 1303 1304 if (ddi_copyin(datap, &cfg, sizeof (cfg), md)) { 1305 error = EFAULT; 1306 break; 1307 } 1308 if (cfg.vvcc_nent > VMM_MAX_CPUID_ENTRIES) { 1309 error = EFBIG; 1310 break; 1311 } 1312 if ((cfg.vvcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0) { 1313 /* 1314 * If we are being instructed to use "legacy" handling, 1315 * then no entries should be provided, since the static 1316 * in-kernel masking will be used. 1317 */ 1318 if (cfg.vvcc_nent != 0) { 1319 error = EINVAL; 1320 break; 1321 } 1322 } else if (cfg.vvcc_nent != 0) { 1323 entries_size = 1324 cfg.vvcc_nent * sizeof (struct vcpu_cpuid_entry); 1325 entries = kmem_alloc(entries_size, KM_SLEEP); 1326 1327 if (ddi_copyin(cfg.vvcc_entries, entries, entries_size, 1328 md) != 0) { 1329 error = EFAULT; 1330 kmem_free(entries, entries_size); 1331 break; 1332 } 1333 } 1334 1335 vcpu_cpuid_config_t vm_cfg = { 1336 .vcc_flags = cfg.vvcc_flags, 1337 .vcc_nent = cfg.vvcc_nent, 1338 .vcc_entries = entries, 1339 }; 1340 error = vm_set_cpuid(sc->vmm_vm, vcpu, &vm_cfg); 1341 1342 if (entries != NULL) { 1343 kmem_free(entries, entries_size); 1344 } 1345 break; 1346 } 1347 case VM_LEGACY_CPUID: { 1348 struct vm_legacy_cpuid vlc; 1349 if (ddi_copyin(datap, &vlc, sizeof (vlc), md)) { 1350 error = EFAULT; 1351 break; 1352 } 1353 vlc.vlc_vcpuid = vcpu; 1354 1355 legacy_emulate_cpuid(sc->vmm_vm, vcpu, &vlc.vlc_eax, 1356 &vlc.vlc_ebx, &vlc.vlc_ecx, &vlc.vlc_edx); 1357 1358 if (ddi_copyout(&vlc, datap, sizeof (vlc), md)) { 1359 error = EFAULT; 1360 break; 1361 } 1362 break; 1363 } 1364 1365 case VM_SET_KERNEMU_DEV: 1366 case VM_GET_KERNEMU_DEV: { 1367 struct vm_readwrite_kernemu_device kemu; 1368 size_t size = 0; 1369 1370 if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) { 1371 error = EFAULT; 1372 break; 1373 } 1374 1375 if (kemu.access_width > 3) { 1376 error = EINVAL; 1377 break; 1378 } 1379 size = (1 << kemu.access_width); 1380 ASSERT(size >= 1 && size <= 8); 1381 1382 if (cmd == VM_SET_KERNEMU_DEV) { 1383 error = vm_service_mmio_write(sc->vmm_vm, vcpu, 1384 kemu.gpa, kemu.value, size); 1385 } else { 1386 error = vm_service_mmio_read(sc->vmm_vm, vcpu, 1387 kemu.gpa, &kemu.value, size); 1388 } 1389 1390 if (error == 0) { 1391 if (ddi_copyout(&kemu, datap, sizeof (kemu), md)) { 1392 error = EFAULT; 1393 break; 1394 } 1395 } 1396 break; 1397 } 1398 1399 case VM_GET_CAPABILITY: { 1400 struct vm_capability vmcap; 1401 1402 if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) { 1403 error = EFAULT; 1404 break; 1405 } 1406 error = vm_get_capability(sc->vmm_vm, vcpu, vmcap.captype, 1407 &vmcap.capval); 1408 if (error == 0 && 1409 ddi_copyout(&vmcap, datap, sizeof (vmcap), md)) { 1410 error = EFAULT; 1411 break; 1412 } 1413 break; 1414 } 1415 case VM_SET_CAPABILITY: { 1416 struct vm_capability vmcap; 1417 1418 if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) { 1419 error = EFAULT; 1420 break; 1421 } 1422 error = vm_set_capability(sc->vmm_vm, vcpu, vmcap.captype, 1423 vmcap.capval); 1424 break; 1425 } 1426 case VM_SET_X2APIC_STATE: { 
1427 struct vm_x2apic x2apic; 1428 1429 if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) { 1430 error = EFAULT; 1431 break; 1432 } 1433 error = vm_set_x2apic_state(sc->vmm_vm, vcpu, x2apic.state); 1434 break; 1435 } 1436 case VM_GET_X2APIC_STATE: { 1437 struct vm_x2apic x2apic; 1438 1439 if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) { 1440 error = EFAULT; 1441 break; 1442 } 1443 error = vm_get_x2apic_state(sc->vmm_vm, x2apic.cpuid, 1444 &x2apic.state); 1445 if (error == 0 && 1446 ddi_copyout(&x2apic, datap, sizeof (x2apic), md)) { 1447 error = EFAULT; 1448 break; 1449 } 1450 break; 1451 } 1452 case VM_GET_GPA_PMAP: { 1453 /* 1454 * Until there is a necessity to leak EPT/RVI PTE values to 1455 * userspace, this will remain unimplemented 1456 */ 1457 error = EINVAL; 1458 break; 1459 } 1460 case VM_GET_HPET_CAPABILITIES: { 1461 struct vm_hpet_cap hpetcap; 1462 1463 error = vhpet_getcap(&hpetcap); 1464 if (error == 0 && 1465 ddi_copyout(&hpetcap, datap, sizeof (hpetcap), md)) { 1466 error = EFAULT; 1467 break; 1468 } 1469 break; 1470 } 1471 case VM_GLA2GPA: { 1472 struct vm_gla2gpa gg; 1473 1474 if (ddi_copyin(datap, &gg, sizeof (gg), md)) { 1475 error = EFAULT; 1476 break; 1477 } 1478 gg.vcpuid = vcpu; 1479 error = vm_gla2gpa(sc->vmm_vm, vcpu, &gg.paging, gg.gla, 1480 gg.prot, &gg.gpa, &gg.fault); 1481 if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) { 1482 error = EFAULT; 1483 break; 1484 } 1485 break; 1486 } 1487 case VM_GLA2GPA_NOFAULT: { 1488 struct vm_gla2gpa gg; 1489 1490 if (ddi_copyin(datap, &gg, sizeof (gg), md)) { 1491 error = EFAULT; 1492 break; 1493 } 1494 gg.vcpuid = vcpu; 1495 error = vm_gla2gpa_nofault(sc->vmm_vm, vcpu, &gg.paging, 1496 gg.gla, gg.prot, &gg.gpa, &gg.fault); 1497 if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) { 1498 error = EFAULT; 1499 break; 1500 } 1501 break; 1502 } 1503 1504 case VM_ACTIVATE_CPU: 1505 error = vm_activate_cpu(sc->vmm_vm, vcpu); 1506 break; 1507 1508 case VM_SUSPEND_CPU: 1509 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) { 1510 error = EFAULT; 1511 } else { 1512 error = vm_suspend_cpu(sc->vmm_vm, vcpu); 1513 } 1514 break; 1515 1516 case VM_RESUME_CPU: 1517 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) { 1518 error = EFAULT; 1519 } else { 1520 error = vm_resume_cpu(sc->vmm_vm, vcpu); 1521 } 1522 break; 1523 1524 case VM_VCPU_BARRIER: 1525 vcpu = arg; 1526 error = vm_vcpu_barrier(sc->vmm_vm, vcpu); 1527 break; 1528 1529 case VM_GET_CPUS: { 1530 struct vm_cpuset vm_cpuset; 1531 cpuset_t tempset; 1532 void *srcp = &tempset; 1533 int size; 1534 1535 if (ddi_copyin(datap, &vm_cpuset, sizeof (vm_cpuset), md)) { 1536 error = EFAULT; 1537 break; 1538 } 1539 1540 /* Be more generous about sizing since our cpuset_t is large. */ 1541 size = vm_cpuset.cpusetsize; 1542 if (size <= 0 || size > sizeof (cpuset_t)) { 1543 error = ERANGE; 1544 } 1545 /* 1546 * If they want a ulong_t or less, make sure they receive the 1547 * low bits with all the useful information. 
1548 */ 1549 if (size <= sizeof (tempset.cpub[0])) { 1550 srcp = &tempset.cpub[0]; 1551 } 1552 1553 if (vm_cpuset.which == VM_ACTIVE_CPUS) { 1554 tempset = vm_active_cpus(sc->vmm_vm); 1555 } else if (vm_cpuset.which == VM_DEBUG_CPUS) { 1556 tempset = vm_debug_cpus(sc->vmm_vm); 1557 } else { 1558 error = EINVAL; 1559 } 1560 1561 ASSERT(size > 0 && size <= sizeof (tempset)); 1562 if (error == 0 && 1563 ddi_copyout(srcp, vm_cpuset.cpus, size, md)) { 1564 error = EFAULT; 1565 break; 1566 } 1567 break; 1568 } 1569 case VM_SET_INTINFO: { 1570 struct vm_intinfo vmii; 1571 1572 if (ddi_copyin(datap, &vmii, sizeof (vmii), md)) { 1573 error = EFAULT; 1574 break; 1575 } 1576 error = vm_exit_intinfo(sc->vmm_vm, vcpu, vmii.info1); 1577 break; 1578 } 1579 case VM_GET_INTINFO: { 1580 struct vm_intinfo vmii; 1581 1582 vmii.vcpuid = vcpu; 1583 error = vm_get_intinfo(sc->vmm_vm, vcpu, &vmii.info1, 1584 &vmii.info2); 1585 if (error == 0 && 1586 ddi_copyout(&vmii, datap, sizeof (vmii), md)) { 1587 error = EFAULT; 1588 break; 1589 } 1590 break; 1591 } 1592 case VM_RTC_WRITE: { 1593 struct vm_rtc_data rtcdata; 1594 1595 if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) { 1596 error = EFAULT; 1597 break; 1598 } 1599 error = vrtc_nvram_write(sc->vmm_vm, rtcdata.offset, 1600 rtcdata.value); 1601 break; 1602 } 1603 case VM_RTC_READ: { 1604 struct vm_rtc_data rtcdata; 1605 1606 if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) { 1607 error = EFAULT; 1608 break; 1609 } 1610 error = vrtc_nvram_read(sc->vmm_vm, rtcdata.offset, 1611 &rtcdata.value); 1612 if (error == 0 && 1613 ddi_copyout(&rtcdata, datap, sizeof (rtcdata), md)) { 1614 error = EFAULT; 1615 break; 1616 } 1617 break; 1618 } 1619 case VM_RTC_SETTIME: { 1620 timespec_t ts; 1621 1622 if (ddi_copyin(datap, &ts, sizeof (ts), md)) { 1623 error = EFAULT; 1624 break; 1625 } 1626 error = vrtc_set_time(sc->vmm_vm, &ts); 1627 break; 1628 } 1629 case VM_RTC_GETTIME: { 1630 timespec_t ts; 1631 1632 vrtc_get_time(sc->vmm_vm, &ts); 1633 if (ddi_copyout(&ts, datap, sizeof (ts), md)) { 1634 error = EFAULT; 1635 break; 1636 } 1637 break; 1638 } 1639 1640 case VM_PMTMR_LOCATE: { 1641 uint16_t port = arg; 1642 error = vpmtmr_set_location(sc->vmm_vm, port); 1643 break; 1644 } 1645 1646 case VM_RESTART_INSTRUCTION: 1647 error = vm_restart_instruction(sc->vmm_vm, vcpu); 1648 break; 1649 1650 case VM_SET_TOPOLOGY: { 1651 struct vm_cpu_topology topo; 1652 1653 if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) { 1654 error = EFAULT; 1655 break; 1656 } 1657 error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores, 1658 topo.threads, topo.maxcpus); 1659 break; 1660 } 1661 case VM_GET_TOPOLOGY: { 1662 struct vm_cpu_topology topo; 1663 1664 vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores, 1665 &topo.threads, &topo.maxcpus); 1666 if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) { 1667 error = EFAULT; 1668 break; 1669 } 1670 break; 1671 } 1672 case VM_DEVMEM_GETOFFSET: { 1673 struct vm_devmem_offset vdo; 1674 vmm_devmem_entry_t *de; 1675 1676 if (ddi_copyin(datap, &vdo, sizeof (vdo), md) != 0) { 1677 error = EFAULT; 1678 break; 1679 } 1680 1681 de = vmmdev_devmem_find(sc, vdo.segid); 1682 if (de != NULL) { 1683 vdo.offset = de->vde_off; 1684 if (ddi_copyout(&vdo, datap, sizeof (vdo), md) != 0) { 1685 error = EFAULT; 1686 } 1687 } else { 1688 error = ENOENT; 1689 } 1690 break; 1691 } 1692 case VM_TRACK_DIRTY_PAGES: { 1693 const size_t max_track_region_len = 8 * PAGESIZE * 8 * PAGESIZE; 1694 struct vmm_dirty_tracker tracker; 1695 uint8_t *bitmap; 1696 size_t 
len; 1697 1698 if (ddi_copyin(datap, &tracker, sizeof (tracker), md) != 0) { 1699 error = EFAULT; 1700 break; 1701 } 1702 if ((tracker.vdt_start_gpa & PAGEOFFSET) != 0) { 1703 error = EINVAL; 1704 break; 1705 } 1706 if (tracker.vdt_len == 0) { 1707 break; 1708 } 1709 if ((tracker.vdt_len & PAGEOFFSET) != 0) { 1710 error = EINVAL; 1711 break; 1712 } 1713 if (tracker.vdt_len > max_track_region_len) { 1714 error = EINVAL; 1715 break; 1716 } 1717 len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8; 1718 bitmap = kmem_zalloc(len, KM_SLEEP); 1719 error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, 1720 tracker.vdt_len, bitmap); 1721 if (error == 0 && 1722 ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { 1723 error = EFAULT; 1724 } 1725 kmem_free(bitmap, len); 1726 1727 break; 1728 } 1729 case VM_NPT_OPERATION: { 1730 struct vm_npt_operation vno; 1731 uint8_t *bitmap = NULL; 1732 uint64_t bitmap_size = 0; 1733 1734 if (ddi_copyin(datap, &vno, sizeof (vno), md) != 0) { 1735 error = EFAULT; 1736 break; 1737 } 1738 if ((vno.vno_gpa & PAGEOFFSET) != 0 || 1739 (vno.vno_len & PAGEOFFSET) != 0) { 1740 error = EINVAL; 1741 break; 1742 } 1743 if ((UINT64_MAX - vno.vno_len) < vno.vno_gpa) { 1744 error = EOVERFLOW; 1745 break; 1746 } 1747 1748 /* 1749 * Allocate a bitmap for the operation if it is specified as 1750 * part of the input or output. 1751 */ 1752 if ((vno.vno_operation & 1753 (VNO_FLAG_BITMAP_IN | VNO_FLAG_BITMAP_OUT)) != 0) { 1754 /* 1755 * Operations expecting data to be copied in or out 1756 * should not have zero length. 1757 */ 1758 if (vno.vno_len == 0) { 1759 error = EINVAL; 1760 break; 1761 } 1762 1763 /* 1764 * Maximum bitmap size of 8 pages results in 1 GiB of 1765 * coverage. 1766 */ 1767 const uint64_t max_bitmap_size = 8 * PAGESIZE; 1768 1769 bitmap_size = roundup(vno.vno_len / PAGESIZE, 8) / 8; 1770 if (bitmap_size > max_bitmap_size) { 1771 error = E2BIG; 1772 break; 1773 } 1774 bitmap = kmem_zalloc(bitmap_size, KM_SLEEP); 1775 } 1776 1777 if ((vno.vno_operation & VNO_FLAG_BITMAP_IN) != 0) { 1778 ASSERT(bitmap != NULL); 1779 if (ddi_copyin(vno.vno_bitmap, bitmap, bitmap_size, 1780 md) != 0) { 1781 error = EFAULT; 1782 } 1783 } 1784 1785 if (error == 0) { 1786 error = vm_npt_do_operation(sc->vmm_vm, vno.vno_gpa, 1787 vno.vno_len, vno.vno_operation, bitmap, rvalp); 1788 } 1789 1790 if ((vno.vno_operation & VNO_FLAG_BITMAP_OUT) != 0 && 1791 error == 0) { 1792 ASSERT(bitmap != NULL); 1793 if (ddi_copyout(bitmap, vno.vno_bitmap, bitmap_size, 1794 md) != 0) { 1795 error = EFAULT; 1796 } 1797 } 1798 1799 if (bitmap != NULL) { 1800 kmem_free(bitmap, bitmap_size); 1801 } 1802 1803 break; 1804 } 1805 case VM_WRLOCK_CYCLE: { 1806 /* 1807 * Present a test mechanism to acquire/release the write lock 1808 * on the VM without any other effects. 
1809 */ 1810 break; 1811 } 1812 case VM_DATA_READ: { 1813 struct vm_data_xfer vdx; 1814 1815 if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) { 1816 error = EFAULT; 1817 break; 1818 } 1819 if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) { 1820 error = EINVAL; 1821 break; 1822 } 1823 if (vdx.vdx_len > VM_DATA_XFER_LIMIT) { 1824 error = EFBIG; 1825 break; 1826 } 1827 1828 const size_t len = vdx.vdx_len; 1829 void *buf = NULL; 1830 if (len != 0) { 1831 const void *udata = vdx.vdx_data; 1832 1833 buf = kmem_alloc(len, KM_SLEEP); 1834 if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) == 0) { 1835 bzero(buf, len); 1836 } else if (ddi_copyin(udata, buf, len, md) != 0) { 1837 kmem_free(buf, len); 1838 error = EFAULT; 1839 break; 1840 } 1841 } 1842 1843 vdx.vdx_result_len = 0; 1844 vmm_data_req_t req = { 1845 .vdr_class = vdx.vdx_class, 1846 .vdr_version = vdx.vdx_version, 1847 .vdr_flags = vdx.vdx_flags, 1848 .vdr_len = len, 1849 .vdr_data = buf, 1850 .vdr_result_len = &vdx.vdx_result_len, 1851 .vdr_vcpuid = vdx.vdx_vcpuid, 1852 }; 1853 error = vmm_data_read(sc->vmm_vm, &req); 1854 1855 if (error == 0 && buf != NULL) { 1856 if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { 1857 error = EFAULT; 1858 } 1859 } 1860 1861 /* 1862 * Copy out the transfer request so that the value of 1863 * vdx_result_len can be made available, regardless of any 1864 * error(s) which may have occurred. 1865 */ 1866 if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { 1867 error = (error != 0) ? error : EFAULT; 1868 } 1869 1870 if (buf != NULL) { 1871 kmem_free(buf, len); 1872 } 1873 break; 1874 } 1875 case VM_DATA_WRITE: { 1876 struct vm_data_xfer vdx; 1877 1878 if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) { 1879 error = EFAULT; 1880 break; 1881 } 1882 if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) { 1883 error = EINVAL; 1884 break; 1885 } 1886 if (vdx.vdx_len > VM_DATA_XFER_LIMIT) { 1887 error = EFBIG; 1888 break; 1889 } 1890 1891 const size_t len = vdx.vdx_len; 1892 void *buf = NULL; 1893 if (len != 0) { 1894 buf = kmem_alloc(len, KM_SLEEP); 1895 if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { 1896 kmem_free(buf, len); 1897 error = EFAULT; 1898 break; 1899 } 1900 } 1901 1902 vdx.vdx_result_len = 0; 1903 vmm_data_req_t req = { 1904 .vdr_class = vdx.vdx_class, 1905 .vdr_version = vdx.vdx_version, 1906 .vdr_flags = vdx.vdx_flags, 1907 .vdr_len = len, 1908 .vdr_data = buf, 1909 .vdr_result_len = &vdx.vdx_result_len, 1910 .vdr_vcpuid = vdx.vdx_vcpuid, 1911 }; 1912 if (vmm_allow_state_writes != 0) { 1913 error = vmm_data_write(sc->vmm_vm, &req); 1914 } else { 1915 /* 1916 * Reject the write if somone has thrown the switch back 1917 * into the "disallow" position. 1918 */ 1919 error = EPERM; 1920 } 1921 1922 if (error == 0 && buf != NULL && 1923 (vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) { 1924 if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { 1925 error = EFAULT; 1926 } 1927 } 1928 1929 /* 1930 * Copy out the transfer request so that the value of 1931 * vdx_result_len can be made available, regardless of any 1932 * error(s) which may have occurred. 1933 */ 1934 if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { 1935 error = (error != 0) ? 
error : EFAULT; 1936 } 1937 1938 if (buf != NULL) { 1939 kmem_free(buf, len); 1940 } 1941 break; 1942 } 1943 1944 case VM_PAUSE: { 1945 error = vm_pause_instance(sc->vmm_vm); 1946 break; 1947 } 1948 case VM_RESUME: { 1949 error = vm_resume_instance(sc->vmm_vm); 1950 break; 1951 } 1952 1953 default: 1954 error = ENOTTY; 1955 break; 1956 } 1957 1958 /* Release exclusion resources */ 1959 switch (lock_type) { 1960 case LOCK_NONE: 1961 break; 1962 case LOCK_VCPU: 1963 vcpu_unlock_one(sc, vcpu); 1964 break; 1965 case LOCK_READ_HOLD: 1966 vmm_read_unlock(sc); 1967 break; 1968 case LOCK_WRITE_HOLD: 1969 vmm_write_unlock(sc); 1970 break; 1971 default: 1972 panic("unexpected lock type"); 1973 break; 1974 } 1975 1976 return (error); 1977 } 1978 1979 static vmm_softc_t * 1980 vmm_lookup(const char *name) 1981 { 1982 list_t *vml = &vmm_list; 1983 vmm_softc_t *sc; 1984 1985 ASSERT(MUTEX_HELD(&vmm_mtx)); 1986 1987 for (sc = list_head(vml); sc != NULL; sc = list_next(vml, sc)) { 1988 if (strcmp(sc->vmm_name, name) == 0) { 1989 break; 1990 } 1991 } 1992 1993 return (sc); 1994 } 1995 1996 /* 1997 * Acquire an HMA registration if not already held. 1998 */ 1999 static boolean_t 2000 vmm_hma_acquire(void) 2001 { 2002 ASSERT(MUTEX_NOT_HELD(&vmm_mtx)); 2003 2004 mutex_enter(&vmmdev_mtx); 2005 2006 if (vmmdev_hma_reg == NULL) { 2007 VERIFY3U(vmmdev_hma_ref, ==, 0); 2008 vmmdev_hma_reg = hma_register(vmmdev_hvm_name); 2009 if (vmmdev_hma_reg == NULL) { 2010 cmn_err(CE_WARN, "%s HMA registration failed.", 2011 vmmdev_hvm_name); 2012 mutex_exit(&vmmdev_mtx); 2013 return (B_FALSE); 2014 } 2015 } 2016 2017 vmmdev_hma_ref++; 2018 2019 mutex_exit(&vmmdev_mtx); 2020 2021 return (B_TRUE); 2022 } 2023 2024 /* 2025 * Release the HMA registration if held and there are no remaining VMs. 2026 */ 2027 static void 2028 vmm_hma_release(void) 2029 { 2030 ASSERT(MUTEX_NOT_HELD(&vmm_mtx)); 2031 2032 mutex_enter(&vmmdev_mtx); 2033 2034 VERIFY3U(vmmdev_hma_ref, !=, 0); 2035 2036 vmmdev_hma_ref--; 2037 2038 if (vmmdev_hma_ref == 0) { 2039 VERIFY(vmmdev_hma_reg != NULL); 2040 hma_unregister(vmmdev_hma_reg); 2041 vmmdev_hma_reg = NULL; 2042 } 2043 mutex_exit(&vmmdev_mtx); 2044 } 2045 2046 static int 2047 vmmdev_do_vm_create(const struct vm_create_req *req, cred_t *cr) 2048 { 2049 vmm_softc_t *sc = NULL; 2050 minor_t minor; 2051 int error = ENOMEM; 2052 size_t len; 2053 const char *name = req->name; 2054 2055 len = strnlen(name, VM_MAX_NAMELEN); 2056 if (len == 0) { 2057 return (EINVAL); 2058 } 2059 if (len >= VM_MAX_NAMELEN) { 2060 return (ENAMETOOLONG); 2061 } 2062 if (strchr(name, '/') != NULL) { 2063 return (EINVAL); 2064 } 2065 2066 if (!vmm_hma_acquire()) 2067 return (ENXIO); 2068 2069 mutex_enter(&vmm_mtx); 2070 2071 /* Look for duplicate names */ 2072 if (vmm_lookup(name) != NULL) { 2073 mutex_exit(&vmm_mtx); 2074 vmm_hma_release(); 2075 return (EEXIST); 2076 } 2077 2078 /* Allow only one instance per non-global zone. 
*/ 2079 if (!INGLOBALZONE(curproc)) { 2080 for (sc = list_head(&vmm_list); sc != NULL; 2081 sc = list_next(&vmm_list, sc)) { 2082 if (sc->vmm_zone == curzone) { 2083 mutex_exit(&vmm_mtx); 2084 vmm_hma_release(); 2085 return (EINVAL); 2086 } 2087 } 2088 } 2089 2090 minor = id_alloc(vmm_minors); 2091 if (ddi_soft_state_zalloc(vmm_statep, minor) != DDI_SUCCESS) { 2092 goto fail; 2093 } else if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) { 2094 ddi_soft_state_free(vmm_statep, minor); 2095 goto fail; 2096 } else if (ddi_create_minor_node(vmmdev_dip, name, S_IFCHR, minor, 2097 DDI_PSEUDO, 0) != DDI_SUCCESS) { 2098 goto fail; 2099 } 2100 2101 if (vmm_kstat_alloc(sc, minor, cr) != 0) { 2102 goto fail; 2103 } 2104 2105 error = vm_create(req->flags, &sc->vmm_vm); 2106 if (error == 0) { 2107 /* Complete VM intialization and report success. */ 2108 (void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name)); 2109 sc->vmm_minor = minor; 2110 list_create(&sc->vmm_devmem_list, sizeof (vmm_devmem_entry_t), 2111 offsetof(vmm_devmem_entry_t, vde_node)); 2112 2113 list_create(&sc->vmm_holds, sizeof (vmm_hold_t), 2114 offsetof(vmm_hold_t, vmh_node)); 2115 cv_init(&sc->vmm_cv, NULL, CV_DEFAULT, NULL); 2116 2117 mutex_init(&sc->vmm_lease_lock, NULL, MUTEX_DEFAULT, NULL); 2118 list_create(&sc->vmm_lease_list, sizeof (vmm_lease_t), 2119 offsetof(vmm_lease_t, vml_node)); 2120 cv_init(&sc->vmm_lease_cv, NULL, CV_DEFAULT, NULL); 2121 rw_init(&sc->vmm_rwlock, NULL, RW_DEFAULT, NULL); 2122 2123 sc->vmm_zone = crgetzone(cr); 2124 zone_hold(sc->vmm_zone); 2125 vmm_zsd_add_vm(sc); 2126 vmm_kstat_init(sc); 2127 2128 list_insert_tail(&vmm_list, sc); 2129 mutex_exit(&vmm_mtx); 2130 return (0); 2131 } 2132 2133 vmm_kstat_fini(sc); 2134 ddi_remove_minor_node(vmmdev_dip, name); 2135 fail: 2136 id_free(vmm_minors, minor); 2137 if (sc != NULL) { 2138 ddi_soft_state_free(vmm_statep, minor); 2139 } 2140 mutex_exit(&vmm_mtx); 2141 vmm_hma_release(); 2142 2143 return (error); 2144 } 2145 2146 /* 2147 * Bhyve 'Driver' Interface 2148 * 2149 * While many devices are emulated in the bhyve userspace process, there are 2150 * others with performance constraints which require that they run mostly or 2151 * entirely in-kernel. For those not integrated directly into bhyve, an API is 2152 * needed so they can query/manipulate the portions of VM state needed to 2153 * fulfill their purpose. 2154 * 2155 * This includes: 2156 * - Translating guest-physical addresses to host-virtual pointers 2157 * - Injecting MSIs 2158 * - Hooking IO port addresses 2159 * 2160 * The vmm_drv interface exists to provide that functionality to its consumers. 
2161 * (At this time, 'viona' is the only user) 2162 */ 2163 int 2164 vmm_drv_hold(file_t *fp, cred_t *cr, vmm_hold_t **holdp) 2165 { 2166 vnode_t *vp = fp->f_vnode; 2167 const dev_t dev = vp->v_rdev; 2168 vmm_softc_t *sc; 2169 vmm_hold_t *hold; 2170 int err = 0; 2171 2172 if (vp->v_type != VCHR) { 2173 return (ENXIO); 2174 } 2175 const major_t major = getmajor(dev); 2176 const minor_t minor = getminor(dev); 2177 2178 mutex_enter(&vmmdev_mtx); 2179 if (vmmdev_dip == NULL || major != ddi_driver_major(vmmdev_dip)) { 2180 mutex_exit(&vmmdev_mtx); 2181 return (ENOENT); 2182 } 2183 mutex_enter(&vmm_mtx); 2184 mutex_exit(&vmmdev_mtx); 2185 2186 if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) { 2187 err = ENOENT; 2188 goto out; 2189 } 2190 /* XXXJOY: check cred permissions against instance */ 2191 2192 if ((sc->vmm_flags & VMM_DESTROY) != 0) { 2193 err = EBUSY; 2194 goto out; 2195 } 2196 2197 hold = kmem_zalloc(sizeof (*hold), KM_SLEEP); 2198 hold->vmh_sc = sc; 2199 hold->vmh_release_req = B_FALSE; 2200 2201 list_insert_tail(&sc->vmm_holds, hold); 2202 sc->vmm_flags |= VMM_HELD; 2203 *holdp = hold; 2204 2205 out: 2206 mutex_exit(&vmm_mtx); 2207 return (err); 2208 } 2209 2210 void 2211 vmm_drv_rele(vmm_hold_t *hold) 2212 { 2213 vmm_softc_t *sc; 2214 bool hma_release = false; 2215 2216 ASSERT(hold != NULL); 2217 ASSERT(hold->vmh_sc != NULL); 2218 VERIFY(hold->vmh_ioport_hook_cnt == 0); 2219 2220 mutex_enter(&vmm_mtx); 2221 sc = hold->vmh_sc; 2222 list_remove(&sc->vmm_holds, hold); 2223 kmem_free(hold, sizeof (*hold)); 2224 2225 if (list_is_empty(&sc->vmm_holds)) { 2226 sc->vmm_flags &= ~VMM_HELD; 2227 2228 /* 2229 * Since outstanding holds would prevent instance destruction 2230 * from completing, attempt to finish it now if it was already 2231 * set in motion. 
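 *
 * A minimal sketch of the consumer side of this contract (hypothetical
 * code, not from this file; 'hold' and 'lease' are the consumer's own):
 *
 *	if (vmm_drv_release_reqd(hold)) {
 *		vmm_drv_lease_break(hold, lease);
 *		vmm_drv_rele(hold);
 *	}
 *
 * Once the last such hold is released, the check below can finish a
 * destruction which was already requested.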
2232 */ 2233 if ((sc->vmm_flags & VMM_DESTROY) != 0) { 2234 VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, 2235 &hma_release)); 2236 } 2237 } 2238 mutex_exit(&vmm_mtx); 2239 2240 if (hma_release) { 2241 vmm_hma_release(); 2242 } 2243 } 2244 2245 boolean_t 2246 vmm_drv_release_reqd(vmm_hold_t *hold) 2247 { 2248 ASSERT(hold != NULL); 2249 2250 return (hold->vmh_release_req); 2251 } 2252 2253 vmm_lease_t * 2254 vmm_drv_lease_sign(vmm_hold_t *hold, boolean_t (*expiref)(void *), void *arg) 2255 { 2256 vmm_softc_t *sc = hold->vmh_sc; 2257 vmm_lease_t *lease; 2258 2259 ASSERT3P(expiref, !=, NULL); 2260 2261 if (hold->vmh_release_req) { 2262 return (NULL); 2263 } 2264 2265 lease = kmem_alloc(sizeof (*lease), KM_SLEEP); 2266 list_link_init(&lease->vml_node); 2267 lease->vml_expire_func = expiref; 2268 lease->vml_expire_arg = arg; 2269 lease->vml_expired = B_FALSE; 2270 lease->vml_break_deferred = B_FALSE; 2271 lease->vml_hold = hold; 2272 /* cache the VM pointer for one less pointer chase */ 2273 lease->vml_vm = sc->vmm_vm; 2274 lease->vml_vmclient = vmspace_client_alloc(vm_get_vmspace(sc->vmm_vm)); 2275 2276 mutex_enter(&sc->vmm_lease_lock); 2277 while (sc->vmm_lease_blocker != 0) { 2278 cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock); 2279 } 2280 list_insert_tail(&sc->vmm_lease_list, lease); 2281 vmm_read_lock(sc); 2282 mutex_exit(&sc->vmm_lease_lock); 2283 2284 return (lease); 2285 } 2286 2287 static void 2288 vmm_lease_break_locked(vmm_softc_t *sc, vmm_lease_t *lease) 2289 { 2290 ASSERT(MUTEX_HELD(&sc->vmm_lease_lock)); 2291 2292 list_remove(&sc->vmm_lease_list, lease); 2293 vmm_read_unlock(sc); 2294 vmc_destroy(lease->vml_vmclient); 2295 kmem_free(lease, sizeof (*lease)); 2296 } 2297 2298 static void 2299 vmm_lease_block(vmm_softc_t *sc) 2300 { 2301 mutex_enter(&sc->vmm_lease_lock); 2302 VERIFY3U(sc->vmm_lease_blocker, !=, UINT_MAX); 2303 sc->vmm_lease_blocker++; 2304 if (sc->vmm_lease_blocker == 1) { 2305 list_t *list = &sc->vmm_lease_list; 2306 vmm_lease_t *lease = list_head(list); 2307 2308 while (lease != NULL) { 2309 void *arg = lease->vml_expire_arg; 2310 boolean_t (*expiref)(void *) = lease->vml_expire_func; 2311 boolean_t sync_break = B_FALSE; 2312 2313 /* 2314 * Since the lease expiration notification may 2315 * need to take locks which would deadlock with 2316 * vmm_lease_lock, drop it across the call. 2317 * 2318 * We are the only one allowed to manipulate 2319 * vmm_lease_list right now, so it is safe to 2320 * continue iterating through it after 2321 * reacquiring the lock. 2322 */ 2323 lease->vml_expired = B_TRUE; 2324 mutex_exit(&sc->vmm_lease_lock); 2325 sync_break = expiref(arg); 2326 mutex_enter(&sc->vmm_lease_lock); 2327 2328 if (sync_break) { 2329 vmm_lease_t *next; 2330 2331 /* 2332 * These leases which are synchronously broken 2333 * result in vmm_read_unlock() calls from a 2334 * different thread than the corresponding 2335 * vmm_read_lock(). This is acceptable, given 2336 * that the rwlock underpinning the whole 2337 * mechanism tolerates the behavior. This 2338 * flexibility is _only_ afforded to VM read 2339 * lock (RW_READER) holders. 2340 */ 2341 next = list_next(list, lease); 2342 vmm_lease_break_locked(sc, lease); 2343 lease = next; 2344 } else { 2345 lease = list_next(list, lease); 2346 } 2347 } 2348 2349 /* Process leases which were not broken synchronously. */ 2350 while (!list_is_empty(list)) { 2351 /* 2352 * Although the nested loops are quadratic, the number 2353 * of leases is small. 
2354 */ 2355 lease = list_head(list); 2356 while (lease != NULL) { 2357 vmm_lease_t *next = list_next(list, lease); 2358 if (lease->vml_break_deferred) { 2359 vmm_lease_break_locked(sc, lease); 2360 } 2361 lease = next; 2362 } 2363 if (list_is_empty(list)) { 2364 break; 2365 } 2366 cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock); 2367 } 2368 /* Wake anyone else waiting for the lease list to be empty */ 2369 cv_broadcast(&sc->vmm_lease_cv); 2370 } else { 2371 list_t *list = &sc->vmm_lease_list; 2372 2373 /* 2374 * Some other thread beat us to the duty of lease cleanup. 2375 * Wait until that is complete. 2376 */ 2377 while (!list_is_empty(list)) { 2378 cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock); 2379 } 2380 } 2381 mutex_exit(&sc->vmm_lease_lock); 2382 } 2383 2384 static void 2385 vmm_lease_unblock(vmm_softc_t *sc) 2386 { 2387 mutex_enter(&sc->vmm_lease_lock); 2388 VERIFY3U(sc->vmm_lease_blocker, !=, 0); 2389 sc->vmm_lease_blocker--; 2390 if (sc->vmm_lease_blocker == 0) { 2391 cv_broadcast(&sc->vmm_lease_cv); 2392 } 2393 mutex_exit(&sc->vmm_lease_lock); 2394 } 2395 2396 void 2397 vmm_drv_lease_break(vmm_hold_t *hold, vmm_lease_t *lease) 2398 { 2399 vmm_softc_t *sc = hold->vmh_sc; 2400 2401 VERIFY3P(hold, ==, lease->vml_hold); 2402 VERIFY(!lease->vml_break_deferred); 2403 2404 mutex_enter(&sc->vmm_lease_lock); 2405 if (sc->vmm_lease_blocker == 0) { 2406 vmm_lease_break_locked(sc, lease); 2407 } else { 2408 /* 2409 * Defer the lease-breaking to whichever thread is currently 2410 * cleaning up all leases as part of a vmm_lease_block() call. 2411 */ 2412 lease->vml_break_deferred = B_TRUE; 2413 cv_broadcast(&sc->vmm_lease_cv); 2414 } 2415 mutex_exit(&sc->vmm_lease_lock); 2416 } 2417 2418 boolean_t 2419 vmm_drv_lease_expired(vmm_lease_t *lease) 2420 { 2421 return (lease->vml_expired); 2422 } 2423 2424 vmm_page_t * 2425 vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot) 2426 { 2427 ASSERT(lease != NULL); 2428 ASSERT0(gpa & PAGEOFFSET); 2429 2430 return ((vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot)); 2431 } 2432 2433 2434 /* Ensure that flags mirrored by vmm_drv interface properly match up */ 2435 CTASSERT(VMPF_DEFER_DIRTY == VPF_DEFER_DIRTY); 2436 2437 vmm_page_t * 2438 vmm_drv_page_hold_ext(vmm_lease_t *lease, uintptr_t gpa, int prot, int flags) 2439 { 2440 ASSERT(lease != NULL); 2441 ASSERT0(gpa & PAGEOFFSET); 2442 2443 vmm_page_t *page = 2444 (vmm_page_t *)vmc_hold_ext(lease->vml_vmclient, gpa, prot, flags); 2445 return (page); 2446 } 2447 2448 void 2449 vmm_drv_page_release(vmm_page_t *vmmp) 2450 { 2451 (void) vmp_release((vm_page_t *)vmmp); 2452 } 2453 2454 void 2455 vmm_drv_page_release_chain(vmm_page_t *vmmp) 2456 { 2457 (void) vmp_release_chain((vm_page_t *)vmmp); 2458 } 2459 2460 const void * 2461 vmm_drv_page_readable(const vmm_page_t *vmmp) 2462 { 2463 return (vmp_get_readable((const vm_page_t *)vmmp)); 2464 } 2465 2466 void * 2467 vmm_drv_page_writable(const vmm_page_t *vmmp) 2468 { 2469 return (vmp_get_writable((const vm_page_t *)vmmp)); 2470 } 2471 2472 void 2473 vmm_drv_page_mark_dirty(vmm_page_t *vmmp) 2474 { 2475 return (vmp_mark_dirty((vm_page_t *)vmmp)); 2476 } 2477 2478 void 2479 vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain) 2480 { 2481 vmp_chain((vm_page_t *)vmmp, (vm_page_t *)to_chain); 2482 } 2483 2484 vmm_page_t * 2485 vmm_drv_page_next(const vmm_page_t *vmmp) 2486 { 2487 return ((vmm_page_t *)vmp_next((vm_page_t *)vmmp)); 2488 } 2489 2490 int 2491 vmm_drv_msi(vmm_lease_t *lease, uint64_t addr, uint64_t msg) 2492 { 2493 ASSERT(lease 
!= NULL); 2494 2495 return (lapic_intr_msi(lease->vml_vm, addr, msg)); 2496 } 2497 2498 int 2499 vmm_drv_ioport_hook(vmm_hold_t *hold, uint16_t ioport, vmm_drv_iop_cb_t func, 2500 void *arg, void **cookie) 2501 { 2502 vmm_softc_t *sc; 2503 int err; 2504 2505 ASSERT(hold != NULL); 2506 ASSERT(cookie != NULL); 2507 2508 sc = hold->vmh_sc; 2509 mutex_enter(&vmm_mtx); 2510 /* Confirm that hook installation is not blocked */ 2511 if ((sc->vmm_flags & VMM_BLOCK_HOOK) != 0) { 2512 mutex_exit(&vmm_mtx); 2513 return (EBUSY); 2514 } 2515 /* 2516 * Optimistically record an installed hook which will prevent a block 2517 * from being asserted while the mutex is dropped. 2518 */ 2519 hold->vmh_ioport_hook_cnt++; 2520 mutex_exit(&vmm_mtx); 2521 2522 vmm_write_lock(sc); 2523 err = vm_ioport_hook(sc->vmm_vm, ioport, (ioport_handler_t)func, 2524 arg, cookie); 2525 vmm_write_unlock(sc); 2526 2527 if (err != 0) { 2528 mutex_enter(&vmm_mtx); 2529 /* Walk back optimism about the hook installation */ 2530 hold->vmh_ioport_hook_cnt--; 2531 mutex_exit(&vmm_mtx); 2532 } 2533 return (err); 2534 } 2535 2536 void 2537 vmm_drv_ioport_unhook(vmm_hold_t *hold, void **cookie) 2538 { 2539 vmm_softc_t *sc; 2540 2541 ASSERT(hold != NULL); 2542 ASSERT(cookie != NULL); 2543 ASSERT(hold->vmh_ioport_hook_cnt != 0); 2544 2545 sc = hold->vmh_sc; 2546 vmm_write_lock(sc); 2547 vm_ioport_unhook(sc->vmm_vm, cookie); 2548 vmm_write_unlock(sc); 2549 2550 mutex_enter(&vmm_mtx); 2551 hold->vmh_ioport_hook_cnt--; 2552 mutex_exit(&vmm_mtx); 2553 } 2554 2555 static void 2556 vmm_drv_purge(vmm_softc_t *sc) 2557 { 2558 ASSERT(MUTEX_HELD(&vmm_mtx)); 2559 2560 if ((sc->vmm_flags & VMM_HELD) != 0) { 2561 vmm_hold_t *hold; 2562 2563 for (hold = list_head(&sc->vmm_holds); hold != NULL; 2564 hold = list_next(&sc->vmm_holds, hold)) { 2565 hold->vmh_release_req = B_TRUE; 2566 } 2567 2568 /* 2569 * Require that all leases on the instance be broken, now that 2570 * all associated holds have been marked as needing release. 2571 * 2572 * Dropping vmm_mtx is not strictly necessary, but if any of the 2573 * lessees are slow to respond, it would be nice to leave it 2574 * available for other parties. 2575 */ 2576 mutex_exit(&vmm_mtx); 2577 vmm_lease_block(sc); 2578 vmm_lease_unblock(sc); 2579 mutex_enter(&vmm_mtx); 2580 } 2581 } 2582 2583 static int 2584 vmm_drv_block_hook(vmm_softc_t *sc, boolean_t enable_block) 2585 { 2586 int err = 0; 2587 2588 mutex_enter(&vmm_mtx); 2589 if (!enable_block) { 2590 VERIFY((sc->vmm_flags & VMM_BLOCK_HOOK) != 0); 2591 2592 sc->vmm_flags &= ~VMM_BLOCK_HOOK; 2593 goto done; 2594 } 2595 2596 /* If any holds have hooks installed, the block is a failure */ 2597 if (!list_is_empty(&sc->vmm_holds)) { 2598 vmm_hold_t *hold; 2599 2600 for (hold = list_head(&sc->vmm_holds); hold != NULL; 2601 hold = list_next(&sc->vmm_holds, hold)) { 2602 if (hold->vmh_ioport_hook_cnt != 0) { 2603 err = EBUSY; 2604 goto done; 2605 } 2606 } 2607 } 2608 sc->vmm_flags |= VMM_BLOCK_HOOK; 2609 2610 done: 2611 mutex_exit(&vmm_mtx); 2612 return (err); 2613 } 2614 2615 2616 static void 2617 vmm_destroy_begin(vmm_softc_t *sc, vmm_destroy_opts_t opts) 2618 { 2619 ASSERT(MUTEX_HELD(&vmm_mtx)); 2620 ASSERT0(sc->vmm_flags & VMM_DESTROY); 2621 2622 sc->vmm_flags |= VMM_DESTROY; 2623 2624 /* 2625 * Lock and unlock all of the vCPUs to ensure that they are kicked out 2626 * of guest context, being unable to return now that the instance is 2627 * marked for destruction. 
2628 */ 2629 const int maxcpus = vm_get_maxcpus(sc->vmm_vm); 2630 for (int vcpu = 0; vcpu < maxcpus; vcpu++) { 2631 vcpu_lock_one(sc, vcpu); 2632 vcpu_unlock_one(sc, vcpu); 2633 } 2634 2635 vmmdev_devmem_purge(sc); 2636 if ((opts & VDO_NO_CLEAN_ZSD) == 0) { 2637 /* 2638 * The ZSD should be cleaned up now, unless destruction of the 2639 * instance was initated by destruction of the containing zone, 2640 * in which case the ZSD has already been removed. 2641 */ 2642 vmm_zsd_rem_vm(sc); 2643 } 2644 zone_rele(sc->vmm_zone); 2645 2646 vmm_drv_purge(sc); 2647 } 2648 2649 static bool 2650 vmm_destroy_ready(vmm_softc_t *sc) 2651 { 2652 ASSERT(MUTEX_HELD(&vmm_mtx)); 2653 2654 if ((sc->vmm_flags & (VMM_HELD | VMM_IS_OPEN)) == 0) { 2655 VERIFY(list_is_empty(&sc->vmm_holds)); 2656 return (true); 2657 } 2658 2659 return (false); 2660 } 2661 2662 static void 2663 vmm_destroy_finish(vmm_softc_t *sc) 2664 { 2665 ASSERT(MUTEX_HELD(&vmm_mtx)); 2666 ASSERT(vmm_destroy_ready(sc)); 2667 2668 list_remove(&vmm_list, sc); 2669 vmm_kstat_fini(sc); 2670 vm_destroy(sc->vmm_vm); 2671 ddi_remove_minor_node(vmmdev_dip, sc->vmm_name); 2672 (void) devfs_clean(ddi_get_parent(vmmdev_dip), NULL, DV_CLEAN_FORCE); 2673 2674 const minor_t minor = sc->vmm_minor; 2675 ddi_soft_state_free(vmm_statep, minor); 2676 id_free(vmm_minors, minor); 2677 } 2678 2679 /* 2680 * Initiate or attempt to finish destruction of a VMM instance. 2681 * 2682 * This is called from several contexts: 2683 * - An explicit destroy ioctl is made 2684 * - A vmm_drv consumer releases its hold (being the last on the instance) 2685 * - The vmm device is closed, and auto-destruct is enabled 2686 */ 2687 static int 2688 vmm_destroy_locked(vmm_softc_t *sc, vmm_destroy_opts_t opts, 2689 bool *hma_release) 2690 { 2691 ASSERT(MUTEX_HELD(&vmm_mtx)); 2692 2693 *hma_release = false; 2694 2695 /* 2696 * When instance destruction begins, it is so marked such that any 2697 * further requests to operate the instance will fail. 2698 */ 2699 if ((sc->vmm_flags & VMM_DESTROY) == 0) { 2700 vmm_destroy_begin(sc, opts); 2701 } 2702 2703 if (vmm_destroy_ready(sc)) { 2704 2705 /* 2706 * Notify anyone waiting for the destruction to finish. They 2707 * must be clear before we can safely tear down the softc. 2708 */ 2709 if (sc->vmm_destroy_waiters != 0) { 2710 cv_broadcast(&sc->vmm_cv); 2711 while (sc->vmm_destroy_waiters != 0) { 2712 cv_wait(&sc->vmm_cv, &vmm_mtx); 2713 } 2714 } 2715 2716 /* 2717 * Finish destruction of instance. After this point, the softc 2718 * is freed and cannot be accessed again. 2719 * 2720 * With destruction complete, the HMA hold can be released 2721 */ 2722 vmm_destroy_finish(sc); 2723 *hma_release = true; 2724 return (0); 2725 } else if ((opts & VDO_ATTEMPT_WAIT) != 0) { 2726 int err = 0; 2727 2728 sc->vmm_destroy_waiters++; 2729 while (!vmm_destroy_ready(sc) && err == 0) { 2730 if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) { 2731 err = EINTR; 2732 } 2733 } 2734 sc->vmm_destroy_waiters--; 2735 2736 if (sc->vmm_destroy_waiters == 0) { 2737 /* 2738 * If we were the last waiter, it could be that VM 2739 * destruction is waiting on _us_ to proceed with the 2740 * final clean-up. 2741 */ 2742 cv_signal(&sc->vmm_cv); 2743 } 2744 return (err); 2745 } else { 2746 /* 2747 * Since the instance is not ready for destruction, and the 2748 * caller did not ask to wait, consider it a success for now. 
2749 */ 2750 return (0); 2751 } 2752 } 2753 2754 void 2755 vmm_zone_vm_destroy(vmm_softc_t *sc) 2756 { 2757 bool hma_release = false; 2758 int err; 2759 2760 mutex_enter(&vmm_mtx); 2761 err = vmm_destroy_locked(sc, VDO_NO_CLEAN_ZSD, &hma_release); 2762 mutex_exit(&vmm_mtx); 2763 2764 VERIFY0(err); 2765 2766 if (hma_release) { 2767 vmm_hma_release(); 2768 } 2769 } 2770 2771 static int 2772 vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr) 2773 { 2774 vmm_softc_t *sc; 2775 bool hma_release = false; 2776 int err; 2777 2778 if (crgetuid(cr) != 0) { 2779 return (EPERM); 2780 } 2781 2782 mutex_enter(&vmm_mtx); 2783 sc = vmm_lookup(req->name); 2784 if (sc == NULL) { 2785 mutex_exit(&vmm_mtx); 2786 return (ENOENT); 2787 } 2788 /* 2789 * We don't check this in vmm_lookup() since that function is also used 2790 * for validation during create and currently vmm names must be unique. 2791 */ 2792 if (!INGLOBALZONE(curproc) && sc->vmm_zone != curzone) { 2793 mutex_exit(&vmm_mtx); 2794 return (EPERM); 2795 } 2796 2797 err = vmm_destroy_locked(sc, VDO_ATTEMPT_WAIT, &hma_release); 2798 mutex_exit(&vmm_mtx); 2799 2800 if (hma_release) { 2801 vmm_hma_release(); 2802 } 2803 2804 return (err); 2805 } 2806 2807 #define VCPU_NAME_BUFLEN 32 2808 2809 static int 2810 vmm_kstat_alloc(vmm_softc_t *sc, minor_t minor, const cred_t *cr) 2811 { 2812 zoneid_t zid = crgetzoneid(cr); 2813 int instance = minor; 2814 kstat_t *ksp; 2815 2816 ASSERT3P(sc->vmm_kstat_vm, ==, NULL); 2817 2818 ksp = kstat_create_zone(VMM_MODULE_NAME, instance, "vm", 2819 VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED, 2820 sizeof (vmm_kstats_t) / sizeof (kstat_named_t), 0, zid); 2821 2822 if (ksp == NULL) { 2823 return (-1); 2824 } 2825 sc->vmm_kstat_vm = ksp; 2826 2827 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2828 char namebuf[VCPU_NAME_BUFLEN]; 2829 2830 ASSERT3P(sc->vmm_kstat_vcpu[i], ==, NULL); 2831 2832 (void) snprintf(namebuf, VCPU_NAME_BUFLEN, "vcpu%u", i); 2833 ksp = kstat_create_zone(VMM_MODULE_NAME, instance, namebuf, 2834 VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED, 2835 sizeof (vmm_vcpu_kstats_t) / sizeof (kstat_named_t), 2836 0, zid); 2837 if (ksp == NULL) { 2838 goto fail; 2839 } 2840 2841 sc->vmm_kstat_vcpu[i] = ksp; 2842 } 2843 2844 /* 2845 * If this instance is associated with a non-global zone, make its 2846 * kstats visible from the GZ. 
2847 */ 2848 if (zid != GLOBAL_ZONEID) { 2849 kstat_zone_add(sc->vmm_kstat_vm, GLOBAL_ZONEID); 2850 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2851 kstat_zone_add(sc->vmm_kstat_vcpu[i], GLOBAL_ZONEID); 2852 } 2853 } 2854 2855 return (0); 2856 2857 fail: 2858 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2859 if (sc->vmm_kstat_vcpu[i] != NULL) { 2860 kstat_delete(sc->vmm_kstat_vcpu[i]); 2861 sc->vmm_kstat_vcpu[i] = NULL; 2862 } else { 2863 break; 2864 } 2865 } 2866 kstat_delete(sc->vmm_kstat_vm); 2867 sc->vmm_kstat_vm = NULL; 2868 return (-1); 2869 } 2870 2871 static void 2872 vmm_kstat_init(vmm_softc_t *sc) 2873 { 2874 kstat_t *ksp; 2875 2876 ASSERT3P(sc->vmm_vm, !=, NULL); 2877 ASSERT3P(sc->vmm_kstat_vm, !=, NULL); 2878 2879 ksp = sc->vmm_kstat_vm; 2880 vmm_kstats_t *vk = ksp->ks_data; 2881 ksp->ks_private = sc->vmm_vm; 2882 kstat_named_init(&vk->vk_name, "vm_name", KSTAT_DATA_STRING); 2883 kstat_named_setstr(&vk->vk_name, sc->vmm_name); 2884 2885 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2886 ASSERT3P(sc->vmm_kstat_vcpu[i], !=, NULL); 2887 2888 ksp = sc->vmm_kstat_vcpu[i]; 2889 vmm_vcpu_kstats_t *vvk = ksp->ks_data; 2890 2891 kstat_named_init(&vvk->vvk_vcpu, "vcpu", KSTAT_DATA_UINT32); 2892 vvk->vvk_vcpu.value.ui32 = i; 2893 kstat_named_init(&vvk->vvk_time_init, "time_init", 2894 KSTAT_DATA_UINT64); 2895 kstat_named_init(&vvk->vvk_time_run, "time_run", 2896 KSTAT_DATA_UINT64); 2897 kstat_named_init(&vvk->vvk_time_idle, "time_idle", 2898 KSTAT_DATA_UINT64); 2899 kstat_named_init(&vvk->vvk_time_emu_kern, "time_emu_kern", 2900 KSTAT_DATA_UINT64); 2901 kstat_named_init(&vvk->vvk_time_emu_user, "time_emu_user", 2902 KSTAT_DATA_UINT64); 2903 kstat_named_init(&vvk->vvk_time_sched, "time_sched", 2904 KSTAT_DATA_UINT64); 2905 ksp->ks_private = sc->vmm_vm; 2906 ksp->ks_update = vmm_kstat_update_vcpu; 2907 } 2908 2909 kstat_install(sc->vmm_kstat_vm); 2910 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2911 kstat_install(sc->vmm_kstat_vcpu[i]); 2912 } 2913 } 2914 2915 static void 2916 vmm_kstat_fini(vmm_softc_t *sc) 2917 { 2918 ASSERT(sc->vmm_kstat_vm != NULL); 2919 2920 kstat_delete(sc->vmm_kstat_vm); 2921 sc->vmm_kstat_vm = NULL; 2922 2923 for (uint_t i = 0; i < VM_MAXCPU; i++) { 2924 ASSERT3P(sc->vmm_kstat_vcpu[i], !=, NULL); 2925 2926 kstat_delete(sc->vmm_kstat_vcpu[i]); 2927 sc->vmm_kstat_vcpu[i] = NULL; 2928 } 2929 } 2930 2931 static int 2932 vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2933 { 2934 minor_t minor; 2935 vmm_softc_t *sc; 2936 2937 /* 2938 * Forbid running bhyve in a 32-bit process until it has been tested and 2939 * verified to be safe. 2940 */ 2941 if (curproc->p_model != DATAMODEL_LP64) { 2942 return (EFBIG); 2943 } 2944 2945 minor = getminor(*devp); 2946 if (minor == VMM_CTL_MINOR) { 2947 /* 2948 * Master control device must be opened exclusively. 
2949 */ 2950 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 2951 return (EINVAL); 2952 } 2953 2954 return (0); 2955 } 2956 2957 mutex_enter(&vmm_mtx); 2958 sc = ddi_get_soft_state(vmm_statep, minor); 2959 if (sc == NULL) { 2960 mutex_exit(&vmm_mtx); 2961 return (ENXIO); 2962 } 2963 2964 sc->vmm_flags |= VMM_IS_OPEN; 2965 mutex_exit(&vmm_mtx); 2966 2967 return (0); 2968 } 2969 2970 static int 2971 vmm_close(dev_t dev, int flag, int otyp, cred_t *credp) 2972 { 2973 const minor_t minor = getminor(dev); 2974 vmm_softc_t *sc; 2975 bool hma_release = false; 2976 2977 if (minor == VMM_CTL_MINOR) { 2978 return (0); 2979 } 2980 2981 mutex_enter(&vmm_mtx); 2982 sc = ddi_get_soft_state(vmm_statep, minor); 2983 if (sc == NULL) { 2984 mutex_exit(&vmm_mtx); 2985 return (ENXIO); 2986 } 2987 2988 VERIFY3U(sc->vmm_flags & VMM_IS_OPEN, !=, 0); 2989 sc->vmm_flags &= ~VMM_IS_OPEN; 2990 2991 /* 2992 * If instance was marked for auto-destruction begin that now. Instance 2993 * destruction may have been initated already, so try to make progress 2994 * in that case, since closure of the device is one of its requirements. 2995 */ 2996 if ((sc->vmm_flags & VMM_DESTROY) != 0 || 2997 (sc->vmm_flags & VMM_AUTODESTROY) != 0) { 2998 VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release)); 2999 } 3000 mutex_exit(&vmm_mtx); 3001 3002 if (hma_release) { 3003 vmm_hma_release(); 3004 } 3005 3006 return (0); 3007 } 3008 3009 static int 3010 vmm_is_supported(intptr_t arg) 3011 { 3012 int r; 3013 const char *msg; 3014 3015 if (vmm_is_intel()) { 3016 r = vmx_x86_supported(&msg); 3017 } else if (vmm_is_svm()) { 3018 /* 3019 * HMA already ensured that the features necessary for SVM 3020 * operation were present and online during vmm_attach(). 3021 */ 3022 r = 0; 3023 } else { 3024 r = ENXIO; 3025 msg = "Unsupported CPU vendor"; 3026 } 3027 3028 if (r != 0 && arg != (intptr_t)NULL) { 3029 if (copyoutstr(msg, (char *)arg, strlen(msg) + 1, NULL) != 0) 3030 return (EFAULT); 3031 } 3032 return (r); 3033 } 3034 3035 static int 3036 vmm_ctl_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp) 3037 { 3038 void *argp = (void *)arg; 3039 3040 switch (cmd) { 3041 case VMM_CREATE_VM: { 3042 struct vm_create_req req; 3043 3044 if ((md & FWRITE) == 0) { 3045 return (EPERM); 3046 } 3047 if (ddi_copyin(argp, &req, sizeof (req), md) != 0) { 3048 return (EFAULT); 3049 } 3050 return (vmmdev_do_vm_create(&req, cr)); 3051 } 3052 case VMM_DESTROY_VM: { 3053 struct vm_destroy_req req; 3054 3055 if ((md & FWRITE) == 0) { 3056 return (EPERM); 3057 } 3058 if (ddi_copyin(argp, &req, sizeof (req), md) != 0) { 3059 return (EFAULT); 3060 } 3061 return (vmmdev_do_vm_destroy(&req, cr)); 3062 } 3063 case VMM_VM_SUPPORTED: 3064 return (vmm_is_supported(arg)); 3065 case VMM_CHECK_IOMMU: 3066 if (!vmm_check_iommu()) { 3067 return (ENXIO); 3068 } 3069 return (0); 3070 case VMM_RESV_QUERY: 3071 case VMM_RESV_SET_TARGET: 3072 return (vmmr_ioctl(cmd, arg, md, cr, rvalp)); 3073 default: 3074 break; 3075 } 3076 /* No other actions are legal on ctl device */ 3077 return (ENOTTY); 3078 } 3079 3080 static int 3081 vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3082 int *rvalp) 3083 { 3084 vmm_softc_t *sc; 3085 minor_t minor; 3086 3087 /* 3088 * Forbid running bhyve in a 32-bit process until it has been tested and 3089 * verified to be safe. 
3090 */ 3091 if (curproc->p_model != DATAMODEL_LP64) { 3092 return (EFBIG); 3093 } 3094 3095 /* The structs in bhyve ioctls assume a 64-bit datamodel */ 3096 if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) { 3097 return (ENOTSUP); 3098 } 3099 3100 /* 3101 * Regardless of minor (vmmctl or instance), we respond to queries of 3102 * the interface version. 3103 */ 3104 if (cmd == VMM_INTERFACE_VERSION) { 3105 *rvalp = VMM_CURRENT_INTERFACE_VERSION; 3106 return (0); 3107 } 3108 3109 minor = getminor(dev); 3110 3111 if (minor == VMM_CTL_MINOR) { 3112 return (vmm_ctl_ioctl(cmd, arg, mode, credp, rvalp)); 3113 } 3114 3115 sc = ddi_get_soft_state(vmm_statep, minor); 3116 ASSERT(sc != NULL); 3117 3118 /* 3119 * Turn away any ioctls against an instance when it is being destroyed. 3120 * (Except for the ioctl inquiring about that destroy-in-progress.) 3121 */ 3122 if ((sc->vmm_flags & VMM_DESTROY) != 0) { 3123 if (cmd == VM_DESTROY_PENDING) { 3124 *rvalp = 1; 3125 return (0); 3126 } 3127 return (ENXIO); 3128 } 3129 3130 return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp)); 3131 } 3132 3133 static int 3134 vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 3135 unsigned int prot, unsigned int maxprot, unsigned int flags, cred_t *credp) 3136 { 3137 vmm_softc_t *sc; 3138 const minor_t minor = getminor(dev); 3139 int err; 3140 3141 if (minor == VMM_CTL_MINOR) { 3142 return (ENODEV); 3143 } 3144 if (off < 0 || (off + len) <= 0) { 3145 return (EINVAL); 3146 } 3147 if ((prot & PROT_USER) == 0) { 3148 return (EACCES); 3149 } 3150 3151 sc = ddi_get_soft_state(vmm_statep, minor); 3152 ASSERT(sc); 3153 3154 if (sc->vmm_flags & VMM_DESTROY) 3155 return (ENXIO); 3156 3157 /* Grab read lock on the VM to prevent any changes to the memory map */ 3158 vmm_read_lock(sc); 3159 3160 if (off >= VM_DEVMEM_START) { 3161 int segid; 3162 off_t segoff; 3163 3164 /* Mapping a devmem "device" */ 3165 if (!vmmdev_devmem_segid(sc, off, len, &segid, &segoff)) { 3166 err = ENODEV; 3167 } else { 3168 err = vm_segmap_obj(sc->vmm_vm, segid, segoff, len, as, 3169 addrp, prot, maxprot, flags); 3170 } 3171 } else { 3172 /* Mapping a part of the guest physical space */ 3173 err = vm_segmap_space(sc->vmm_vm, off, as, addrp, len, prot, 3174 maxprot, flags); 3175 } 3176 3177 vmm_read_unlock(sc); 3178 return (err); 3179 } 3180 3181 static sdev_plugin_validate_t 3182 vmm_sdev_validate(sdev_ctx_t ctx) 3183 { 3184 const char *name = sdev_ctx_name(ctx); 3185 vmm_softc_t *sc; 3186 sdev_plugin_validate_t ret; 3187 minor_t minor; 3188 3189 if (sdev_ctx_vtype(ctx) != VCHR) 3190 return (SDEV_VTOR_INVALID); 3191 3192 VERIFY3S(sdev_ctx_minor(ctx, &minor), ==, 0); 3193 3194 mutex_enter(&vmm_mtx); 3195 if ((sc = vmm_lookup(name)) == NULL) 3196 ret = SDEV_VTOR_INVALID; 3197 else if (sc->vmm_minor != minor) 3198 ret = SDEV_VTOR_STALE; 3199 else 3200 ret = SDEV_VTOR_VALID; 3201 mutex_exit(&vmm_mtx); 3202 3203 return (ret); 3204 } 3205 3206 static int 3207 vmm_sdev_filldir(sdev_ctx_t ctx) 3208 { 3209 vmm_softc_t *sc; 3210 int ret; 3211 3212 if (strcmp(sdev_ctx_path(ctx), VMM_SDEV_ROOT) != 0) { 3213 cmn_err(CE_WARN, "%s: bad path '%s' != '%s'\n", __func__, 3214 sdev_ctx_path(ctx), VMM_SDEV_ROOT); 3215 return (EINVAL); 3216 } 3217 3218 mutex_enter(&vmm_mtx); 3219 ASSERT(vmmdev_dip != NULL); 3220 for (sc = list_head(&vmm_list); sc != NULL; 3221 sc = list_next(&vmm_list, sc)) { 3222 if (INGLOBALZONE(curproc) || sc->vmm_zone == curzone) { 3223 ret = sdev_plugin_mknod(ctx, sc->vmm_name, 3224 S_IFCHR | 0600, 3225 
makedevice(ddi_driver_major(vmmdev_dip), 3226 sc->vmm_minor)); 3227 } else { 3228 continue; 3229 } 3230 if (ret != 0 && ret != EEXIST) 3231 goto out; 3232 } 3233 3234 ret = 0; 3235 3236 out: 3237 mutex_exit(&vmm_mtx); 3238 return (ret); 3239 } 3240 3241 /* ARGSUSED */ 3242 static void 3243 vmm_sdev_inactive(sdev_ctx_t ctx) 3244 { 3245 } 3246 3247 static sdev_plugin_ops_t vmm_sdev_ops = { 3248 .spo_version = SDEV_PLUGIN_VERSION, 3249 .spo_flags = SDEV_PLUGIN_SUBDIR, 3250 .spo_validate = vmm_sdev_validate, 3251 .spo_filldir = vmm_sdev_filldir, 3252 .spo_inactive = vmm_sdev_inactive 3253 }; 3254 3255 /* ARGSUSED */ 3256 static int 3257 vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 3258 { 3259 int error; 3260 3261 switch (cmd) { 3262 case DDI_INFO_DEVT2DEVINFO: 3263 *result = (void *)vmmdev_dip; 3264 error = DDI_SUCCESS; 3265 break; 3266 case DDI_INFO_DEVT2INSTANCE: 3267 *result = (void *)0; 3268 error = DDI_SUCCESS; 3269 break; 3270 default: 3271 error = DDI_FAILURE; 3272 break; 3273 } 3274 return (error); 3275 } 3276 3277 static int 3278 vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3279 { 3280 sdev_plugin_hdl_t sph; 3281 hma_reg_t *reg = NULL; 3282 boolean_t vmm_loaded = B_FALSE; 3283 3284 if (cmd != DDI_ATTACH) { 3285 return (DDI_FAILURE); 3286 } 3287 3288 mutex_enter(&vmmdev_mtx); 3289 /* Ensure we are not already attached. */ 3290 if (vmmdev_dip != NULL) { 3291 mutex_exit(&vmmdev_mtx); 3292 return (DDI_FAILURE); 3293 } 3294 3295 vmm_sol_glue_init(); 3296 3297 /* 3298 * Perform temporary HMA registration to determine if the system 3299 * is capable. 3300 */ 3301 if ((reg = hma_register(vmmdev_hvm_name)) == NULL) { 3302 goto fail; 3303 } else if (vmm_mod_load() != 0) { 3304 goto fail; 3305 } 3306 vmm_loaded = B_TRUE; 3307 hma_unregister(reg); 3308 reg = NULL; 3309 3310 /* Create control node. Other nodes will be created on demand. */ 3311 if (ddi_create_minor_node(dip, "ctl", S_IFCHR, 3312 VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) { 3313 goto fail; 3314 } 3315 3316 sph = sdev_plugin_register(VMM_MODULE_NAME, &vmm_sdev_ops, NULL); 3317 if (sph == (sdev_plugin_hdl_t)NULL) { 3318 ddi_remove_minor_node(dip, NULL); 3319 goto fail; 3320 } 3321 3322 ddi_report_dev(dip); 3323 vmmdev_sdev_hdl = sph; 3324 vmmdev_dip = dip; 3325 mutex_exit(&vmmdev_mtx); 3326 return (DDI_SUCCESS); 3327 3328 fail: 3329 if (vmm_loaded) { 3330 VERIFY0(vmm_mod_unload()); 3331 } 3332 if (reg != NULL) { 3333 hma_unregister(reg); 3334 } 3335 vmm_sol_glue_cleanup(); 3336 mutex_exit(&vmmdev_mtx); 3337 return (DDI_FAILURE); 3338 } 3339 3340 static int 3341 vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3342 { 3343 if (cmd != DDI_DETACH) { 3344 return (DDI_FAILURE); 3345 } 3346 3347 /* 3348 * Ensure that all resources have been cleaned up. 3349 * 3350 * To prevent a deadlock with iommu_cleanup() we'll fail the detach if 3351 * vmmdev_mtx is already held. We can't wait for vmmdev_mtx with our 3352 * devinfo locked as iommu_cleanup() tries to recursively lock each 3353 * devinfo, including our own, while holding vmmdev_mtx. 
3354 */ 3355 if (mutex_tryenter(&vmmdev_mtx) == 0) 3356 return (DDI_FAILURE); 3357 3358 mutex_enter(&vmm_mtx); 3359 if (!list_is_empty(&vmm_list)) { 3360 mutex_exit(&vmm_mtx); 3361 mutex_exit(&vmmdev_mtx); 3362 return (DDI_FAILURE); 3363 } 3364 mutex_exit(&vmm_mtx); 3365 3366 if (!vmmr_is_empty()) { 3367 mutex_exit(&vmmdev_mtx); 3368 return (DDI_FAILURE); 3369 } 3370 3371 VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL); 3372 if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) { 3373 mutex_exit(&vmmdev_mtx); 3374 return (DDI_FAILURE); 3375 } 3376 vmmdev_sdev_hdl = (sdev_plugin_hdl_t)NULL; 3377 3378 /* Remove the control node. */ 3379 ddi_remove_minor_node(dip, "ctl"); 3380 vmmdev_dip = NULL; 3381 3382 VERIFY0(vmm_mod_unload()); 3383 VERIFY3U(vmmdev_hma_reg, ==, NULL); 3384 vmm_sol_glue_cleanup(); 3385 3386 mutex_exit(&vmmdev_mtx); 3387 3388 return (DDI_SUCCESS); 3389 } 3390 3391 static struct cb_ops vmm_cb_ops = { 3392 vmm_open, 3393 vmm_close, 3394 nodev, /* strategy */ 3395 nodev, /* print */ 3396 nodev, /* dump */ 3397 nodev, /* read */ 3398 nodev, /* write */ 3399 vmm_ioctl, 3400 nodev, /* devmap */ 3401 nodev, /* mmap */ 3402 vmm_segmap, 3403 nochpoll, /* poll */ 3404 ddi_prop_op, 3405 NULL, 3406 D_NEW | D_MP | D_DEVMAP 3407 }; 3408 3409 static struct dev_ops vmm_ops = { 3410 DEVO_REV, 3411 0, 3412 vmm_info, 3413 nulldev, /* identify */ 3414 nulldev, /* probe */ 3415 vmm_attach, 3416 vmm_detach, 3417 nodev, /* reset */ 3418 &vmm_cb_ops, 3419 (struct bus_ops *)NULL 3420 }; 3421 3422 static struct modldrv modldrv = { 3423 &mod_driverops, 3424 "bhyve vmm", 3425 &vmm_ops 3426 }; 3427 3428 static struct modlinkage modlinkage = { 3429 MODREV_1, 3430 &modldrv, 3431 NULL 3432 }; 3433 3434 int 3435 _init(void) 3436 { 3437 int error; 3438 3439 sysinit(); 3440 3441 mutex_init(&vmmdev_mtx, NULL, MUTEX_DRIVER, NULL); 3442 mutex_init(&vmm_mtx, NULL, MUTEX_DRIVER, NULL); 3443 list_create(&vmm_list, sizeof (vmm_softc_t), 3444 offsetof(vmm_softc_t, vmm_node)); 3445 vmm_minors = id_space_create("vmm_minors", VMM_CTL_MINOR + 1, MAXMIN32); 3446 3447 error = ddi_soft_state_init(&vmm_statep, sizeof (vmm_softc_t), 0); 3448 if (error) { 3449 return (error); 3450 } 3451 3452 error = vmmr_init(); 3453 if (error) { 3454 ddi_soft_state_fini(&vmm_statep); 3455 return (error); 3456 } 3457 3458 vmm_zsd_init(); 3459 3460 error = mod_install(&modlinkage); 3461 if (error) { 3462 ddi_soft_state_fini(&vmm_statep); 3463 vmm_zsd_fini(); 3464 vmmr_fini(); 3465 } 3466 3467 return (error); 3468 } 3469 3470 int 3471 _fini(void) 3472 { 3473 int error; 3474 3475 error = mod_remove(&modlinkage); 3476 if (error) { 3477 return (error); 3478 } 3479 3480 vmm_zsd_fini(); 3481 vmmr_fini(); 3482 3483 ddi_soft_state_fini(&vmm_statep); 3484 3485 return (0); 3486 } 3487 3488 int 3489 _info(struct modinfo *modinfop) 3490 { 3491 return (mod_info(&modlinkage, modinfop)); 3492 } 3493