/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2024 Oxide Computer Company
 */

#ifndef	_VMM_DEV_H_
#define	_VMM_DEV_H_

#include <machine/vmm.h>

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/vmm_data.h>

struct vm_create_req {
	char		name[VM_MAX_NAMELEN];
	uint64_t	flags;
};

struct vm_destroy_req {
	char		name[VM_MAX_NAMELEN];
};

struct vm_memmap {
	vm_paddr_t	gpa;
	int		segid;		/* memory segment */
	vm_ooffset_t	segoff;		/* offset into memory segment */
	size_t		len;		/* mmap length */
	int		prot;		/* RWX */
	int		flags;
};
#define	VM_MEMMAP_F_WIRED	0x01
#define	VM_MEMMAP_F_IOMMU	0x02

struct vm_munmap {
	vm_paddr_t	gpa;
	size_t		len;
};
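/*
 * Example: creating a VM via the vmmctl device.  An illustrative sketch,
 * not part of this header: it assumes the caller may open VMM_CTL_DEV
 * (defined at the end of this file) and that <fcntl.h>, <string.h>, and
 * <sys/ioctl.h> have been included alongside this header.
 *
 *	struct vm_create_req create_req = { 0 };
 *	int ctlfd = open(VMM_CTL_DEV, O_RDWR);
 *
 *	(void) strlcpy(create_req.name, "example-vm",
 *	    sizeof (create_req.name));
 *	if (ioctl(ctlfd, VMM_CREATE_VM, &create_req) != 0) {
 *		... handle error ...
 *	}
 */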
#define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
struct vm_memseg {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SEG_NAMELEN];
};

struct vm_register {
	int		cpuid;
	int		regnum;		/* enum vm_reg_name */
	uint64_t	regval;
};

struct vm_seg_desc {			/* data or code segment */
	int		cpuid;
	int		regnum;		/* enum vm_reg_name */
	struct seg_desc	desc;
};

struct vm_register_set {
	int		cpuid;
	unsigned int	count;
	const int	*regnums;	/* enum vm_reg_name */
	uint64_t	*regvals;
};

struct vm_exception {
	int		cpuid;
	int		vector;
	uint32_t	error_code;
	int		error_code_valid;
	int		restart_instruction;
};

struct vm_lapic_msi {
	uint64_t	msg;
	uint64_t	addr;
};

struct vm_lapic_irq {
	int		cpuid;
	int		vector;
};

struct vm_ioapic_irq {
	int		irq;
};

struct vm_isa_irq {
	int		atpic_irq;
	int		ioapic_irq;
};

struct vm_isa_irq_trigger {
	int			atpic_irq;
	enum vm_intr_trigger	trigger;
};

struct vm_capability {
	int			cpuid;
	enum vm_cap_type	captype;
	int			capval;
	int			allcpus;
};

struct vm_pptdev {
	int		pptfd;
};

struct vm_pptdev_mmio {
	int		pptfd;
	vm_paddr_t	gpa;
	vm_paddr_t	hpa;
	size_t		len;
};

struct vm_pptdev_msi {
	int		vcpu;
	int		pptfd;
	int		numvec;		/* 0 means disabled */
	uint64_t	msg;
	uint64_t	addr;
};

struct vm_pptdev_msix {
	int		vcpu;
	int		pptfd;
	int		idx;
	uint64_t	msg;
	uint32_t	vector_control;
	uint64_t	addr;
};

struct vm_pptdev_limits {
	int		pptfd;
	int		msi_limit;
	int		msix_limit;
};

struct vm_nmi {
	int		cpuid;
};

#define	MAX_VM_STATS	64

struct vm_stats {
	int		cpuid;		/* in */
	int		index;		/* in */
	int		num_entries;	/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

struct vm_stat_desc {
	int		index;		/* in */
	char		desc[128];	/* out */
};

struct vm_x2apic {
	int			cpuid;
	enum x2apic_state	state;
};

struct vm_gpa_pte {
	uint64_t	gpa;		/* in */
	uint64_t	pte[4];		/* out */
	int		ptenum;
};

struct vm_hpet_cap {
	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
};

struct vm_suspend {
	enum vm_suspend_how	how;
	int			source;
};

/*
 * Deprecated flags for vm_reinit`flags:
 *
 * Suspend (by force) VM as part of reinit.  Effectively a no-op since
 * suspension requirements during reinit have been lifted.
 *
 * #define VM_REINIT_F_FORCE_SUSPEND	(1 << 0)
 */

struct vm_reinit {
	uint64_t	flags;
};

struct vm_gla2gpa {
	int		vcpuid;		/* inputs */
	int		prot;		/* PROT_READ or PROT_WRITE */
	uint64_t	gla;
	struct vm_guest_paging paging;
	int		fault;		/* outputs */
	uint64_t	gpa;
};

struct vm_activate_cpu {
	int		vcpuid;
};

struct vm_cpuset {
	int		which;
	int		cpusetsize;
#ifndef _KERNEL
	cpuset_t	*cpus;
#else
	void		*cpus;
#endif
};
#define	VM_ACTIVE_CPUS		0
/*
 * Deprecated:
 * #define	VM_SUSPENDED_CPUS	1
 */
#define	VM_DEBUG_CPUS		2

struct vm_intinfo {
	int		vcpuid;
	uint64_t	info1;
	uint64_t	info2;
};

struct vm_rtc_data {
	int		offset;
	uint8_t		value;
};

struct vm_devmem_offset {
	int		segid;
	off_t		offset;
};

struct vm_cpu_topology {
	uint16_t	sockets;
	uint16_t	cores;
	uint16_t	threads;
	uint16_t	maxcpus;
};

struct vm_readwrite_kernemu_device {
	int		vcpuid;
	unsigned	access_width : 3;
	unsigned	_unused : 29;
	uint64_t	gpa;
	uint64_t	value;
};
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
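/*
 * Example: reading the guest %rip of vCPU 0 with VM_GET_REGISTER.  A sketch
 * only: it assumes `vmfd` is an open descriptor for the per-VM device
 * (conventionally /dev/vmm/<name> on illumos) and that VM_REG_GUEST_RIP
 * comes from enum vm_reg_name in <machine/vmm.h>.
 *
 *	struct vm_register vmreg = {
 *		.cpuid = 0,
 *		.regnum = VM_REG_GUEST_RIP,
 *	};
 *	if (ioctl(vmfd, VM_GET_REGISTER, &vmreg) == 0) {
 *		... vmreg.regval now holds the guest %rip ...
 *	}
 */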
enum vcpu_reset_kind {
	VRK_RESET = 0,
	/*
	 * The reset performed by an INIT IPI clears much of the CPU state,
	 * but some portions are left untouched, unlike VRK_RESET, which
	 * represents a "full" reset as if the system was freshly powered on.
	 */
	VRK_INIT = 1,
};

struct vm_vcpu_reset {
	int		vcpuid;
	uint32_t	kind;		/* contains: enum vcpu_reset_kind */
};

struct vm_run_state {
	int		vcpuid;
	uint32_t	state;		/* of enum cpu_init_status type */
	uint8_t		sipi_vector;	/* vector of SIPI, if any */
	uint8_t		_pad[3];
};

/* Transfer data for VM_GET_FPU and VM_SET_FPU */
struct vm_fpu_state {
	int		vcpuid;
	void		*buf;
	size_t		len;
};

struct vm_fpu_desc_entry {
	uint64_t	vfde_feature;
	uint32_t	vfde_size;
	uint32_t	vfde_off;
};

struct vm_fpu_desc {
	struct vm_fpu_desc_entry	*vfd_entry_data;
	size_t				vfd_req_size;
	uint32_t			vfd_num_entries;
};

struct vmm_resv_query {
	size_t	vrq_free_sz;
	size_t	vrq_alloc_sz;
	size_t	vrq_alloc_transient_sz;
	size_t	vrq_limit;
};

struct vmm_resv_target {
	/* Target size for VMM reservoir */
	size_t	vrt_target_sz;

	/*
	 * Change of reservoir size to meet target will be done in multiple
	 * steps of chunk size (or smaller).
	 */
	size_t	vrt_chunk_sz;

	/*
	 * Resultant size of reservoir after operation.  Should match target
	 * size, except when interrupted.
	 */
	size_t	vrt_result_sz;
};

/*
 * The VM_TRACK_DIRTY_PAGES ioctl uses the vmm_dirty_tracker struct as
 * input.  That ioctl is deprecated in favor of VM_NPT_OPERATION, which
 * exposes equivalent functionality.
 *
 * - The `vdt_start_gpa` field specifies the offset from the beginning of
 *   guest physical memory to track;
 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the
 *   given start address.  Each bit indicates whether the given guest page
 *   is dirty or not.
 * - `vdt_len` specifies the length of the guest physical memory region in
 *   bytes.  It also de facto bounds the range of guest addresses we will
 *   examine on any one VM_TRACK_DIRTY_PAGES ioctl().  If the range of the
 *   bit vector spans an unallocated region (or extends beyond the end of
 *   the guest physical address space) the corresponding bits in `vdt_pfns`
 *   will be zeroed.
 */
struct vmm_dirty_tracker {
	uint64_t	vdt_start_gpa;
	size_t		vdt_len;	/* length of region */
	void		*vdt_pfns;	/* bit vector of dirty bits */
};
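/*
 * Example: querying dirty pages for the low 1 MiB of guest memory with the
 * (deprecated) VM_TRACK_DIRTY_PAGES ioctl.  An illustrative sketch assuming
 * a 4096-byte guest page size and an open per-VM descriptor `vmfd`; the
 * bitmap must hold one bit per page in the region (here 256 pages, so 32
 * bytes).
 *
 *	uint8_t bitmap[(1024 * 1024) / 4096 / 8] = { 0 };
 *	struct vmm_dirty_tracker tracker = {
 *		.vdt_start_gpa = 0,
 *		.vdt_len = 1024 * 1024,
 *		.vdt_pfns = bitmap,
 *	};
 *	if (ioctl(vmfd, VM_TRACK_DIRTY_PAGES, &tracker) == 0) {
 *		... bit N of bitmap reflects the page at gpa N * 4096 ...
 *	}
 */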
/*
 * Perform an operation on the nested page tables for the guest.
 *
 * The vno_operation field determines how (if at all) the other fields are
 * used.  If the VNO_FLAG_BITMAP_IN or VNO_FLAG_BITMAP_OUT flags are present
 * in vno_operation, then vno_bitmap is expected to point to a region of
 * memory sized adequately (1 bit per page) for the region specified by
 * vno_gpa and vno_len.  Presently that region size is limited to 1GiB
 * (256k 4k pages).
 *
 * Several operations act on the entire guest memory space as a whole, and
 * thus expect that no memory region (or bitmap) is provided.  These
 * operations are:
 *
 * - VNO_OP_GET_TRACK_DIRTY: Get status of dirty-page-tracking for the VM.
 *   The return value of the ioctl will indicate the status (0 = off,
 *   1 = on).
 * - VNO_OP_EN_TRACK_DIRTY: Enable dirty-page-tracking for the VM.  Will
 *   emit an error if such tracking is not supported by hardware.
 * - VNO_OP_DIS_TRACK_DIRTY: Disable dirty-page-tracking for the VM.
 *
 * The remaining operations act upon PTEs in the range specified by vno_gpa
 * and vno_len.
 *
 * If the VNO_FLAG_BITMAP_IN flag is set, the operation will be executed
 * only for pages with a corresponding bit set in the bitmap.  When the flag
 * is not set, the operation is applied to all pages in the region specified
 * by vno_gpa/vno_len.
 *
 * For operations which yield per-page results, those results will be
 * returned to the caller via the bitmap if the VNO_FLAG_BITMAP_OUT flag is
 * set.  Those operations are as follows:
 *
 * - VNO_OP_GET_DIRTY: Gets the state of the dirty bit for the page(s).
 * - VNO_OP_RESET_DIRTY: Clears any existing dirty bit for the page(s),
 *   returning it via the bitmap.
 * - VNO_OP_SET_DIRTY: Asserts the state of the dirty bit for the page(s).
 *   This is only performed for pages which are mapped into the guest as
 *   writable.
 *
 * The above bitmap operations on dirty bits in the NPTs are possible
 * independent of whether dirty-page-tracking is enabled for the vmspace.
 * Querying dirty bits from a vmspace without such tracking enabled will
 * return only bits which have been manually set via a preceding NPT
 * operation.
 */
struct vm_npt_operation {
	uint64_t	vno_gpa;
	uint64_t	vno_len;
	uint8_t		*vno_bitmap;
	uint32_t	vno_operation;
};

#define	VNO_OP_RESET_DIRTY	0x1
#define	VNO_OP_SET_DIRTY	0x2
#define	VNO_OP_GET_DIRTY	0x3
#define	VNO_OP_GET_TRACK_DIRTY	0x20
#define	VNO_OP_EN_TRACK_DIRTY	0x21
#define	VNO_OP_DIS_TRACK_DIRTY	0x22
#define	VNO_FLAG_BITMAP_IN	(1 << 30)
#define	VNO_FLAG_BITMAP_OUT	(1 << 31)

/* Current (arbitrary) max length for vm_data_xfer */
#define	VM_DATA_XFER_LIMIT	8192

#define	VDX_FLAG_READ_COPYIN	(1 << 0)
#define	VDX_FLAG_WRITE_COPYOUT	(1 << 1)

#define	VDX_FLAGS_VALID		(VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT)

struct vm_data_xfer {
	int		vdx_vcpuid;
	uint16_t	vdx_class;
	uint16_t	vdx_version;
	uint32_t	vdx_flags;
	uint32_t	vdx_len;
	uint32_t	vdx_result_len;
	void		*vdx_data;
};

struct vm_vcpu_cpuid_config {
	int		vvcc_vcpuid;
	uint32_t	vvcc_flags;
	uint32_t	vvcc_nent;
	uint32_t	_pad;
	void		*vvcc_entries;
};

/* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */
struct vm_legacy_cpuid {
	int		vlc_vcpuid;
	uint32_t	vlc_eax;
	uint32_t	vlc_ebx;
	uint32_t	vlc_ecx;
	uint32_t	vlc_edx;
};
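/*
 * Example: harvesting (reading and clearing) dirty bits for a 2 MiB region
 * via VM_NPT_OPERATION, the successor to VM_TRACK_DIRTY_PAGES.  A sketch
 * under the same assumptions as the earlier example (4 KiB pages, an open
 * per-VM descriptor `vmfd`); VNO_FLAG_BITMAP_OUT requests per-page results
 * in vno_bitmap.
 *
 *	uint8_t bitmap[(2 * 1024 * 1024) / 4096 / 8] = { 0 };
 *	struct vm_npt_operation npt_op = {
 *		.vno_gpa = 0,
 *		.vno_len = 2 * 1024 * 1024,
 *		.vno_bitmap = bitmap,
 *		.vno_operation = VNO_OP_RESET_DIRTY | VNO_FLAG_BITMAP_OUT,
 *	};
 *	if (ioctl(vmfd, VM_NPT_OPERATION, &npt_op) == 0) {
 *		... set bits mark pages dirtied since the last harvest ...
 *	}
 */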
/*
 * VMM Interface Version
 *
 * Although the kernel interface to bhyve is explicitly considered Private,
 * there are out-of-gate consumers which utilize it.  While they assume the
 * risk of any breakage incurred by changes to bhyve, we can at least try to
 * make it easier to detect changes by exposing a "version" of the
 * interface.  It can also be used by the in-gate userland to detect if
 * packaging updates somehow result in the userland and kernel falling out
 * of sync.
 *
 * There are no established criteria for the magnitude of change which
 * requires this version to be incremented, and maintenance of it is
 * considered a best-effort activity.  Nothing is to be inferred about the
 * magnitude of a change when the version is modified.  It follows no rules
 * like semver.
 */
#define	VMM_CURRENT_INTERFACE_VERSION	17

#define	VMMCTL_IOC_BASE		(('V' << 16) | ('M' << 8))
#define	VMM_IOC_BASE		(('v' << 16) | ('m' << 8))
#define	VMM_LOCK_IOC_BASE	(('v' << 16) | ('l' << 8))
#define	VMM_CPU_IOC_BASE	(('v' << 16) | ('p' << 8))

/* Operations performed on the vmmctl device */
#define	VMM_CREATE_VM		(VMMCTL_IOC_BASE | 0x01)
#define	VMM_DESTROY_VM		(VMMCTL_IOC_BASE | 0x02)
#define	VMM_VM_SUPPORTED	(VMMCTL_IOC_BASE | 0x03)
#define	VMM_INTERFACE_VERSION	(VMMCTL_IOC_BASE | 0x04)
#define	VMM_CHECK_IOMMU		(VMMCTL_IOC_BASE | 0x05)

#define	VMM_RESV_QUERY		(VMMCTL_IOC_BASE | 0x10)
#define	VMM_RESV_SET_TARGET	(VMMCTL_IOC_BASE | 0x11)
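/*
 * Example: comparing the running kernel's interface version against the one
 * this header was compiled with.  A sketch assuming the VMM_INTERFACE_VERSION
 * ioctl, issued against an open vmmctl descriptor `ctlfd`, reports the
 * version as its return value:
 *
 *	int version = ioctl(ctlfd, VMM_INTERFACE_VERSION, NULL);
 *	if (version != VMM_CURRENT_INTERFACE_VERSION) {
 *		... userland and kernel may be out of sync ...
 *	}
 */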
/* Operations performed in the context of a given vCPU */
#define	VM_RUN				(VMM_CPU_IOC_BASE | 0x01)
#define	VM_SET_REGISTER			(VMM_CPU_IOC_BASE | 0x02)
#define	VM_GET_REGISTER			(VMM_CPU_IOC_BASE | 0x03)
#define	VM_SET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x04)
#define	VM_GET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x05)
#define	VM_SET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x06)
#define	VM_GET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x07)
#define	VM_INJECT_EXCEPTION		(VMM_CPU_IOC_BASE | 0x08)
#define	VM_SET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x09)
#define	VM_GET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x0a)
#define	VM_PPTDEV_MSI			(VMM_CPU_IOC_BASE | 0x0b)
#define	VM_PPTDEV_MSIX			(VMM_CPU_IOC_BASE | 0x0c)
#define	VM_SET_X2APIC_STATE		(VMM_CPU_IOC_BASE | 0x0d)
#define	VM_GLA2GPA			(VMM_CPU_IOC_BASE | 0x0e)
#define	VM_GLA2GPA_NOFAULT		(VMM_CPU_IOC_BASE | 0x0f)
#define	VM_ACTIVATE_CPU			(VMM_CPU_IOC_BASE | 0x10)
#define	VM_SET_INTINFO			(VMM_CPU_IOC_BASE | 0x11)
#define	VM_GET_INTINFO			(VMM_CPU_IOC_BASE | 0x12)
#define	VM_RESTART_INSTRUCTION		(VMM_CPU_IOC_BASE | 0x13)
#define	VM_SET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x14)
#define	VM_GET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x15)
#define	VM_RESET_CPU			(VMM_CPU_IOC_BASE | 0x16)
#define	VM_GET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x17)
#define	VM_SET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x18)
#define	VM_GET_FPU			(VMM_CPU_IOC_BASE | 0x19)
#define	VM_SET_FPU			(VMM_CPU_IOC_BASE | 0x1a)
#define	VM_GET_CPUID			(VMM_CPU_IOC_BASE | 0x1b)
#define	VM_SET_CPUID			(VMM_CPU_IOC_BASE | 0x1c)
#define	VM_LEGACY_CPUID			(VMM_CPU_IOC_BASE | 0x1d)

/* Operations requiring write-locking the VM */
#define	VM_REINIT		(VMM_LOCK_IOC_BASE | 0x01)
#define	VM_BIND_PPTDEV		(VMM_LOCK_IOC_BASE | 0x02)
#define	VM_UNBIND_PPTDEV	(VMM_LOCK_IOC_BASE | 0x03)
#define	VM_MAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x04)
#define	VM_ALLOC_MEMSEG		(VMM_LOCK_IOC_BASE | 0x05)
#define	VM_MMAP_MEMSEG		(VMM_LOCK_IOC_BASE | 0x06)
#define	VM_PMTMR_LOCATE		(VMM_LOCK_IOC_BASE | 0x07)
#define	VM_MUNMAP_MEMSEG	(VMM_LOCK_IOC_BASE | 0x08)
#define	VM_UNMAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x09)
#define	VM_PAUSE		(VMM_LOCK_IOC_BASE | 0x0a)
#define	VM_RESUME		(VMM_LOCK_IOC_BASE | 0x0b)

#define	VM_WRLOCK_CYCLE		(VMM_LOCK_IOC_BASE | 0xff)

/* All other ioctls */
#define	VM_GET_GPA_PMAP			(VMM_IOC_BASE | 0x01)
#define	VM_GET_MEMSEG			(VMM_IOC_BASE | 0x02)
#define	VM_MMAP_GETNEXT			(VMM_IOC_BASE | 0x03)
#define	VM_LAPIC_IRQ			(VMM_IOC_BASE | 0x04)
#define	VM_LAPIC_LOCAL_IRQ		(VMM_IOC_BASE | 0x05)
#define	VM_LAPIC_MSI			(VMM_IOC_BASE | 0x06)
#define	VM_IOAPIC_ASSERT_IRQ		(VMM_IOC_BASE | 0x07)
#define	VM_IOAPIC_DEASSERT_IRQ		(VMM_IOC_BASE | 0x08)
#define	VM_IOAPIC_PULSE_IRQ		(VMM_IOC_BASE | 0x09)
#define	VM_ISA_ASSERT_IRQ		(VMM_IOC_BASE | 0x0a)
#define	VM_ISA_DEASSERT_IRQ		(VMM_IOC_BASE | 0x0b)
#define	VM_ISA_PULSE_IRQ		(VMM_IOC_BASE | 0x0c)
#define	VM_ISA_SET_IRQ_TRIGGER		(VMM_IOC_BASE | 0x0d)
#define	VM_RTC_WRITE			(VMM_IOC_BASE | 0x0e)
#define	VM_RTC_READ			(VMM_IOC_BASE | 0x0f)
#define	VM_RTC_SETTIME			(VMM_IOC_BASE | 0x10)
#define	VM_RTC_GETTIME			(VMM_IOC_BASE | 0x11)
#define	VM_SUSPEND			(VMM_IOC_BASE | 0x12)
#define	VM_IOAPIC_PINCOUNT		(VMM_IOC_BASE | 0x13)
#define	VM_GET_PPTDEV_LIMITS		(VMM_IOC_BASE | 0x14)
#define	VM_GET_HPET_CAPABILITIES	(VMM_IOC_BASE | 0x15)
#define	VM_STATS_IOC			(VMM_IOC_BASE | 0x16)
#define	VM_STAT_DESC			(VMM_IOC_BASE | 0x17)
#define	VM_INJECT_NMI			(VMM_IOC_BASE | 0x18)
#define	VM_GET_X2APIC_STATE		(VMM_IOC_BASE | 0x19)
#define	VM_SET_TOPOLOGY			(VMM_IOC_BASE | 0x1a)
#define	VM_GET_TOPOLOGY			(VMM_IOC_BASE | 0x1b)
#define	VM_GET_CPUS			(VMM_IOC_BASE | 0x1c)
#define	VM_SUSPEND_CPU			(VMM_IOC_BASE | 0x1d)
#define	VM_RESUME_CPU			(VMM_IOC_BASE | 0x1e)
#define	VM_PPTDEV_DISABLE_MSIX		(VMM_IOC_BASE | 0x1f)
/* Note: forces a barrier on a flush operation before returning. */
#define	VM_TRACK_DIRTY_PAGES		(VMM_IOC_BASE | 0x20)
#define	VM_DESC_FPU_AREA		(VMM_IOC_BASE | 0x21)
#define	VM_DATA_READ			(VMM_IOC_BASE | 0x22)
#define	VM_DATA_WRITE			(VMM_IOC_BASE | 0x23)
#define	VM_SET_AUTODESTRUCT		(VMM_IOC_BASE | 0x24)
#define	VM_DESTROY_SELF			(VMM_IOC_BASE | 0x25)
#define	VM_DESTROY_PENDING		(VMM_IOC_BASE | 0x26)
#define	VM_VCPU_BARRIER			(VMM_IOC_BASE | 0x27)
#define	VM_NPT_OPERATION		(VMM_IOC_BASE | 0x28)

#define	VM_DEVMEM_GETOFFSET		(VMM_IOC_BASE | 0xff)

#define	VMM_CTL_DEV		"/dev/vmmctl"

#endif	/* _VMM_DEV_H_ */