/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2023 Oxide Computer Company
 */

/*
 * Interface definitions shared between the in-kernel VMM (bhyve) driver and
 * its userland consumers: the argument structures and ioctl numbers for the
 * vmmctl control device and the per-VM / per-vCPU vmm devices.
 *
 * NOTE: these structures are part of the kernel/userland ABI (see the
 * _Static_assert below and VMM_CURRENT_INTERFACE_VERSION); field layout must
 * not be changed without bumping the interface version.
 */

#ifndef _VMM_DEV_H_
#define	_VMM_DEV_H_

#include <machine/vmm.h>

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/vmm_data.h>

/* Argument for VMM_CREATE_VM: name and creation flags of the new VM */
struct vm_create_req {
	char		name[VM_MAX_NAMELEN];
	uint64_t	flags;
};


/* Argument for VMM_DESTROY_VM: name of the VM to tear down */
struct vm_destroy_req {
	char		name[VM_MAX_NAMELEN];
};

/* Guest-physical mapping of a memory segment (VM_MMAP_MEMSEG/VM_MMAP_GETNEXT) */
struct vm_memmap {
	vm_paddr_t	gpa;
	int		segid;		/* memory segment */
	vm_ooffset_t	segoff;		/* offset into memory segment */
	size_t		len;		/* mmap length */
	int		prot;		/* RWX */
	int		flags;
};
#define	VM_MEMMAP_F_WIRED	0x01
#define	VM_MEMMAP_F_IOMMU	0x02

/* Argument for VM_MUNMAP_MEMSEG: guest-physical range to unmap */
struct vm_munmap {
	vm_paddr_t	gpa;
	size_t		len;
};

/* Yields the segment name, or NULL for an anonymous (unnamed) segment */
#define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
/* Argument for VM_ALLOC_MEMSEG/VM_GET_MEMSEG */
struct vm_memseg {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SEG_NAMELEN];
};

/* Single-register access for a vCPU (VM_SET_REGISTER/VM_GET_REGISTER) */
struct vm_register {
	int		cpuid;
	int		regnum;		/* enum vm_reg_name */
	uint64_t	regval;
};

/* VM_SET_SEGMENT_DESCRIPTOR/VM_GET_SEGMENT_DESCRIPTOR */
struct vm_seg_desc {		/* data or code segment */
	int		cpuid;
	int		regnum;		/* enum vm_reg_name */
	struct seg_desc	desc;
};

/* Batched register access (VM_SET_REGISTER_SET/VM_GET_REGISTER_SET) */
struct vm_register_set {
	int		cpuid;
	unsigned int	count;
	const int	*regnums;	/* enum vm_reg_name */
	uint64_t	*regvals;
};

/* Argument for VM_INJECT_EXCEPTION */
struct vm_exception {
	int		cpuid;
	int		vector;
	uint32_t	error_code;
	int		error_code_valid;
	int		restart_instruction;
};

/* Argument for VM_LAPIC_MSI: MSI message/address pair to deliver */
struct vm_lapic_msi {
	uint64_t	msg;
	uint64_t	addr;
};

/* Argument for VM_LAPIC_IRQ/VM_LAPIC_LOCAL_IRQ */
struct vm_lapic_irq {
	int		cpuid;
	int		vector;
};

/* Argument for the VM_IOAPIC_*_IRQ ioctls */
struct vm_ioapic_irq {
	int		irq;
};

/* Argument for the VM_ISA_*_IRQ ioctls (legacy ATPIC + IOAPIC pins) */
struct vm_isa_irq {
	int		atpic_irq;
	int		ioapic_irq;
};

/* Argument for VM_ISA_SET_IRQ_TRIGGER */
struct vm_isa_irq_trigger {
	int		atpic_irq;
	enum vm_intr_trigger trigger;
};

/* Argument for VM_SET_CAPABILITY/VM_GET_CAPABILITY */
struct vm_capability {
	int		cpuid;
	enum vm_cap_type captype;
	int		capval;
	int		allcpus;
};

/* PCI passthrough device handle (VM_BIND_PPTDEV/VM_UNBIND_PPTDEV) */
struct vm_pptdev {
	int		pptfd;
};

/* Passthrough MMIO mapping (VM_MAP_PPTDEV_MMIO/VM_UNMAP_PPTDEV_MMIO) */
struct vm_pptdev_mmio {
	int		pptfd;
	vm_paddr_t	gpa;
	vm_paddr_t	hpa;
	size_t		len;
};

/* Passthrough MSI configuration (VM_PPTDEV_MSI) */
struct vm_pptdev_msi {
	int		vcpu;
	int		pptfd;
	int		numvec;		/* 0 means disabled */
	uint64_t	msg;
	uint64_t	addr;
};

/* Passthrough MSI-X configuration for one table entry (VM_PPTDEV_MSIX) */
struct vm_pptdev_msix {
	int		vcpu;
	int		pptfd;
	int		idx;
	uint64_t	msg;
	uint32_t	vector_control;
	uint64_t	addr;
};

/* Results of VM_GET_PPTDEV_LIMITS */
struct vm_pptdev_limits {
	int		pptfd;
	int		msi_limit;
	int		msix_limit;
};

/* Argument for VM_INJECT_NMI */
struct vm_nmi {
	int		cpuid;
};

/* Upper bound on stat entries returned per VM_STATS_IOC call */
#define	MAX_VM_STATS	64

/* Argument for VM_STATS_IOC */
struct vm_stats {
	int		cpuid;		/* in */
	int		index;		/* in */
	int		num_entries;	/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

/* Argument for VM_STAT_DESC: maps a stat index to its description */
struct vm_stat_desc {
	int		index;		/* in */
	char		desc[128];	/* out */
};

/* Argument for VM_SET_X2APIC_STATE/VM_GET_X2APIC_STATE */
struct vm_x2apic {
	int		cpuid;
	enum x2apic_state state;
};

/* Argument for VM_GET_GPA_PMAP: page table entries backing a GPA */
struct vm_gpa_pte {
	uint64_t	gpa;		/* in */
	uint64_t	pte[4];		/* out */
	int		ptenum;
};

/* Result of VM_GET_HPET_CAPABILITIES */
struct vm_hpet_cap {
	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
};

/* Argument for VM_SUSPEND */
struct vm_suspend {
	enum vm_suspend_how how;
	int		source;
};

/*
 * Deprecated flags for vm_reinit`flags:
 *
 * Suspend (by force) VM as part of reinit.  Effectively a no-op since
 * suspension requirements during reinit have been lifted.
 *
 * #define VM_REINIT_F_FORCE_SUSPEND	(1 << 0)
 */

/* Argument for VM_REINIT */
struct vm_reinit {
	uint64_t	flags;
};

/* Guest-linear to guest-physical translation (VM_GLA2GPA[_NOFAULT]) */
struct vm_gla2gpa {
	int		vcpuid;		/* inputs */
	int		prot;		/* PROT_READ or PROT_WRITE */
	uint64_t	gla;
	struct vm_guest_paging paging;
	int		fault;		/* outputs */
	uint64_t	gpa;
};

/* Argument for VM_ACTIVATE_CPU/VM_SUSPEND_CPU/VM_RESUME_CPU */
struct vm_activate_cpu {
	int		vcpuid;
};

/* Argument for VM_GET_CPUS: retrieves one of the cpusets selected by `which` */
struct vm_cpuset {
	int		which;
	int		cpusetsize;
#ifndef _KERNEL
	cpuset_t	*cpus;
#else
	void		*cpus;
#endif
};
#define	VM_ACTIVE_CPUS		0
/*
 * Deprecated:
 * #define VM_SUSPENDED_CPUS	1
 */
#define	VM_DEBUG_CPUS		2

/* Pending event/exception info (VM_SET_INTINFO/VM_GET_INTINFO) */
struct vm_intinfo {
	int		vcpuid;
	uint64_t	info1;
	uint64_t	info2;
};

/* One byte of RTC (CMOS) state (VM_RTC_READ/VM_RTC_WRITE) */
struct vm_rtc_data {
	int		offset;
	uint8_t		value;
};

/* Result of VM_DEVMEM_GETOFFSET: mmap offset for a devmem segment */
struct vm_devmem_offset {
	int		segid;
	off_t		offset;
};

/* Argument for VM_SET_TOPOLOGY/VM_GET_TOPOLOGY */
struct vm_cpu_topology {
	uint16_t	sockets;
	uint16_t	cores;
	uint16_t	threads;
	uint16_t	maxcpus;
};

/* Argument for VM_SET_KERNEMU_DEV/VM_GET_KERNEMU_DEV */
struct vm_readwrite_kernemu_device {
	int		vcpuid;
	unsigned	access_width : 3;
	unsigned	_unused : 29;
	uint64_t	gpa;
	uint64_t	value;
};
/* Layout is ABI: userland and kernel must agree on this exact size */
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");

/* Kinds of vCPU reset requested via VM_RESET_CPU */
enum vcpu_reset_kind {
	VRK_RESET = 0,
	/*
	 * The reset performed by an INIT IPI clears much of the CPU state, but
	 * some portions are left untouched, unlike VRK_RESET, which represents
	 * a "full" reset as if the system was freshly powered on.
	 */
	VRK_INIT = 1,
};

/* Argument for VM_RESET_CPU */
struct vm_vcpu_reset {
	int		vcpuid;
	uint32_t	kind;		/* contains: enum vcpu_reset_kind */
};

/* Argument for VM_GET_RUN_STATE/VM_SET_RUN_STATE */
struct vm_run_state {
	int		vcpuid;
	uint32_t	state;		/* of enum cpu_init_status type */
	uint8_t		sipi_vector;	/* vector of SIPI, if any */
	uint8_t		_pad[3];
};

/* Transfer data for VM_GET_FPU and VM_SET_FPU */
struct vm_fpu_state {
	int		vcpuid;
	void		*buf;
	size_t		len;
};

/* One entry in the FPU save-area description (see struct vm_fpu_desc) */
struct vm_fpu_desc_entry {
	uint64_t	vfde_feature;
	uint32_t	vfde_size;
	uint32_t	vfde_off;
};

/* Result of VM_DESC_FPU_AREA: layout of the FPU save area */
struct vm_fpu_desc {
	struct vm_fpu_desc_entry *vfd_entry_data;
	size_t		vfd_req_size;
	uint32_t	vfd_num_entries;
};

/* Result of VMM_RESV_QUERY: current VMM memory reservoir accounting */
struct vmm_resv_query {
	size_t		vrq_free_sz;
	size_t		vrq_alloc_sz;
	size_t		vrq_alloc_transient_sz;
	size_t		vrq_limit;
};

/* Argument for VMM_RESV_SET_TARGET */
struct vmm_resv_target {
	/* Target size for VMM reservoir */
	size_t		vrt_target_sz;

	/*
	 * Change of reservoir size to meet target will be done in multiple
	 * steps of chunk size (or smaller)
	 */
	size_t		vrt_chunk_sz;

	/*
	 * Resultant size of reservoir after operation.  Should match target
	 * size, except when interrupted.
	 */
	size_t		vrt_result_sz;
};

/*
 * struct vmm_dirty_tracker is used for tracking dirty guest pages during
 * e.g. live migration.
 *
 * - The `vdt_start_gpa` field specifies the offset from the beginning of
 *   guest physical memory to track;
 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the
 *   given start address.  Each bit indicates whether the given guest page
 *   is dirty or not.
 * - `vdt_len` specifies the length of the guest physical memory region in
 *   bytes.  It also de facto bounds the range of guest addresses we will
 *   examine on any one `VM_TRACK_DIRTY_PAGES` ioctl().  If the range of the
 *   bit vector spans an unallocated region (or extends beyond the end of
 *   the guest physical address space) the corresponding bits in `vdt_pfns`
 *   will be zeroed.
 */
struct vmm_dirty_tracker {
	uint64_t	vdt_start_gpa;
	size_t		vdt_len;	/* length of region */
	void		*vdt_pfns;	/* bit vector of dirty bits */
};

/* Current (arbitrary) max length for vm_data_xfer */
#define	VM_DATA_XFER_LIMIT	8192

#define	VDX_FLAG_READ_COPYIN	(1 << 0)
#define	VDX_FLAG_WRITE_COPYOUT	(1 << 1)

#define	VDX_FLAGS_VALID		(VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT)

/* Argument for VM_DATA_READ/VM_DATA_WRITE (see sys/vmm_data.h for classes) */
struct vm_data_xfer {
	int		vdx_vcpuid;
	uint16_t	vdx_class;
	uint16_t	vdx_version;
	uint32_t	vdx_flags;
	uint32_t	vdx_len;
	uint32_t	vdx_result_len;
	void		*vdx_data;
};

/* Argument for VM_GET_CPUID/VM_SET_CPUID */
struct vm_vcpu_cpuid_config {
	int		vvcc_vcpuid;
	uint32_t	vvcc_flags;
	uint32_t	vvcc_nent;
	uint32_t	_pad;
	void		*vvcc_entries;
};

/* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */
struct vm_legacy_cpuid {
	int		vlc_vcpuid;
	uint32_t	vlc_eax;
	uint32_t	vlc_ebx;
	uint32_t	vlc_ecx;
	uint32_t	vlc_edx;
};

/*
 * VMM Interface Version
 *
 * Despite the fact that the kernel interface to bhyve is explicitly considered
 * Private, there are out-of-gate consumers which utilize it.  While they assume
 * the risk of any breakage incurred by changes to bhyve, we can at least try to
 * make it easier to detect changes by exposing a "version" of the interface.
 * It can also be used by the in-gate userland to detect if packaging updates
 * somehow result in the userland and kernel falling out of sync.
 *
 * There are no established criteria for the magnitude of change which requires
 * this version to be incremented, and maintenance of it is considered a
 * best-effort activity.  Nothing is to be inferred about the magnitude of a
 * change when the version is modified.  It follows no rules like semver.
 */
#define	VMM_CURRENT_INTERFACE_VERSION	16


/* ioctl number bases, partitioned by required locking/context (see below) */
#define	VMMCTL_IOC_BASE		(('V' << 16) | ('M' << 8))
#define	VMM_IOC_BASE		(('v' << 16) | ('m' << 8))
#define	VMM_LOCK_IOC_BASE	(('v' << 16) | ('l' << 8))
#define	VMM_CPU_IOC_BASE	(('v' << 16) | ('p' << 8))

/* Operations performed on the vmmctl device */
#define	VMM_CREATE_VM		(VMMCTL_IOC_BASE | 0x01)
#define	VMM_DESTROY_VM		(VMMCTL_IOC_BASE | 0x02)
#define	VMM_VM_SUPPORTED	(VMMCTL_IOC_BASE | 0x03)
#define	VMM_INTERFACE_VERSION	(VMMCTL_IOC_BASE | 0x04)
#define	VMM_CHECK_IOMMU		(VMMCTL_IOC_BASE | 0x05)

#define	VMM_RESV_QUERY		(VMMCTL_IOC_BASE | 0x10)
#define	VMM_RESV_SET_TARGET	(VMMCTL_IOC_BASE | 0x11)

/* Operations performed in the context of a given vCPU */
#define	VM_RUN				(VMM_CPU_IOC_BASE | 0x01)
#define	VM_SET_REGISTER			(VMM_CPU_IOC_BASE | 0x02)
#define	VM_GET_REGISTER			(VMM_CPU_IOC_BASE | 0x03)
#define	VM_SET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x04)
#define	VM_GET_SEGMENT_DESCRIPTOR	(VMM_CPU_IOC_BASE | 0x05)
#define	VM_SET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x06)
#define	VM_GET_REGISTER_SET		(VMM_CPU_IOC_BASE | 0x07)
#define	VM_INJECT_EXCEPTION		(VMM_CPU_IOC_BASE | 0x08)
#define	VM_SET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x09)
#define	VM_GET_CAPABILITY		(VMM_CPU_IOC_BASE | 0x0a)
#define	VM_PPTDEV_MSI			(VMM_CPU_IOC_BASE | 0x0b)
#define	VM_PPTDEV_MSIX			(VMM_CPU_IOC_BASE | 0x0c)
#define	VM_SET_X2APIC_STATE		(VMM_CPU_IOC_BASE | 0x0d)
#define	VM_GLA2GPA			(VMM_CPU_IOC_BASE | 0x0e)
#define	VM_GLA2GPA_NOFAULT		(VMM_CPU_IOC_BASE | 0x0f)
#define	VM_ACTIVATE_CPU			(VMM_CPU_IOC_BASE | 0x10)
#define	VM_SET_INTINFO			(VMM_CPU_IOC_BASE | 0x11)
#define	VM_GET_INTINFO			(VMM_CPU_IOC_BASE | 0x12)
#define	VM_RESTART_INSTRUCTION		(VMM_CPU_IOC_BASE | 0x13)
#define	VM_SET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x14)
#define	VM_GET_KERNEMU_DEV		(VMM_CPU_IOC_BASE | 0x15)
#define	VM_RESET_CPU			(VMM_CPU_IOC_BASE | 0x16)
#define	VM_GET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x17)
#define	VM_SET_RUN_STATE		(VMM_CPU_IOC_BASE | 0x18)
#define	VM_GET_FPU			(VMM_CPU_IOC_BASE | 0x19)
#define	VM_SET_FPU			(VMM_CPU_IOC_BASE | 0x1a)
#define	VM_GET_CPUID			(VMM_CPU_IOC_BASE | 0x1b)
#define	VM_SET_CPUID			(VMM_CPU_IOC_BASE | 0x1c)
#define	VM_LEGACY_CPUID			(VMM_CPU_IOC_BASE | 0x1d)

/* Operations requiring write-locking the VM */
#define	VM_REINIT		(VMM_LOCK_IOC_BASE | 0x01)
#define	VM_BIND_PPTDEV		(VMM_LOCK_IOC_BASE | 0x02)
#define	VM_UNBIND_PPTDEV	(VMM_LOCK_IOC_BASE | 0x03)
#define	VM_MAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x04)
#define	VM_ALLOC_MEMSEG		(VMM_LOCK_IOC_BASE | 0x05)
#define	VM_MMAP_MEMSEG		(VMM_LOCK_IOC_BASE | 0x06)
#define	VM_PMTMR_LOCATE		(VMM_LOCK_IOC_BASE | 0x07)
#define	VM_MUNMAP_MEMSEG	(VMM_LOCK_IOC_BASE | 0x08)
#define	VM_UNMAP_PPTDEV_MMIO	(VMM_LOCK_IOC_BASE | 0x09)
#define	VM_PAUSE		(VMM_LOCK_IOC_BASE | 0x0a)
#define	VM_RESUME		(VMM_LOCK_IOC_BASE | 0x0b)

#define	VM_WRLOCK_CYCLE		(VMM_LOCK_IOC_BASE | 0xff)

/* All other ioctls */
#define	VM_GET_GPA_PMAP		(VMM_IOC_BASE | 0x01)
#define	VM_GET_MEMSEG		(VMM_IOC_BASE | 0x02)
#define	VM_MMAP_GETNEXT		(VMM_IOC_BASE | 0x03)

#define	VM_LAPIC_IRQ		(VMM_IOC_BASE | 0x04)
#define	VM_LAPIC_LOCAL_IRQ	(VMM_IOC_BASE | 0x05)
#define	VM_LAPIC_MSI		(VMM_IOC_BASE | 0x06)

#define	VM_IOAPIC_ASSERT_IRQ	(VMM_IOC_BASE | 0x07)
#define	VM_IOAPIC_DEASSERT_IRQ	(VMM_IOC_BASE | 0x08)
#define	VM_IOAPIC_PULSE_IRQ	(VMM_IOC_BASE | 0x09)

#define	VM_ISA_ASSERT_IRQ	(VMM_IOC_BASE | 0x0a)
#define	VM_ISA_DEASSERT_IRQ	(VMM_IOC_BASE | 0x0b)
#define	VM_ISA_PULSE_IRQ	(VMM_IOC_BASE | 0x0c)
#define	VM_ISA_SET_IRQ_TRIGGER	(VMM_IOC_BASE | 0x0d)

#define	VM_RTC_WRITE		(VMM_IOC_BASE | 0x0e)
#define	VM_RTC_READ		(VMM_IOC_BASE | 0x0f)
#define	VM_RTC_SETTIME		(VMM_IOC_BASE | 0x10)
#define	VM_RTC_GETTIME		(VMM_IOC_BASE | 0x11)

#define	VM_SUSPEND		(VMM_IOC_BASE | 0x12)

#define	VM_IOAPIC_PINCOUNT	(VMM_IOC_BASE | 0x13)
#define	VM_GET_PPTDEV_LIMITS	(VMM_IOC_BASE | 0x14)
#define	VM_GET_HPET_CAPABILITIES (VMM_IOC_BASE | 0x15)

#define	VM_STATS_IOC		(VMM_IOC_BASE | 0x16)
#define	VM_STAT_DESC		(VMM_IOC_BASE | 0x17)

#define	VM_INJECT_NMI		(VMM_IOC_BASE | 0x18)
#define	VM_GET_X2APIC_STATE	(VMM_IOC_BASE | 0x19)
#define	VM_SET_TOPOLOGY		(VMM_IOC_BASE | 0x1a)
#define	VM_GET_TOPOLOGY		(VMM_IOC_BASE | 0x1b)
#define	VM_GET_CPUS		(VMM_IOC_BASE | 0x1c)
#define	VM_SUSPEND_CPU		(VMM_IOC_BASE | 0x1d)
#define	VM_RESUME_CPU		(VMM_IOC_BASE | 0x1e)

#define	VM_PPTDEV_DISABLE_MSIX	(VMM_IOC_BASE | 0x1f)

/* Note: forces a barrier on a flush operation before returning. */
#define	VM_TRACK_DIRTY_PAGES	(VMM_IOC_BASE | 0x20)
#define	VM_DESC_FPU_AREA	(VMM_IOC_BASE | 0x21)

#define	VM_DATA_READ		(VMM_IOC_BASE | 0x22)
#define	VM_DATA_WRITE		(VMM_IOC_BASE | 0x23)

#define	VM_SET_AUTODESTRUCT	(VMM_IOC_BASE | 0x24)
#define	VM_DESTROY_SELF		(VMM_IOC_BASE | 0x25)
#define	VM_DESTROY_PENDING	(VMM_IOC_BASE | 0x26)

#define	VM_VCPU_BARRIER		(VMM_IOC_BASE | 0x27)

#define	VM_DEVMEM_GETOFFSET	(VMM_IOC_BASE | 0xff)

/* Path of the VMM control device used to create/destroy/query VMs */
#define	VMM_CTL_DEV		"/dev/vmmctl"

#endif