1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2015 Pluribus Networks Inc. 41 * Copyright 2019 Joyent, Inc. 42 * Copyright 2023 Oxide Computer Company 43 */ 44 45 #ifndef _VMM_DEV_H_ 46 #define _VMM_DEV_H_ 47 48 #include <machine/vmm.h> 49 50 #include <sys/param.h> 51 #include <sys/cpuset.h> 52 #include <sys/vmm_data.h> 53 54 struct vm_create_req { 55 char name[VM_MAX_NAMELEN]; 56 uint64_t flags; 57 }; 58 59 60 struct vm_destroy_req { 61 char name[VM_MAX_NAMELEN]; 62 }; 63 64 struct vm_memmap { 65 vm_paddr_t gpa; 66 int segid; /* memory segment */ 67 vm_ooffset_t segoff; /* offset into memory segment */ 68 size_t len; /* mmap length */ 69 int prot; /* RWX */ 70 int flags; 71 }; 72 #define VM_MEMMAP_F_WIRED 0x01 73 #define VM_MEMMAP_F_IOMMU 0x02 74 75 struct vm_munmap { 76 vm_paddr_t gpa; 77 size_t len; 78 }; 79 80 #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) 81 struct vm_memseg { 82 int segid; 83 size_t len; 84 char name[VM_MAX_SEG_NAMELEN]; 85 }; 86 87 struct vm_register { 88 int cpuid; 89 int regnum; /* enum vm_reg_name */ 90 uint64_t regval; 91 }; 92 93 struct vm_seg_desc { /* data or code segment */ 94 int cpuid; 95 int regnum; /* enum vm_reg_name */ 96 struct seg_desc desc; 97 }; 98 99 struct vm_register_set { 100 int cpuid; 101 unsigned int count; 102 const int *regnums; /* enum vm_reg_name */ 103 uint64_t *regvals; 104 }; 105 106 struct vm_exception { 107 int cpuid; 108 int vector; 109 uint32_t error_code; 110 int error_code_valid; 111 int restart_instruction; 112 }; 113 114 struct vm_lapic_msi { 115 uint64_t msg; 116 uint64_t addr; 117 }; 118 119 struct vm_lapic_irq { 120 int cpuid; 121 int vector; 122 }; 123 124 struct vm_ioapic_irq { 125 int irq; 126 }; 127 128 struct vm_isa_irq { 129 int atpic_irq; 130 int ioapic_irq; 131 }; 132 133 struct vm_isa_irq_trigger { 134 int atpic_irq; 135 enum vm_intr_trigger trigger; 136 }; 137 138 struct vm_capability { 139 int cpuid; 140 enum vm_cap_type captype; 141 int capval; 142 int allcpus; 143 }; 144 145 struct vm_pptdev { 146 int pptfd; 147 }; 148 149 struct vm_pptdev_mmio { 150 int pptfd; 151 vm_paddr_t gpa; 152 vm_paddr_t hpa; 153 size_t len; 154 }; 155 156 struct vm_pptdev_msi { 157 int vcpu; 158 int pptfd; 159 int numvec; /* 0 means disabled */ 160 uint64_t msg; 161 uint64_t addr; 162 }; 163 164 struct vm_pptdev_msix { 165 int vcpu; 166 int pptfd; 167 int idx; 168 uint64_t msg; 169 uint32_t vector_control; 170 uint64_t addr; 171 }; 172 173 struct vm_pptdev_limits { 174 int pptfd; 175 int msi_limit; 176 int msix_limit; 177 }; 178 179 struct vm_nmi { 180 int cpuid; 181 }; 182 183 #define MAX_VM_STATS 64 184 185 struct vm_stats { 186 int cpuid; /* in */ 187 int index; /* in */ 188 int num_entries; /* out */ 189 struct timeval tv; 190 uint64_t statbuf[MAX_VM_STATS]; 191 }; 192 193 struct vm_stat_desc { 194 int index; /* in */ 195 char desc[128]; /* out */ 196 }; 197 198 struct vm_x2apic { 199 int cpuid; 200 enum x2apic_state state; 201 }; 202 203 struct vm_gpa_pte { 204 uint64_t gpa; /* in */ 205 uint64_t pte[4]; /* out */ 206 int ptenum; 207 }; 208 209 struct vm_hpet_cap { 210 uint32_t capabilities; /* lower 32 bits of HPET capabilities */ 211 }; 212 213 struct vm_suspend { 214 enum vm_suspend_how how; 215 }; 216 217 #define VM_REINIT_F_FORCE_SUSPEND (1 << 0) 218 219 struct vm_reinit { 220 uint64_t flags; 221 }; 222 223 struct vm_gla2gpa { 224 int vcpuid; /* inputs */ 225 int prot; /* PROT_READ or PROT_WRITE */ 226 uint64_t gla; 227 struct vm_guest_paging paging; 228 int fault; /* outputs */ 229 uint64_t gpa; 230 }; 231 232 struct vm_activate_cpu { 233 int vcpuid; 234 }; 235 236 struct vm_cpuset { 237 int which; 238 int cpusetsize; 239 #ifndef _KERNEL 240 cpuset_t *cpus; 241 #else 242 void *cpus; 243 #endif 244 }; 245 #define VM_ACTIVE_CPUS 0 246 #define VM_SUSPENDED_CPUS 1 247 #define VM_DEBUG_CPUS 2 248 249 struct vm_intinfo { 250 int vcpuid; 251 uint64_t info1; 252 uint64_t info2; 253 }; 254 255 struct vm_rtc_data { 256 int offset; 257 uint8_t value; 258 }; 259 260 struct vm_devmem_offset { 261 int segid; 262 off_t offset; 263 }; 264 265 struct vm_cpu_topology { 266 uint16_t sockets; 267 uint16_t cores; 268 uint16_t threads; 269 uint16_t maxcpus; 270 }; 271 272 struct vm_readwrite_kernemu_device { 273 int vcpuid; 274 unsigned access_width : 3; 275 unsigned _unused : 29; 276 uint64_t gpa; 277 uint64_t value; 278 }; 279 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); 280 281 enum vcpu_reset_kind { 282 VRK_RESET = 0, 283 /* 284 * The reset performed by an INIT IPI clears much of the CPU state, but 285 * some portions are left untouched, unlike VRK_RESET, which represents 286 * a "full" reset as if the system was freshly powered on. 287 */ 288 VRK_INIT = 1, 289 }; 290 291 struct vm_vcpu_reset { 292 int vcpuid; 293 uint32_t kind; /* contains: enum vcpu_reset_kind */ 294 }; 295 296 struct vm_run_state { 297 int vcpuid; 298 uint32_t state; /* of enum cpu_init_status type */ 299 uint8_t sipi_vector; /* vector of SIPI, if any */ 300 uint8_t _pad[3]; 301 }; 302 303 /* Transfer data for VM_GET_FPU and VM_SET_FPU */ 304 struct vm_fpu_state { 305 int vcpuid; 306 void *buf; 307 size_t len; 308 }; 309 310 struct vm_fpu_desc_entry { 311 uint64_t vfde_feature; 312 uint32_t vfde_size; 313 uint32_t vfde_off; 314 }; 315 316 struct vm_fpu_desc { 317 struct vm_fpu_desc_entry *vfd_entry_data; 318 size_t vfd_req_size; 319 uint32_t vfd_num_entries; 320 }; 321 322 struct vmm_resv_query { 323 size_t vrq_free_sz; 324 size_t vrq_alloc_sz; 325 size_t vrq_alloc_transient_sz; 326 size_t vrq_limit; 327 }; 328 329 struct vmm_resv_target { 330 /* Target size for VMM reservoir */ 331 size_t vrt_target_sz; 332 333 /* 334 * Change of reservoir size to meet target will be done in multiple 335 * steps of chunk size (or smaller) 336 */ 337 size_t vrt_chunk_sz; 338 339 /* 340 * Resultant size of reservoir after operation. Should match target 341 * size, except when interrupted. 342 */ 343 size_t vrt_result_sz; 344 }; 345 346 /* 347 * struct vmm_dirty_tracker is used for tracking dirty guest pages during 348 * e.g. live migration. 349 * 350 * - The `vdt_start_gpa` field specifies the offset from the beginning of 351 * guest physical memory to track; 352 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the 353 * given start address. Each bit indicates whether the given guest page 354 * is dirty or not. 355 * - `vdt_pfns_len` specifies the length of the of the guest physical memory 356 * region in bytes. It also de facto bounds the range of guest addresses 357 * we will examine on any one `VM_TRACK_DIRTY_PAGES` ioctl(). If the 358 * range of the bit vector spans an unallocated region (or extends beyond 359 * the end of the guest physical address space) the corresponding bits in 360 * `vdt_pfns` will be zeroed. 361 */ 362 struct vmm_dirty_tracker { 363 uint64_t vdt_start_gpa; 364 size_t vdt_len; /* length of region */ 365 void *vdt_pfns; /* bit vector of dirty bits */ 366 }; 367 368 /* Current (arbitrary) max length for vm_data_xfer */ 369 #define VM_DATA_XFER_LIMIT 8192 370 371 #define VDX_FLAG_READ_COPYIN (1 << 0) 372 #define VDX_FLAG_WRITE_COPYOUT (1 << 1) 373 374 #define VDX_FLAGS_VALID (VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT) 375 376 struct vm_data_xfer { 377 int vdx_vcpuid; 378 uint16_t vdx_class; 379 uint16_t vdx_version; 380 uint32_t vdx_flags; 381 uint32_t vdx_len; 382 uint32_t vdx_result_len; 383 void *vdx_data; 384 }; 385 386 struct vm_vcpu_cpuid_config { 387 int vvcc_vcpuid; 388 uint32_t vvcc_flags; 389 uint32_t vvcc_nent; 390 uint32_t _pad; 391 void *vvcc_entries; 392 }; 393 394 /* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */ 395 struct vm_legacy_cpuid { 396 int vlc_vcpuid; 397 uint32_t vlc_eax; 398 uint32_t vlc_ebx; 399 uint32_t vlc_ecx; 400 uint32_t vlc_edx; 401 }; 402 403 /* 404 * VMM Interface Version 405 * 406 * Despite the fact that the kernel interface to bhyve is explicitly considered 407 * Private, there are out-of-gate consumers which utilize it. While they assume 408 * the risk of any breakage incurred by changes to bhyve, we can at least try to 409 * make it easier to detect changes by exposing a "version" of the interface. 410 * It can also be used by the in-gate userland to detect if packaging updates 411 * somehow result in the userland and kernel falling out of sync. 412 * 413 * There are no established criteria for the magnitude of change which requires 414 * this version to be incremented, and maintenance of it is considered a 415 * best-effort activity. Nothing is to be inferred about the magnitude of a 416 * change when the version is modified. It follows no rules like semver. 417 */ 418 #define VMM_CURRENT_INTERFACE_VERSION 15 419 420 421 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) 422 #define VMM_IOC_BASE (('v' << 16) | ('m' << 8)) 423 #define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8)) 424 #define VMM_CPU_IOC_BASE (('v' << 16) | ('p' << 8)) 425 426 /* Operations performed on the vmmctl device */ 427 #define VMM_CREATE_VM (VMMCTL_IOC_BASE | 0x01) 428 #define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02) 429 #define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03) 430 #define VMM_INTERFACE_VERSION (VMMCTL_IOC_BASE | 0x04) 431 #define VMM_CHECK_IOMMU (VMMCTL_IOC_BASE | 0x05) 432 433 #define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10) 434 #define VMM_RESV_SET_TARGET (VMMCTL_IOC_BASE | 0x11) 435 436 /* Operations performed in the context of a given vCPU */ 437 #define VM_RUN (VMM_CPU_IOC_BASE | 0x01) 438 #define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02) 439 #define VM_GET_REGISTER (VMM_CPU_IOC_BASE | 0x03) 440 #define VM_SET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x04) 441 #define VM_GET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x05) 442 #define VM_SET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x06) 443 #define VM_GET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x07) 444 #define VM_INJECT_EXCEPTION (VMM_CPU_IOC_BASE | 0x08) 445 #define VM_SET_CAPABILITY (VMM_CPU_IOC_BASE | 0x09) 446 #define VM_GET_CAPABILITY (VMM_CPU_IOC_BASE | 0x0a) 447 #define VM_PPTDEV_MSI (VMM_CPU_IOC_BASE | 0x0b) 448 #define VM_PPTDEV_MSIX (VMM_CPU_IOC_BASE | 0x0c) 449 #define VM_SET_X2APIC_STATE (VMM_CPU_IOC_BASE | 0x0d) 450 #define VM_GLA2GPA (VMM_CPU_IOC_BASE | 0x0e) 451 #define VM_GLA2GPA_NOFAULT (VMM_CPU_IOC_BASE | 0x0f) 452 #define VM_ACTIVATE_CPU (VMM_CPU_IOC_BASE | 0x10) 453 #define VM_SET_INTINFO (VMM_CPU_IOC_BASE | 0x11) 454 #define VM_GET_INTINFO (VMM_CPU_IOC_BASE | 0x12) 455 #define VM_RESTART_INSTRUCTION (VMM_CPU_IOC_BASE | 0x13) 456 #define VM_SET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x14) 457 #define VM_GET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x15) 458 #define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16) 459 #define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17) 460 #define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18) 461 #define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19) 462 #define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a) 463 #define VM_GET_CPUID (VMM_CPU_IOC_BASE | 0x1b) 464 #define VM_SET_CPUID (VMM_CPU_IOC_BASE | 0x1c) 465 #define VM_LEGACY_CPUID (VMM_CPU_IOC_BASE | 0x1d) 466 467 /* Operations requiring write-locking the VM */ 468 #define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01) 469 #define VM_BIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x02) 470 #define VM_UNBIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x03) 471 #define VM_MAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x04) 472 #define VM_ALLOC_MEMSEG (VMM_LOCK_IOC_BASE | 0x05) 473 #define VM_MMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x06) 474 #define VM_PMTMR_LOCATE (VMM_LOCK_IOC_BASE | 0x07) 475 #define VM_MUNMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x08) 476 #define VM_UNMAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x09) 477 #define VM_PAUSE (VMM_LOCK_IOC_BASE | 0x0a) 478 #define VM_RESUME (VMM_LOCK_IOC_BASE | 0x0b) 479 480 #define VM_WRLOCK_CYCLE (VMM_LOCK_IOC_BASE | 0xff) 481 482 /* All other ioctls */ 483 #define VM_GET_GPA_PMAP (VMM_IOC_BASE | 0x01) 484 #define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02) 485 #define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03) 486 487 #define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04) 488 #define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05) 489 #define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06) 490 491 #define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07) 492 #define VM_IOAPIC_DEASSERT_IRQ (VMM_IOC_BASE | 0x08) 493 #define VM_IOAPIC_PULSE_IRQ (VMM_IOC_BASE | 0x09) 494 495 #define VM_ISA_ASSERT_IRQ (VMM_IOC_BASE | 0x0a) 496 #define VM_ISA_DEASSERT_IRQ (VMM_IOC_BASE | 0x0b) 497 #define VM_ISA_PULSE_IRQ (VMM_IOC_BASE | 0x0c) 498 #define VM_ISA_SET_IRQ_TRIGGER (VMM_IOC_BASE | 0x0d) 499 500 #define VM_RTC_WRITE (VMM_IOC_BASE | 0x0e) 501 #define VM_RTC_READ (VMM_IOC_BASE | 0x0f) 502 #define VM_RTC_SETTIME (VMM_IOC_BASE | 0x10) 503 #define VM_RTC_GETTIME (VMM_IOC_BASE | 0x11) 504 505 #define VM_SUSPEND (VMM_IOC_BASE | 0x12) 506 507 #define VM_IOAPIC_PINCOUNT (VMM_IOC_BASE | 0x13) 508 #define VM_GET_PPTDEV_LIMITS (VMM_IOC_BASE | 0x14) 509 #define VM_GET_HPET_CAPABILITIES (VMM_IOC_BASE | 0x15) 510 511 #define VM_STATS_IOC (VMM_IOC_BASE | 0x16) 512 #define VM_STAT_DESC (VMM_IOC_BASE | 0x17) 513 514 #define VM_INJECT_NMI (VMM_IOC_BASE | 0x18) 515 #define VM_GET_X2APIC_STATE (VMM_IOC_BASE | 0x19) 516 #define VM_SET_TOPOLOGY (VMM_IOC_BASE | 0x1a) 517 #define VM_GET_TOPOLOGY (VMM_IOC_BASE | 0x1b) 518 #define VM_GET_CPUS (VMM_IOC_BASE | 0x1c) 519 #define VM_SUSPEND_CPU (VMM_IOC_BASE | 0x1d) 520 #define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e) 521 522 #define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f) 523 524 /* Note: forces a barrier on a flush operation before returning. */ 525 #define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20) 526 #define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21) 527 528 #define VM_DATA_READ (VMM_IOC_BASE | 0x22) 529 #define VM_DATA_WRITE (VMM_IOC_BASE | 0x23) 530 531 #define VM_SET_AUTODESTRUCT (VMM_IOC_BASE | 0x24) 532 #define VM_DESTROY_SELF (VMM_IOC_BASE | 0x25) 533 #define VM_DESTROY_PENDING (VMM_IOC_BASE | 0x26) 534 535 #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) 536 537 #define VMM_CTL_DEV "/dev/vmmctl" 538 539 #endif 540