1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2015 Pluribus Networks Inc. 41 * Copyright 2019 Joyent, Inc. 42 * Copyright 2023 Oxide Computer Company 43 */ 44 45 #ifndef _VMM_DEV_H_ 46 #define _VMM_DEV_H_ 47 48 #include <machine/vmm.h> 49 50 #include <sys/param.h> 51 #include <sys/cpuset.h> 52 #include <sys/vmm_data.h> 53 54 struct vm_create_req { 55 char name[VM_MAX_NAMELEN]; 56 uint64_t flags; 57 }; 58 59 60 struct vm_destroy_req { 61 char name[VM_MAX_NAMELEN]; 62 }; 63 64 struct vm_memmap { 65 vm_paddr_t gpa; 66 int segid; /* memory segment */ 67 vm_ooffset_t segoff; /* offset into memory segment */ 68 size_t len; /* mmap length */ 69 int prot; /* RWX */ 70 int flags; 71 }; 72 #define VM_MEMMAP_F_WIRED 0x01 73 #define VM_MEMMAP_F_IOMMU 0x02 74 75 struct vm_munmap { 76 vm_paddr_t gpa; 77 size_t len; 78 }; 79 80 #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) 81 struct vm_memseg { 82 int segid; 83 size_t len; 84 char name[VM_MAX_SEG_NAMELEN]; 85 }; 86 87 struct vm_register { 88 int cpuid; 89 int regnum; /* enum vm_reg_name */ 90 uint64_t regval; 91 }; 92 93 struct vm_seg_desc { /* data or code segment */ 94 int cpuid; 95 int regnum; /* enum vm_reg_name */ 96 struct seg_desc desc; 97 }; 98 99 struct vm_register_set { 100 int cpuid; 101 unsigned int count; 102 const int *regnums; /* enum vm_reg_name */ 103 uint64_t *regvals; 104 }; 105 106 struct vm_exception { 107 int cpuid; 108 int vector; 109 uint32_t error_code; 110 int error_code_valid; 111 int restart_instruction; 112 }; 113 114 struct vm_lapic_msi { 115 uint64_t msg; 116 uint64_t addr; 117 }; 118 119 struct vm_lapic_irq { 120 int cpuid; 121 int vector; 122 }; 123 124 struct vm_ioapic_irq { 125 int irq; 126 }; 127 128 struct vm_isa_irq { 129 int atpic_irq; 130 int ioapic_irq; 131 }; 132 133 struct vm_isa_irq_trigger { 134 int atpic_irq; 135 enum vm_intr_trigger trigger; 136 }; 137 138 struct vm_capability { 139 int cpuid; 140 enum vm_cap_type captype; 141 int capval; 142 int allcpus; 143 }; 144 145 struct vm_pptdev { 146 int pptfd; 147 }; 148 149 struct vm_pptdev_mmio { 150 int pptfd; 151 vm_paddr_t gpa; 152 vm_paddr_t hpa; 153 size_t len; 154 }; 155 156 struct vm_pptdev_msi { 157 int vcpu; 158 int pptfd; 159 int numvec; /* 0 means disabled */ 160 uint64_t msg; 161 uint64_t addr; 162 }; 163 164 struct vm_pptdev_msix { 165 int vcpu; 166 int pptfd; 167 int idx; 168 uint64_t msg; 169 uint32_t vector_control; 170 uint64_t addr; 171 }; 172 173 struct vm_pptdev_limits { 174 int pptfd; 175 int msi_limit; 176 int msix_limit; 177 }; 178 179 struct vm_nmi { 180 int cpuid; 181 }; 182 183 #define MAX_VM_STATS 64 184 185 struct vm_stats { 186 int cpuid; /* in */ 187 int index; /* in */ 188 int num_entries; /* out */ 189 struct timeval tv; 190 uint64_t statbuf[MAX_VM_STATS]; 191 }; 192 193 struct vm_stat_desc { 194 int index; /* in */ 195 char desc[128]; /* out */ 196 }; 197 198 struct vm_x2apic { 199 int cpuid; 200 enum x2apic_state state; 201 }; 202 203 struct vm_gpa_pte { 204 uint64_t gpa; /* in */ 205 uint64_t pte[4]; /* out */ 206 int ptenum; 207 }; 208 209 struct vm_hpet_cap { 210 uint32_t capabilities; /* lower 32 bits of HPET capabilities */ 211 }; 212 213 struct vm_suspend { 214 enum vm_suspend_how how; 215 int source; 216 }; 217 218 /* 219 * Deprecated flags for vm_reinit`flags: 220 * 221 * Suspend (by force) VM as part of reinit. Effectively a no-op since 222 * suspension requirements during reinit have been lifted. 223 * 224 * #define VM_REINIT_F_FORCE_SUSPEND (1 << 0) 225 */ 226 227 struct vm_reinit { 228 uint64_t flags; 229 }; 230 231 struct vm_gla2gpa { 232 int vcpuid; /* inputs */ 233 int prot; /* PROT_READ or PROT_WRITE */ 234 uint64_t gla; 235 struct vm_guest_paging paging; 236 int fault; /* outputs */ 237 uint64_t gpa; 238 }; 239 240 struct vm_activate_cpu { 241 int vcpuid; 242 }; 243 244 struct vm_cpuset { 245 int which; 246 int cpusetsize; 247 #ifndef _KERNEL 248 cpuset_t *cpus; 249 #else 250 void *cpus; 251 #endif 252 }; 253 #define VM_ACTIVE_CPUS 0 254 /* 255 * Deprecated: 256 * #define VM_SUSPENDED_CPUS 1 257 */ 258 #define VM_DEBUG_CPUS 2 259 260 struct vm_intinfo { 261 int vcpuid; 262 uint64_t info1; 263 uint64_t info2; 264 }; 265 266 struct vm_rtc_data { 267 int offset; 268 uint8_t value; 269 }; 270 271 struct vm_devmem_offset { 272 int segid; 273 off_t offset; 274 }; 275 276 struct vm_cpu_topology { 277 uint16_t sockets; 278 uint16_t cores; 279 uint16_t threads; 280 uint16_t maxcpus; 281 }; 282 283 struct vm_readwrite_kernemu_device { 284 int vcpuid; 285 unsigned access_width : 3; 286 unsigned _unused : 29; 287 uint64_t gpa; 288 uint64_t value; 289 }; 290 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); 291 292 enum vcpu_reset_kind { 293 VRK_RESET = 0, 294 /* 295 * The reset performed by an INIT IPI clears much of the CPU state, but 296 * some portions are left untouched, unlike VRK_RESET, which represents 297 * a "full" reset as if the system was freshly powered on. 298 */ 299 VRK_INIT = 1, 300 }; 301 302 struct vm_vcpu_reset { 303 int vcpuid; 304 uint32_t kind; /* contains: enum vcpu_reset_kind */ 305 }; 306 307 struct vm_run_state { 308 int vcpuid; 309 uint32_t state; /* of enum cpu_init_status type */ 310 uint8_t sipi_vector; /* vector of SIPI, if any */ 311 uint8_t _pad[3]; 312 }; 313 314 /* Transfer data for VM_GET_FPU and VM_SET_FPU */ 315 struct vm_fpu_state { 316 int vcpuid; 317 void *buf; 318 size_t len; 319 }; 320 321 struct vm_fpu_desc_entry { 322 uint64_t vfde_feature; 323 uint32_t vfde_size; 324 uint32_t vfde_off; 325 }; 326 327 struct vm_fpu_desc { 328 struct vm_fpu_desc_entry *vfd_entry_data; 329 size_t vfd_req_size; 330 uint32_t vfd_num_entries; 331 }; 332 333 struct vmm_resv_query { 334 size_t vrq_free_sz; 335 size_t vrq_alloc_sz; 336 size_t vrq_alloc_transient_sz; 337 size_t vrq_limit; 338 }; 339 340 struct vmm_resv_target { 341 /* Target size for VMM reservoir */ 342 size_t vrt_target_sz; 343 344 /* 345 * Change of reservoir size to meet target will be done in multiple 346 * steps of chunk size (or smaller) 347 */ 348 size_t vrt_chunk_sz; 349 350 /* 351 * Resultant size of reservoir after operation. Should match target 352 * size, except when interrupted. 353 */ 354 size_t vrt_result_sz; 355 }; 356 357 /* 358 * struct vmm_dirty_tracker is used for tracking dirty guest pages during 359 * e.g. live migration. 360 * 361 * - The `vdt_start_gpa` field specifies the offset from the beginning of 362 * guest physical memory to track; 363 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the 364 * given start address. Each bit indicates whether the given guest page 365 * is dirty or not. 366 * - `vdt_pfns_len` specifies the length of the of the guest physical memory 367 * region in bytes. It also de facto bounds the range of guest addresses 368 * we will examine on any one `VM_TRACK_DIRTY_PAGES` ioctl(). If the 369 * range of the bit vector spans an unallocated region (or extends beyond 370 * the end of the guest physical address space) the corresponding bits in 371 * `vdt_pfns` will be zeroed. 372 */ 373 struct vmm_dirty_tracker { 374 uint64_t vdt_start_gpa; 375 size_t vdt_len; /* length of region */ 376 void *vdt_pfns; /* bit vector of dirty bits */ 377 }; 378 379 /* Current (arbitrary) max length for vm_data_xfer */ 380 #define VM_DATA_XFER_LIMIT 8192 381 382 #define VDX_FLAG_READ_COPYIN (1 << 0) 383 #define VDX_FLAG_WRITE_COPYOUT (1 << 1) 384 385 #define VDX_FLAGS_VALID (VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT) 386 387 struct vm_data_xfer { 388 int vdx_vcpuid; 389 uint16_t vdx_class; 390 uint16_t vdx_version; 391 uint32_t vdx_flags; 392 uint32_t vdx_len; 393 uint32_t vdx_result_len; 394 void *vdx_data; 395 }; 396 397 struct vm_vcpu_cpuid_config { 398 int vvcc_vcpuid; 399 uint32_t vvcc_flags; 400 uint32_t vvcc_nent; 401 uint32_t _pad; 402 void *vvcc_entries; 403 }; 404 405 /* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */ 406 struct vm_legacy_cpuid { 407 int vlc_vcpuid; 408 uint32_t vlc_eax; 409 uint32_t vlc_ebx; 410 uint32_t vlc_ecx; 411 uint32_t vlc_edx; 412 }; 413 414 /* 415 * VMM Interface Version 416 * 417 * Despite the fact that the kernel interface to bhyve is explicitly considered 418 * Private, there are out-of-gate consumers which utilize it. While they assume 419 * the risk of any breakage incurred by changes to bhyve, we can at least try to 420 * make it easier to detect changes by exposing a "version" of the interface. 421 * It can also be used by the in-gate userland to detect if packaging updates 422 * somehow result in the userland and kernel falling out of sync. 423 * 424 * There are no established criteria for the magnitude of change which requires 425 * this version to be incremented, and maintenance of it is considered a 426 * best-effort activity. Nothing is to be inferred about the magnitude of a 427 * change when the version is modified. It follows no rules like semver. 428 */ 429 #define VMM_CURRENT_INTERFACE_VERSION 16 430 431 432 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) 433 #define VMM_IOC_BASE (('v' << 16) | ('m' << 8)) 434 #define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8)) 435 #define VMM_CPU_IOC_BASE (('v' << 16) | ('p' << 8)) 436 437 /* Operations performed on the vmmctl device */ 438 #define VMM_CREATE_VM (VMMCTL_IOC_BASE | 0x01) 439 #define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02) 440 #define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03) 441 #define VMM_INTERFACE_VERSION (VMMCTL_IOC_BASE | 0x04) 442 #define VMM_CHECK_IOMMU (VMMCTL_IOC_BASE | 0x05) 443 444 #define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10) 445 #define VMM_RESV_SET_TARGET (VMMCTL_IOC_BASE | 0x11) 446 447 /* Operations performed in the context of a given vCPU */ 448 #define VM_RUN (VMM_CPU_IOC_BASE | 0x01) 449 #define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02) 450 #define VM_GET_REGISTER (VMM_CPU_IOC_BASE | 0x03) 451 #define VM_SET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x04) 452 #define VM_GET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x05) 453 #define VM_SET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x06) 454 #define VM_GET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x07) 455 #define VM_INJECT_EXCEPTION (VMM_CPU_IOC_BASE | 0x08) 456 #define VM_SET_CAPABILITY (VMM_CPU_IOC_BASE | 0x09) 457 #define VM_GET_CAPABILITY (VMM_CPU_IOC_BASE | 0x0a) 458 #define VM_PPTDEV_MSI (VMM_CPU_IOC_BASE | 0x0b) 459 #define VM_PPTDEV_MSIX (VMM_CPU_IOC_BASE | 0x0c) 460 #define VM_SET_X2APIC_STATE (VMM_CPU_IOC_BASE | 0x0d) 461 #define VM_GLA2GPA (VMM_CPU_IOC_BASE | 0x0e) 462 #define VM_GLA2GPA_NOFAULT (VMM_CPU_IOC_BASE | 0x0f) 463 #define VM_ACTIVATE_CPU (VMM_CPU_IOC_BASE | 0x10) 464 #define VM_SET_INTINFO (VMM_CPU_IOC_BASE | 0x11) 465 #define VM_GET_INTINFO (VMM_CPU_IOC_BASE | 0x12) 466 #define VM_RESTART_INSTRUCTION (VMM_CPU_IOC_BASE | 0x13) 467 #define VM_SET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x14) 468 #define VM_GET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x15) 469 #define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16) 470 #define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17) 471 #define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18) 472 #define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19) 473 #define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a) 474 #define VM_GET_CPUID (VMM_CPU_IOC_BASE | 0x1b) 475 #define VM_SET_CPUID (VMM_CPU_IOC_BASE | 0x1c) 476 #define VM_LEGACY_CPUID (VMM_CPU_IOC_BASE | 0x1d) 477 478 /* Operations requiring write-locking the VM */ 479 #define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01) 480 #define VM_BIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x02) 481 #define VM_UNBIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x03) 482 #define VM_MAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x04) 483 #define VM_ALLOC_MEMSEG (VMM_LOCK_IOC_BASE | 0x05) 484 #define VM_MMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x06) 485 #define VM_PMTMR_LOCATE (VMM_LOCK_IOC_BASE | 0x07) 486 #define VM_MUNMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x08) 487 #define VM_UNMAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x09) 488 #define VM_PAUSE (VMM_LOCK_IOC_BASE | 0x0a) 489 #define VM_RESUME (VMM_LOCK_IOC_BASE | 0x0b) 490 491 #define VM_WRLOCK_CYCLE (VMM_LOCK_IOC_BASE | 0xff) 492 493 /* All other ioctls */ 494 #define VM_GET_GPA_PMAP (VMM_IOC_BASE | 0x01) 495 #define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02) 496 #define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03) 497 498 #define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04) 499 #define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05) 500 #define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06) 501 502 #define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07) 503 #define VM_IOAPIC_DEASSERT_IRQ (VMM_IOC_BASE | 0x08) 504 #define VM_IOAPIC_PULSE_IRQ (VMM_IOC_BASE | 0x09) 505 506 #define VM_ISA_ASSERT_IRQ (VMM_IOC_BASE | 0x0a) 507 #define VM_ISA_DEASSERT_IRQ (VMM_IOC_BASE | 0x0b) 508 #define VM_ISA_PULSE_IRQ (VMM_IOC_BASE | 0x0c) 509 #define VM_ISA_SET_IRQ_TRIGGER (VMM_IOC_BASE | 0x0d) 510 511 #define VM_RTC_WRITE (VMM_IOC_BASE | 0x0e) 512 #define VM_RTC_READ (VMM_IOC_BASE | 0x0f) 513 #define VM_RTC_SETTIME (VMM_IOC_BASE | 0x10) 514 #define VM_RTC_GETTIME (VMM_IOC_BASE | 0x11) 515 516 #define VM_SUSPEND (VMM_IOC_BASE | 0x12) 517 518 #define VM_IOAPIC_PINCOUNT (VMM_IOC_BASE | 0x13) 519 #define VM_GET_PPTDEV_LIMITS (VMM_IOC_BASE | 0x14) 520 #define VM_GET_HPET_CAPABILITIES (VMM_IOC_BASE | 0x15) 521 522 #define VM_STATS_IOC (VMM_IOC_BASE | 0x16) 523 #define VM_STAT_DESC (VMM_IOC_BASE | 0x17) 524 525 #define VM_INJECT_NMI (VMM_IOC_BASE | 0x18) 526 #define VM_GET_X2APIC_STATE (VMM_IOC_BASE | 0x19) 527 #define VM_SET_TOPOLOGY (VMM_IOC_BASE | 0x1a) 528 #define VM_GET_TOPOLOGY (VMM_IOC_BASE | 0x1b) 529 #define VM_GET_CPUS (VMM_IOC_BASE | 0x1c) 530 #define VM_SUSPEND_CPU (VMM_IOC_BASE | 0x1d) 531 #define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e) 532 533 #define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f) 534 535 /* Note: forces a barrier on a flush operation before returning. */ 536 #define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20) 537 #define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21) 538 539 #define VM_DATA_READ (VMM_IOC_BASE | 0x22) 540 #define VM_DATA_WRITE (VMM_IOC_BASE | 0x23) 541 542 #define VM_SET_AUTODESTRUCT (VMM_IOC_BASE | 0x24) 543 #define VM_DESTROY_SELF (VMM_IOC_BASE | 0x25) 544 #define VM_DESTROY_PENDING (VMM_IOC_BASE | 0x26) 545 546 #define VM_VCPU_BARRIER (VMM_IOC_BASE | 0x27) 547 548 #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) 549 550 #define VMM_CTL_DEV "/dev/vmmctl" 551 552 #endif 553