1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2015 Pluribus Networks Inc. 41 * Copyright 2019 Joyent, Inc. 42 * Copyright 2023 Oxide Computer Company 43 */ 44 45 #ifndef _VMM_DEV_H_ 46 #define _VMM_DEV_H_ 47 48 #include <machine/vmm.h> 49 50 #include <sys/param.h> 51 #include <sys/cpuset.h> 52 #include <sys/vmm_data.h> 53 54 struct vm_create_req { 55 char name[VM_MAX_NAMELEN]; 56 uint64_t flags; 57 }; 58 59 60 struct vm_destroy_req { 61 char name[VM_MAX_NAMELEN]; 62 }; 63 64 struct vm_memmap { 65 vm_paddr_t gpa; 66 int segid; /* memory segment */ 67 vm_ooffset_t segoff; /* offset into memory segment */ 68 size_t len; /* mmap length */ 69 int prot; /* RWX */ 70 int flags; 71 }; 72 #define VM_MEMMAP_F_WIRED 0x01 73 #define VM_MEMMAP_F_IOMMU 0x02 74 75 struct vm_munmap { 76 vm_paddr_t gpa; 77 size_t len; 78 }; 79 80 #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) 81 struct vm_memseg { 82 int segid; 83 size_t len; 84 char name[VM_MAX_SEG_NAMELEN]; 85 }; 86 87 struct vm_register { 88 int cpuid; 89 int regnum; /* enum vm_reg_name */ 90 uint64_t regval; 91 }; 92 93 struct vm_seg_desc { /* data or code segment */ 94 int cpuid; 95 int regnum; /* enum vm_reg_name */ 96 struct seg_desc desc; 97 }; 98 99 struct vm_register_set { 100 int cpuid; 101 unsigned int count; 102 const int *regnums; /* enum vm_reg_name */ 103 uint64_t *regvals; 104 }; 105 106 struct vm_exception { 107 int cpuid; 108 int vector; 109 uint32_t error_code; 110 int error_code_valid; 111 int restart_instruction; 112 }; 113 114 struct vm_lapic_msi { 115 uint64_t msg; 116 uint64_t addr; 117 }; 118 119 struct vm_lapic_irq { 120 int cpuid; 121 int vector; 122 }; 123 124 struct vm_ioapic_irq { 125 int irq; 126 }; 127 128 struct vm_isa_irq { 129 int atpic_irq; 130 int ioapic_irq; 131 }; 132 133 struct vm_isa_irq_trigger { 134 int atpic_irq; 135 enum vm_intr_trigger trigger; 136 }; 137 138 struct vm_capability { 139 int cpuid; 140 enum vm_cap_type captype; 141 int capval; 142 int allcpus; 143 }; 144 145 struct vm_pptdev { 146 int pptfd; 147 }; 148 149 struct vm_pptdev_mmio { 150 int pptfd; 151 vm_paddr_t gpa; 152 vm_paddr_t hpa; 153 size_t len; 154 }; 155 156 struct vm_pptdev_msi { 157 int vcpu; 158 int pptfd; 159 int numvec; /* 0 means disabled */ 160 uint64_t msg; 161 uint64_t addr; 162 }; 163 164 struct vm_pptdev_msix { 165 int vcpu; 166 int pptfd; 167 int idx; 168 uint64_t msg; 169 uint32_t vector_control; 170 uint64_t addr; 171 }; 172 173 struct vm_pptdev_limits { 174 int pptfd; 175 int msi_limit; 176 int msix_limit; 177 }; 178 179 struct vm_nmi { 180 int cpuid; 181 }; 182 183 #define MAX_VM_STATS 64 184 185 struct vm_stats { 186 int cpuid; /* in */ 187 int index; /* in */ 188 int num_entries; /* out */ 189 struct timeval tv; 190 uint64_t statbuf[MAX_VM_STATS]; 191 }; 192 193 struct vm_stat_desc { 194 int index; /* in */ 195 char desc[128]; /* out */ 196 }; 197 198 struct vm_x2apic { 199 int cpuid; 200 enum x2apic_state state; 201 }; 202 203 struct vm_gpa_pte { 204 uint64_t gpa; /* in */ 205 uint64_t pte[4]; /* out */ 206 int ptenum; 207 }; 208 209 struct vm_hpet_cap { 210 uint32_t capabilities; /* lower 32 bits of HPET capabilities */ 211 }; 212 213 struct vm_suspend { 214 enum vm_suspend_how how; 215 }; 216 217 #define VM_REINIT_F_FORCE_SUSPEND (1 << 0) 218 219 struct vm_reinit { 220 uint64_t flags; 221 }; 222 223 struct vm_gla2gpa { 224 int vcpuid; /* inputs */ 225 int prot; /* PROT_READ or PROT_WRITE */ 226 uint64_t gla; 227 struct vm_guest_paging paging; 228 int fault; /* outputs */ 229 uint64_t gpa; 230 }; 231 232 struct vm_activate_cpu { 233 int vcpuid; 234 }; 235 236 struct vm_cpuset { 237 int which; 238 int cpusetsize; 239 #ifndef _KERNEL 240 cpuset_t *cpus; 241 #else 242 void *cpus; 243 #endif 244 }; 245 #define VM_ACTIVE_CPUS 0 246 #define VM_SUSPENDED_CPUS 1 247 #define VM_DEBUG_CPUS 2 248 249 struct vm_intinfo { 250 int vcpuid; 251 uint64_t info1; 252 uint64_t info2; 253 }; 254 255 struct vm_rtc_time { 256 time_t secs; 257 }; 258 259 struct vm_rtc_data { 260 int offset; 261 uint8_t value; 262 }; 263 264 struct vm_devmem_offset { 265 int segid; 266 off_t offset; 267 }; 268 269 struct vm_cpu_topology { 270 uint16_t sockets; 271 uint16_t cores; 272 uint16_t threads; 273 uint16_t maxcpus; 274 }; 275 276 struct vm_readwrite_kernemu_device { 277 int vcpuid; 278 unsigned access_width : 3; 279 unsigned _unused : 29; 280 uint64_t gpa; 281 uint64_t value; 282 }; 283 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); 284 285 enum vcpu_reset_kind { 286 VRK_RESET = 0, 287 /* 288 * The reset performed by an INIT IPI clears much of the CPU state, but 289 * some portions are left untouched, unlike VRK_RESET, which represents 290 * a "full" reset as if the system was freshly powered on. 291 */ 292 VRK_INIT = 1, 293 }; 294 295 struct vm_vcpu_reset { 296 int vcpuid; 297 uint32_t kind; /* contains: enum vcpu_reset_kind */ 298 }; 299 300 struct vm_run_state { 301 int vcpuid; 302 uint32_t state; /* of enum cpu_init_status type */ 303 uint8_t sipi_vector; /* vector of SIPI, if any */ 304 uint8_t _pad[3]; 305 }; 306 307 /* Transfer data for VM_GET_FPU and VM_SET_FPU */ 308 struct vm_fpu_state { 309 int vcpuid; 310 void *buf; 311 size_t len; 312 }; 313 314 struct vm_fpu_desc_entry { 315 uint64_t vfde_feature; 316 uint32_t vfde_size; 317 uint32_t vfde_off; 318 }; 319 320 struct vm_fpu_desc { 321 struct vm_fpu_desc_entry *vfd_entry_data; 322 size_t vfd_req_size; 323 uint32_t vfd_num_entries; 324 }; 325 326 struct vmm_resv_query { 327 size_t vrq_free_sz; 328 size_t vrq_alloc_sz; 329 size_t vrq_alloc_transient_sz; 330 size_t vrq_limit; 331 }; 332 333 struct vmm_resv_target { 334 /* Target size for VMM reservoir */ 335 size_t vrt_target_sz; 336 337 /* 338 * Change of reservoir size to meet target will be done in multiple 339 * steps of chunk size (or smaller) 340 */ 341 size_t vrt_chunk_sz; 342 343 /* 344 * Resultant size of reservoir after operation. Should match target 345 * size, except when interrupted. 346 */ 347 size_t vrt_result_sz; 348 }; 349 350 /* 351 * struct vmm_dirty_tracker is used for tracking dirty guest pages during 352 * e.g. live migration. 353 * 354 * - The `vdt_start_gpa` field specifies the offset from the beginning of 355 * guest physical memory to track; 356 * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the 357 * given start address. Each bit indicates whether the given guest page 358 * is dirty or not. 359 * - `vdt_pfns_len` specifies the length of the of the guest physical memory 360 * region in bytes. It also de facto bounds the range of guest addresses 361 * we will examine on any one `VM_TRACK_DIRTY_PAGES` ioctl(). If the 362 * range of the bit vector spans an unallocated region (or extends beyond 363 * the end of the guest physical address space) the corresponding bits in 364 * `vdt_pfns` will be zeroed. 365 */ 366 struct vmm_dirty_tracker { 367 uint64_t vdt_start_gpa; 368 size_t vdt_len; /* length of region */ 369 void *vdt_pfns; /* bit vector of dirty bits */ 370 }; 371 372 /* Current (arbitrary) max length for vm_data_xfer */ 373 #define VM_DATA_XFER_LIMIT 8192 374 375 #define VDX_FLAG_READ_COPYIN (1 << 0) 376 #define VDX_FLAG_WRITE_COPYOUT (1 << 1) 377 378 #define VDX_FLAGS_VALID (VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT) 379 380 struct vm_data_xfer { 381 int vdx_vcpuid; 382 uint16_t vdx_class; 383 uint16_t vdx_version; 384 uint32_t vdx_flags; 385 uint32_t vdx_len; 386 uint32_t vdx_result_len; 387 void *vdx_data; 388 }; 389 390 struct vm_vcpu_cpuid_config { 391 int vvcc_vcpuid; 392 uint32_t vvcc_flags; 393 uint32_t vvcc_nent; 394 uint32_t _pad; 395 void *vvcc_entries; 396 }; 397 398 /* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */ 399 struct vm_legacy_cpuid { 400 int vlc_vcpuid; 401 uint32_t vlc_eax; 402 uint32_t vlc_ebx; 403 uint32_t vlc_ecx; 404 uint32_t vlc_edx; 405 }; 406 407 /* 408 * VMM Interface Version 409 * 410 * Despite the fact that the kernel interface to bhyve is explicitly considered 411 * Private, there are out-of-gate consumers which utilize it. While they assume 412 * the risk of any breakage incurred by changes to bhyve, we can at least try to 413 * make it easier to detect changes by exposing a "version" of the interface. 414 * It can also be used by the in-gate userland to detect if packaging updates 415 * somehow result in the userland and kernel falling out of sync. 416 * 417 * There are no established criteria for the magnitude of change which requires 418 * this version to be incremented, and maintenance of it is considered a 419 * best-effort activity. Nothing is to be inferred about the magnitude of a 420 * change when the version is modified. It follows no rules like semver. 421 */ 422 #define VMM_CURRENT_INTERFACE_VERSION 10 423 424 425 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) 426 #define VMM_IOC_BASE (('v' << 16) | ('m' << 8)) 427 #define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8)) 428 #define VMM_CPU_IOC_BASE (('v' << 16) | ('p' << 8)) 429 430 /* Operations performed on the vmmctl device */ 431 #define VMM_CREATE_VM (VMMCTL_IOC_BASE | 0x01) 432 #define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02) 433 #define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03) 434 #define VMM_INTERFACE_VERSION (VMMCTL_IOC_BASE | 0x04) 435 #define VMM_CHECK_IOMMU (VMMCTL_IOC_BASE | 0x05) 436 437 #define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10) 438 #define VMM_RESV_SET_TARGET (VMMCTL_IOC_BASE | 0x11) 439 440 /* Operations performed in the context of a given vCPU */ 441 #define VM_RUN (VMM_CPU_IOC_BASE | 0x01) 442 #define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02) 443 #define VM_GET_REGISTER (VMM_CPU_IOC_BASE | 0x03) 444 #define VM_SET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x04) 445 #define VM_GET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x05) 446 #define VM_SET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x06) 447 #define VM_GET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x07) 448 #define VM_INJECT_EXCEPTION (VMM_CPU_IOC_BASE | 0x08) 449 #define VM_SET_CAPABILITY (VMM_CPU_IOC_BASE | 0x09) 450 #define VM_GET_CAPABILITY (VMM_CPU_IOC_BASE | 0x0a) 451 #define VM_PPTDEV_MSI (VMM_CPU_IOC_BASE | 0x0b) 452 #define VM_PPTDEV_MSIX (VMM_CPU_IOC_BASE | 0x0c) 453 #define VM_SET_X2APIC_STATE (VMM_CPU_IOC_BASE | 0x0d) 454 #define VM_GLA2GPA (VMM_CPU_IOC_BASE | 0x0e) 455 #define VM_GLA2GPA_NOFAULT (VMM_CPU_IOC_BASE | 0x0f) 456 #define VM_ACTIVATE_CPU (VMM_CPU_IOC_BASE | 0x10) 457 #define VM_SET_INTINFO (VMM_CPU_IOC_BASE | 0x11) 458 #define VM_GET_INTINFO (VMM_CPU_IOC_BASE | 0x12) 459 #define VM_RESTART_INSTRUCTION (VMM_CPU_IOC_BASE | 0x13) 460 #define VM_SET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x14) 461 #define VM_GET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x15) 462 #define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16) 463 #define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17) 464 #define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18) 465 #define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19) 466 #define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a) 467 #define VM_GET_CPUID (VMM_CPU_IOC_BASE | 0x1b) 468 #define VM_SET_CPUID (VMM_CPU_IOC_BASE | 0x1c) 469 #define VM_LEGACY_CPUID (VMM_CPU_IOC_BASE | 0x1d) 470 471 /* Operations requiring write-locking the VM */ 472 #define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01) 473 #define VM_BIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x02) 474 #define VM_UNBIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x03) 475 #define VM_MAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x04) 476 #define VM_ALLOC_MEMSEG (VMM_LOCK_IOC_BASE | 0x05) 477 #define VM_MMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x06) 478 #define VM_PMTMR_LOCATE (VMM_LOCK_IOC_BASE | 0x07) 479 #define VM_MUNMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x08) 480 #define VM_UNMAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x09) 481 #define VM_PAUSE (VMM_LOCK_IOC_BASE | 0x0a) 482 #define VM_RESUME (VMM_LOCK_IOC_BASE | 0x0b) 483 484 #define VM_WRLOCK_CYCLE (VMM_LOCK_IOC_BASE | 0xff) 485 486 /* All other ioctls */ 487 #define VM_GET_GPA_PMAP (VMM_IOC_BASE | 0x01) 488 #define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02) 489 #define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03) 490 491 #define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04) 492 #define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05) 493 #define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06) 494 495 #define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07) 496 #define VM_IOAPIC_DEASSERT_IRQ (VMM_IOC_BASE | 0x08) 497 #define VM_IOAPIC_PULSE_IRQ (VMM_IOC_BASE | 0x09) 498 499 #define VM_ISA_ASSERT_IRQ (VMM_IOC_BASE | 0x0a) 500 #define VM_ISA_DEASSERT_IRQ (VMM_IOC_BASE | 0x0b) 501 #define VM_ISA_PULSE_IRQ (VMM_IOC_BASE | 0x0c) 502 #define VM_ISA_SET_IRQ_TRIGGER (VMM_IOC_BASE | 0x0d) 503 504 #define VM_RTC_WRITE (VMM_IOC_BASE | 0x0e) 505 #define VM_RTC_READ (VMM_IOC_BASE | 0x0f) 506 #define VM_RTC_SETTIME (VMM_IOC_BASE | 0x10) 507 #define VM_RTC_GETTIME (VMM_IOC_BASE | 0x11) 508 509 #define VM_SUSPEND (VMM_IOC_BASE | 0x12) 510 511 #define VM_IOAPIC_PINCOUNT (VMM_IOC_BASE | 0x13) 512 #define VM_GET_PPTDEV_LIMITS (VMM_IOC_BASE | 0x14) 513 #define VM_GET_HPET_CAPABILITIES (VMM_IOC_BASE | 0x15) 514 515 #define VM_STATS_IOC (VMM_IOC_BASE | 0x16) 516 #define VM_STAT_DESC (VMM_IOC_BASE | 0x17) 517 518 #define VM_INJECT_NMI (VMM_IOC_BASE | 0x18) 519 #define VM_GET_X2APIC_STATE (VMM_IOC_BASE | 0x19) 520 #define VM_SET_TOPOLOGY (VMM_IOC_BASE | 0x1a) 521 #define VM_GET_TOPOLOGY (VMM_IOC_BASE | 0x1b) 522 #define VM_GET_CPUS (VMM_IOC_BASE | 0x1c) 523 #define VM_SUSPEND_CPU (VMM_IOC_BASE | 0x1d) 524 #define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e) 525 526 #define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f) 527 528 /* Note: forces a barrier on a flush operation before returning. */ 529 #define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20) 530 #define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21) 531 532 #define VM_DATA_READ (VMM_IOC_BASE | 0x22) 533 #define VM_DATA_WRITE (VMM_IOC_BASE | 0x23) 534 535 #define VM_SET_AUTODESTRUCT (VMM_IOC_BASE | 0x24) 536 #define VM_DESTROY_SELF (VMM_IOC_BASE | 0x25) 537 #define VM_DESTROY_PENDING (VMM_IOC_BASE | 0x26) 538 539 #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) 540 541 #define VMM_CTL_DEV "/dev/vmmctl" 542 543 #endif 544