/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */

#ifndef _VMM_H_
#define	_VMM_H_

/*
 * NOTE: every enum below that does not spell out explicit values relies on
 * implicit, position-based numbering.  Inserting or reordering enumerators
 * changes the numeric value of everything that follows.
 */

/*
 * How a VM was suspended; carried in the 'suspended' payload of
 * struct vm_exit.  VM_SUSPEND_LAST trails the real values and therefore
 * equals their count.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_TRIPLEFAULT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 * VM_REG_LAST trails the real identifiers and equals their count.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,
	VM_REG_GUEST_INTR_SHADOW,
	VM_REG_GUEST_DR0,
	VM_REG_GUEST_DR1,
	VM_REG_GUEST_DR2,
	VM_REG_GUEST_DR3,
	VM_REG_GUEST_DR6,
	VM_REG_GUEST_ENTRY_INST_LENGTH,
	VM_REG_LAST
};

enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

/*
 * Field layout of an interrupt-info word, as fixed by the masks below:
 *	bits  0-7	vector			(VM_INTINFO_VECTOR)
 *	bits  8-10	event type		(VM_INTINFO_TYPE)
 *	bit   11				(VM_INTINFO_DEL_ERRCODE)
 *	bits 12-30	reserved		(VM_INTINFO_RSVD)
 *	bit   31	valid			(VM_INTINFO_VALID)
 * The HWINTR/NMI/HWEXCEPTION/SWINTR constants are values of the type field,
 * already shifted into position.
 */
#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
#define	VM_INTINFO_DEL_ERRCODE	0x800
#define	VM_INTINFO_RSVD		0x7ffff000
#define	VM_INTINFO_VALID	0x80000000
#define	VM_INTINFO_TYPE		0x700
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)

/*
 * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
 * To simplify structure definitions, an arbitrary limit has been chosen.
 * This same limit is used for memory segment names.
 */

#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#define	VM_MAXCPU		32	/* maximum virtual cpus */

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_BPT_EXIT,
	VM_CAP_MAX
};

/*
 * VMX capability flags.  Each value is a distinct bit, so they can be
 * OR-ed together.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0,
	VMX_CAP_TPR_SHADOW	= (1UL << 0),
	VMX_CAP_APICV		= (1UL << 1),
	VMX_CAP_APICV_X2APIC	= (1UL << 2),
	VMX_CAP_APICV_PIR	= (1UL << 3),
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Accessors for seg_desc 'access'.  TYPE and DPL yield the extracted field
 * value; the remaining macros normalize a single bit to 0 or 1.
 */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)

enum vm_cpu_mode {
	CPU_MODE_REAL,
	CPU_MODE_PROTECTED,
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};

/*
 * Snapshot of guest paging context: %cr3, privilege level, and the CPU and
 * paging modes.  Embedded in struct vm_task_switch.
 */
struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

/*
 * Exit codes stored in vm_exit.exitcode.  VM_EXITCODE_MAX trails the real
 * codes and equals their count.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_RUN_STATE,
	VM_EXITCODE_MMIO_EMUL,
	VM_EXITCODE_DEPRECATED,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_MMIO,
	VM_EXITCODE_TASK_SWITCH,
	VM_EXITCODE_MONITOR,
	VM_EXITCODE_MWAIT,
	VM_EXITCODE_SVM,
	VM_EXITCODE_REQIDLE,
	VM_EXITCODE_DEBUG,
	VM_EXITCODE_VMINSN,
	VM_EXITCODE_BPT,
	VM_EXITCODE_HT,
	VM_EXITCODE_MAX
};

/* Bit flags stored in vm_inout.flags. */
enum inout_flags {
	INOUT_IN	= (1U << 0), /* direction: 'in' when set, else 'out' */

	/*
	 * The following flags are used only for in-kernel emulation logic and
	 * are not exposed to userspace.
	 */
	INOUT_STR	= (1U << 1), /* ins/outs operation */
	INOUT_REP	= (1U << 2), /* 'rep' prefix present on instruction */
};

/*
 * Description of a port I/O access.  Used as an exit payload in
 * struct vm_exit and as an entry payload in struct vm_entry.
 */
struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * The address size and segment are relevant to INS/OUTS operations.
	 * Userspace is not concerned with them since the in-kernel emulation
	 * handles those specific aspects.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};

/*
 * Description of an MMIO access.  Used as an exit payload in
 * struct vm_exit and as an entry payload in struct vm_entry.
 */
struct vm_mmio {
	uint8_t		bytes;		/* 1/2/4/8 bytes */
	uint8_t		read;		/* read: 1, write: 0 */
	uint16_t	_pad[3];	/* 2 + 6 bytes: puts 'gpa' at offset 8 */
	uint64_t	gpa;
	uint64_t	data;
};

enum task_switch_reason {
	TSR_CALL,
	TSR_IRET,
	TSR_JMP,
	TSR_IDT_GATE,	/* task gate in IDT */
};

struct vm_task_switch {
	uint16_t	tsssel;		/* new TSS selector */
	int		ext;		/* task switch due to external event */
	uint32_t	errcode;
	int		errcode_valid;	/* push 'errcode' on the new stack */
	enum task_switch_reason reason;
	struct vm_guest_paging paging;
};

/*
 * vCPU run-state bits.  VRS_HALT is the all-zero state; the others are
 * individual flag bits, with the two "pending" flags kept apart from the
 * low bits at positions 14 and 15.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * VRS_MASK_VALID() strips every bit that is not a defined run-state flag;
 * VRS_IS_VALID() is true when no such stray bit is present.
 *
 * The mask has to name both pending flags.  Listing VRS_PEND_SIPI twice
 * would leave VRS_PEND_INIT out of the mask, causing VRS_IS_VALID() to
 * reject any state that carries a pending INIT.
 *
 * Both macros evaluate their argument more than once; do not pass an
 * expression with side effects.
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))

/*
 * Information describing a VM exit: the exit code, the instruction length
 * and %rip, plus an exit-specific payload in the union 'u'.
 */
struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_mmio	mmio;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		/*
		 * Kernel-internal MMIO decoding and emulation.
		 * Userspace should not expect to see this, but rather a
		 * VM_EXITCODE_MMIO with the above 'mmio' context.
		 */
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			uint64_t	cs_base;
			int		cs_d;		/* CS.D */
		} mmio_emul;
		struct {
			uint8_t		inst[15];
			uint8_t		num_valid;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		/*
		 * SVM specific payload.
		 */
		struct {
			uint64_t	exitcode;
			uint64_t	exitinfo1;
			uint64_t	exitinfo2;
		} svm;
		struct {
			int		inst_length;
		} bpt;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
		struct vm_task_switch task_switch;
	} u;
};

/* Commands accepted in vm_entry.cmd. */
enum vm_entry_cmds {
	VEC_DEFAULT = 0,
	VEC_DISCARD_INSTR,	/* discard inst emul state */
	VEC_FULFILL_MMIO,	/* entry includes result for mmio emul */
	VEC_FULFILL_INOUT,	/* entry includes result for inout emul */
};

/*
 * Parameters for entering a vCPU.  Per the vm_entry_cmds comments, 'u'
 * carries the emulation result when 'cmd' is VEC_FULFILL_MMIO or
 * VEC_FULFILL_INOUT.
 */
struct vm_entry {
	int	cpuid;
	uint_t	cmd;		/* see: vm_entry_cmds */
	void	*exit_data;
	union {
		struct vm_inout	inout;
		struct vm_mmio	mmio;
	} u;
};

int vm_restart_instruction(void *vm, int vcpuid);

enum vm_create_flags {
	/*
	 * Allocate guest memory segments from existing reservoir capacity,
	 * rather than attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM = (1 << 0),
};

#endif	/* _VMM_H_ */