/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMM_H_
#define	_VMM_H_

/*
 * Reasons a virtual machine can be suspended.  Passed to vm_suspend() and
 * carried in the 'suspended' payload of 'struct vm_exit'.
 *
 * VM_SUSPEND_LAST is the final enumerator; it does not name a suspend
 * reason itself (presumably an upper bound for validation).
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 *
 * The enumerator values are implicit, so the order of this list is
 * significant: new registers must be added immediately before VM_REG_LAST
 * to keep existing values stable.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_LAST
};

/*
 * x2APIC state of a vcpu, as used by vm_get_x2apic_state() and
 * vm_set_x2apic_state().  X2APIC_STATE_LAST is the final enumerator and
 * does not name a state.
 */
enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

#ifdef _KERNEL

#define	VM_MAX_NAMELEN	32

/*
 * Forward declarations for types that are only referenced by pointer in
 * this header.
 */
struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vatpic;
struct vatpit;
struct vmspace;
struct vm_object;
struct pmap;

/*
 * Signatures of the entry points collected in 'struct vmm_ops'.
 *
 * The 'vmm_*' types are module-wide hooks; the 'vmi_*' types operate on
 * the opaque per-VM cookie ('vmi') returned by the 'vmi_init_func_t' hook.
 */
typedef int	(*vmm_init_func_t)(int ipinum);
typedef int	(*vmm_cleanup_func_t)(void);
typedef void	(*vmm_resume_func_t)(void);
typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
		    struct pmap *pmap, void *rendezvous_cookie,
		    void *suspend_cookie);
typedef void	(*vmi_cleanup_func_t)(void *vmi);
typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
		    uint64_t *retval);
typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
		    uint64_t val);
typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
		    struct seg_desc *desc);
typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
		    struct seg_desc *desc);
typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);

/*
 * Table of backend entry points.  One instance is declared per backend
 * below ('vmm_ops_intel' and 'vmm_ops_amd').
 */
struct vmm_ops {
	vmm_init_func_t		init;		/* module wide initialization */
	vmm_cleanup_func_t	cleanup;
	vmm_resume_func_t	resume;

	vmi_init_func_t		vminit;		/* vm-specific initialization */
	vmi_run_func_t		vmrun;
	vmi_cleanup_func_t	vmcleanup;
	vmi_get_register_t	vmgetreg;
	vmi_set_register_t	vmsetreg;
	vmi_get_desc_t		vmgetdesc;
	vmi_set_desc_t		vmsetdesc;
	vmi_get_cap_t		vmgetcap;
	vmi_set_cap_t		vmsetcap;
	vmi_vmspace_alloc	vmspace_alloc;
	vmi_vmspace_free	vmspace_free;
	vmi_vlapic_init		vlapic_init;
	vmi_vlapic_cleanup	vlapic_cleanup;
};

extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;

int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
    void **cookie);
void vm_gpa_release(void *cookie);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
    struct vm_memory_segment *seg);
int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
    vm_offset_t *offset, struct vm_object **object);
boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc);
int vm_run(struct vm *vm, struct vm_run *vmrun);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);

/*
 * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
 * The rendezvous 'func(arg)' is not allowed to do anything that will
 * cause the thread to be put to sleep.
 *
 * If the rendezvous is being initiated from a vcpu context then the
 * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
 *
 * The caller cannot hold any locks when initiating the rendezvous.
 *
 * The implementation of this API may cause vcpus other than those specified
 * by 'dest' to be stalled. The caller should not rely on any vcpus making
 * forward progress when the rendezvous is in progress.
 */
typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg);

/*
 * Return non-zero if the 'uintptr_t' that 'rendezvous_cookie' points to is
 * non-zero, and 0 otherwise.  The cookie must not be NULL; it is
 * dereferenced unconditionally.
 */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
{

	return (*(uintptr_t *)rendezvous_cookie != 0);
}

/*
 * Return the 'int' value that 'suspend_cookie' points to.  The cookie must
 * not be NULL; it is dereferenced unconditionally.
 */
static __inline int
vcpu_suspended(void *suspend_cookie)
{

	return (*(int *)suspend_cookie);
}

/*
 * Return 1 if device indicated by bus/slot/func is supposed to be a
 * pci passthrough device.
 *
 * Return 0 otherwise.
 */
int vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);

/*
 * Return non-zero if vcpu_get_state() reports VCPU_RUNNING for 'vcpu' and
 * 0 otherwise.  'hostcpu' is passed through to vcpu_get_state() unchanged.
 */
static __inline int
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{

	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
}

void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);

/*
 * Inject exception 'vme' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);

/*
 * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
 * exception is pending and also updates 'vme'. The pending exception is
 * cleared when this function returns.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);

void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);

enum vm_reg_name vm_segment_name(int seg_encoding);

#endif	/* _KERNEL */

#define	VM_MAXCPU	16			/* maximum virtual cpus */

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_MAX
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Accessors for 'struct seg_desc.access'.  'desc' is a pointer to a
 * 'struct seg_desc'.  Each macro evaluates to the masked bit(s) in place,
 * i.e. zero or the mask value itself, not a normalized 0/1.
 */
#define	SEG_DESC_TYPE(desc)		((desc)->access & 0x001f)
#define	SEG_DESC_PRESENT(desc)		((desc)->access & 0x0080)
#define	SEG_DESC_DEF32(desc)		((desc)->access & 0x4000)
#define	SEG_DESC_GRANULARITY(desc)	((desc)->access & 0x8000)
#define	SEG_DESC_UNUSABLE(desc)		((desc)->access & 0x10000)

enum vm_cpu_mode {
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};

/*
 * Guest paging context; embedded in the 'inout_str' and 'inst_emul'
 * payloads of 'struct vm_exit'.
 */
struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

/*
 * The data structures 'vie' and 'vie_op' are meant to be opaque to the
 * consumers of instruction decoding. The only reason why their contents
 * need to be exposed is because they are part of the 'vm_exit' structure.
 */
struct vie_op {
	uint8_t		op_byte;	/* actual opcode byte */
	uint8_t		op_type;	/* type of operation (e.g. MOV) */
	uint16_t	op_flags;
};

#define	VIE_INST_SIZE	15
struct vie {
	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
	uint8_t		num_valid;		/* size of the instruction */
	uint8_t		num_processed;

	uint8_t		rex_w:1,		/* REX prefix */
			rex_r:1,
			rex_x:1,
			rex_b:1,
			rex_present:1;

	/*
	 * NOTE(review): the two groups below each declare 10 bits against a
	 * uint8_t base type, so they cannot share a single byte and their
	 * exact packing is implementation-defined.  They are deliberately
	 * left untouched because this structure is embedded in
	 * 'struct vm_exit' and changing the widths would alter its layout.
	 */
	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
			rm:4;

	uint8_t		ss:2,			/* SIB byte */
			index:4,
			base:4;

	uint8_t		disp_bytes;
	uint8_t		imm_bytes;

	uint8_t		scale;
	int		base_register;		/* VM_REG_GUEST_xyz */
	int		index_register;		/* VM_REG_GUEST_xyz */

	int64_t		displacement;		/* optional addr displacement */
	int64_t		immediate;		/* optional immediate operand */

	uint8_t		decoded;	/* set to 1 if successfully decoded */

	struct vie_op	op;			/* opcode description */
};

/*
 * Reason for a VM exit; stored in 'struct vm_exit.exitcode'.
 *
 * The enumerator values are implicit, so existing entries must not be
 * reordered or removed (see VM_EXITCODE_DEPRECATED1, which keeps the slot
 * of a retired code occupied).
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_INOUT_STR,
	VM_EXITCODE_MAX
};

struct vm_inout {
	uint16_t	bytes:3;	/* 1 or 2 or 4 */
	uint16_t	in:1;
	uint16_t	string:1;
	uint16_t	rep:1;
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};

struct vm_inout_str {
	struct vm_inout	inout;		/* must be the first element */
	struct vm_guest_paging paging;
	uint64_t	rflags;
	uint64_t	cr0;
	uint64_t	index;
	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
	int		addrsize;
	enum vm_reg_name seg_name;
	struct seg_desc seg_desc;
};

/*
 * Description of a VM exit.  'exitcode' identifies the reason; the union
 * 'u' holds the payload for that reason.
 */
struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_inout_str inout_str;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			struct vm_guest_paging paging;
			struct vie	vie;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			int		vcpu;
			uint64_t	rip;
		} spinup_ap;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
	} u;
};

#endif	/* _VMM_H_ */