/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	uint64_t	guest_xcr0;
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
	int		extint_pending;
	struct vm_exception exception;
	int		exception_pending;
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vhpet	*vhpet;		/* virtual HPET */
	struct vioapic	*vioapic;	/* virtual ioapic */
	struct vatpic	*vatpic;	/* virtual atpic */
	struct vatpit	*vatpit;	/* virtual atpit */
	struct vmspace	*vmspace;	/* guest's address space */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	volatile cpuset_t active_cpus;

	struct mtx	rendezvous_mtx;
	cpuset_t	rendezvous_req_cpus;
	cpuset_t	rendezvous_done_cpus;
	void		*rendezvous_arg;
	vm_rendezvous_func_t rendezvous_func;

	int		suspend;
	volatile cpuset_t suspended_cpus;

	volatile cpuset_t halted_cpus;
};

static int vmm_initialized;

static struct vmm_ops *ops;
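/*
 * The 'ops' vector is filled in at module load time (see vmm_init()) with
 * either the Intel VT-x or the AMD SVM implementation.  The dispatch macros
 * below fall back to a benign value (0, NULL or ENXIO) if no hardware
 * backend was registered.
 */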
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static void
vcpu_cleanup(struct vm *vm, int i)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}
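/*
 * Return the per-vcpu structure that describes the most recent exit from
 * the guest.
 */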
struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();
	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	struct vmspace *vmspace;

	const int BSP = 0;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	vm_activate_cpu(vm, BSP);

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}
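/*
 * Release all resources held by the virtual machine: passthru devices, the
 * iommu domain, the virtual device models, guest memory segments, per-vcpu
 * state and finally the guest vmspace itself.
 */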
void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i);

	VMSPACE_FREE(vm->vmspace);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}
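/*
 * Allocate 'len' bytes of guest memory starting at guest physical address
 * 'gpa'.  Both 'gpa' and 'len' must be page aligned and the range may not
 * partially overlap memory that has already been allocated.
 */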
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}

static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}
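/*
 * Update the iommu mappings for every wired guest memory segment.  When
 * mapping, pages are added to the VM's iommu domain and removed from the
 * host domain; when unmapping, the opposite is done.  Stale translations
 * are flushed from the affected domain afterwards.
 */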
static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
			    &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vmm_mem_maxaddr();
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}
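/*
 * Hold the page backing guest physical address 'gpa' and return a host
 * virtual address for it.  The access must not cross a page boundary.
 *
 * A minimal usage sketch (buffer and size names are illustrative only):
 *
 *	void *cookie, *hva;
 *
 *	hva = vm_gpa_hold(vm, gpa, size, VM_PROT_READ, &cookie);
 *	if (hva != NULL) {
 *		bcopy(hva, buf, size);
 *		vm_gpa_release(cookie);
 *	}
 */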
void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
    struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
    vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
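/*
 * Transition a vcpu between states while holding the vcpu lock.  A rough
 * sketch of the expected usage from the ioctl path, assuming vcpu_set_state()
 * acquires the vcpu lock and calls this routine:
 *
 *	vcpu_set_state(vm, vcpuid, VCPU_FROZEN, true);
 *	... operate on the vcpu on behalf of the ioctl ...
 *	vcpu_set_state(vm, vcpuid, VCPU_IDLE, false);
 */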
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)
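/*
 * Execute the rendezvous callback on behalf of this vcpu (or on behalf of
 * the ioctl path when 'vcpuid' is -1) and then wait until all vcpus in
 * 'rendezvous_req_cpus' have done the same.
 */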
static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}
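/*
 * Handle a nested page fault exit.  Accessed/dirty bit emulation is attempted
 * first; if that does not resolve the fault the page is faulted in through
 * the guest vmspace.
 */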
static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0)
			goto done;
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}
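/*
 * Emulate the instruction that faulted while accessing guest physical address
 * 'gpa'.  Accesses that target the in-kernel device models (local APIC,
 * I/O APIC and HPET) are emulated here; everything else is bounced to
 * userspace by setting 'retu'.
 */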
static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	int cpl, error, inst_length;
	uint64_t rip, gla, gpa, cr3;
	enum vie_cpu_mode cpu_mode;
	enum vie_paging_mode paging_mode;
	mem_region_read_t mread;
	mem_region_write_t mwrite;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	rip = vme->rip;
	inst_length = vme->inst_length;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cr3 = vme->u.inst_emul.cr3;
	cpl = vme->u.inst_emul.cpl;
	cpu_mode = vme->u.inst_emul.cpu_mode;
	paging_mode = vme->u.inst_emul.paging_mode;
	vie = &vme->u.inst_emul.vie;

	vie_init(vie);

	/* Fetch, decode and emulate the faulting instruction */
	error = vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
	    paging_mode, cpl, vie);
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
		return (EFAULT);

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
	    retu);

	return (error);
}
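/*
 * Wait for all active vcpus to suspend themselves, servicing any rendezvous
 * that may be in progress to avoid deadlocking against it, then wake up the
 * other suspended vcpus and return to userspace.
 */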
static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i, done;
	struct vcpu *vcpu;

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}
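/*
 * Mark the virtual machine as suspended with reason 'how' and notify all
 * active vcpus so that they exit the guest and notice the suspend request.
 */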
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}

void
vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}
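/*
 * Run a vcpu in the guest until it exits for a reason that must be handled
 * in userspace.  Exits that can be completed in the kernel (suspend,
 * rendezvous, HLT, nested page faults, instruction emulation and in/out
 * emulation) are handled here and the guest is resumed.
 */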
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;
	void *rptr, *sptr;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	rptr = &vm->rendezvous_func;
	sptr = &vm->suspend;
	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
	rip = vmrun->rip;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_SUSPENDED:
			error = vm_handle_suspend(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INOUT:
		case VM_EXITCODE_INOUT_STR:
			error = vm_handle_inout(vm, vcpuid, vme, &retu);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false) {
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}
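
/*
 * Illustrative sketch (not compiled): a grossly simplified userland view of
 * the loop above, driving one vcpu through the VM_RUN ioctl on an open
 * /dev/vmm/<name> descriptor.  Only exits the kernel could not handle
 * (retu == true above) are seen here.  The function name, the descriptor
 * handling and the unconditional rip advance are assumptions made for the
 * example; struct vm_run and VM_RUN come from <machine/vmm_dev.h>.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

static void
example_vcpu_loop(int vmfd, int vcpuid, uint64_t rip)
{
	struct vm_run vmrun;

	vmrun.cpuid = vcpuid;
	vmrun.rip = rip;
	for (;;) {
		if (ioctl(vmfd, VM_RUN, &vmrun) != 0)
			break;		/* kernel returned an error */
		if (vmrun.vm_exit.exitcode == VM_EXITCODE_SUSPENDED)
			break;		/* vm_exit_suspended() fired */
		/*
		 * A real monitor dispatches on exitcode (e.g. to device
		 * models for VM_EXITCODE_INOUT); here we simply resume at
		 * the next instruction (inst_length is 0 for exits that
		 * must be restarted).
		 */
		vmrun.rip = vmrun.vm_exit.rip + vmrun.vm_exit.inst_length;
	}
}
#endif
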
int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (exception->vector < 0 || exception->vector >= 32)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->exception_pending) {
		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
		    "pending exception %d", exception->vector,
		    vcpu->exception.vector);
		return (EBUSY);
	}

	vcpu->exception_pending = 1;
	vcpu->exception = *exception;
	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
	return (0);
}

int
vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;
	int pending;

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));

	vcpu = &vm->vcpu[vcpuid];
	pending = vcpu->exception_pending;
	if (pending) {
		vcpu->exception_pending = 0;
		*exception = vcpu->exception;
		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
		    exception->vector);
	}
	return (pending);
}

static void
vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vm_exit *vmexit;
	int error;

	error = vm_inject_exception(vm, vcpuid, exception);
	KASSERT(error == 0, ("vm_inject_exception error %d", error));

	/*
	 * A fault-like exception allows the instruction to be restarted
	 * after the exception handler returns.
	 *
	 * By setting the inst_length to 0 we ensure that the instruction
	 * pointer remains at the faulting instruction.
	 */
	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->inst_length = 0;
}

void
vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
{
	struct vm_exception pf = {
		.vector = IDT_PF,
		.error_code_valid = 1,
		.error_code = error_code
	};
	int error;

	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
	    error_code, cr2);

	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));

	vm_inject_fault(vm, vcpuid, &pf);
}

void
vm_inject_gp(struct vm *vm, int vcpuid)
{
	struct vm_exception gpf = {
		.vector = IDT_GP,
		.error_code_valid = 1,
		.error_code = 0
	};

	vm_inject_fault(vm, vcpuid, &gpf);
}

void
vm_inject_ud(struct vm *vm, int vcpuid)
{
	struct vm_exception udf = {
		.vector = IDT_UD,
		.error_code_valid = 0
	};

	vm_inject_fault(vm, vcpuid, &udf);
}
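
/*
 * Illustrative sketch (not compiled): how an emulation path is expected to
 * use the injection helpers above.  hypothetical_gla2gpa() and the layout
 * of 'pf_error_code' are assumptions made for the example; only
 * vm_inject_pf() and the restart-at-the-faulting-instruction behaviour of
 * vm_inject_fault() come from this file.
 */
#if 0
static int
example_copyin_guest(struct vm *vm, int vcpuid, uint64_t gla, void *buf,
    size_t len, int pf_error_code)
{
	uint64_t gpa;

	/* Hypothetical stand-in for a guest page-table walk. */
	if (hypothetical_gla2gpa(vm, vcpuid, gla, &gpa) != 0) {
		/*
		 * Loads %cr2 and queues #PF; inst_length is forced to 0 by
		 * vm_inject_fault() so the guest retries the faulting
		 * instruction once the exception has been delivered.
		 */
		vm_inject_pf(vm, vcpuid, pf_error_code, gla);
		return (EFAULT);
	}

	/* ... copy 'len' bytes at guest physical address 'gpa' into 'buf' ... */
	return (0);
}
#endif
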
static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");

int
vm_inject_extint(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->extint_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_extint_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->extint_pending);
}

void
vm_extint_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->extint_pending == 0)
		panic("vm_extint_clear: inconsistent extint_pending state");

	vcpu->extint_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
}
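
/*
 * Illustrative sketch (not compiled): the pending/clear protocol the
 * functions above expect from a hardware backend.  The hypothetical_*
 * helpers stand in for backend-specific window checks and event delivery;
 * only the vm_*_pending()/vm_*_clear() calls come from this file.
 */
#if 0
static void
example_inject_events(struct vm *vm, int vcpuid)
{

	if (vm_nmi_pending(vm, vcpuid) &&
	    hypothetical_nmi_window_open(vm, vcpuid)) {
		hypothetical_commit_nmi(vm, vcpuid);
		/* Only clear once the NMI is committed; panics otherwise. */
		vm_nmi_clear(vm, vcpuid);
	}

	if (vm_extint_pending(vm, vcpuid) &&
	    hypothetical_intr_window_open(vm, vcpuid)) {
		/* The vector itself is supplied by the virtual 8259. */
		hypothetical_commit_extint(vm, vcpuid);
		vm_extint_clear(vm, vcpuid);
	}
}
#endif
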
int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

struct vioapic *
vm_ioapic(struct vm *vm)
{

	return (vm->vioapic);
}

struct vhpet *
vm_hpet(struct vm *vm)
{

	return (vm->vhpet);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}
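
/*
 * Illustrative sketch (not compiled): the freeze/unfreeze idiom expected
 * from callers of vcpu_set_state(), modeled on the vmm device ioctl path.
 * The function name is hypothetical; passing 'from_idle = true' asks for
 * exclusive ownership of a vcpu that is not currently executing guest code.
 */
#if 0
static int
example_with_frozen_vcpu(struct vm *vm, int vcpuid)
{
	int error;

	/* Wait for the vcpu to become idle, then take it to FROZEN. */
	error = vcpu_set_state(vm, vcpuid, VCPU_FROZEN, true);
	if (error != 0)
		return (error);

	/* ... safe to inspect or modify the vcpu's state here ... */

	/* Return the vcpu to IDLE so other callers can claim it. */
	vcpu_set_state(vm, vcpuid, VCPU_IDLE, false);
	return (0);
}
#endif
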
void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
	    ("vm_activate_cpu: invalid vcpuid %d", vcpuid));
	KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
	    ("vm_activate_cpu: vcpuid %d is already active", vcpuid));

	VCPU_CTR0(vm, vcpuid, "activated");
	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			if (lapic_intr) {
				vlapic_post_intr(vcpu->vlapic, hostcpu,
				    vmm_ipinum);
			} else {
				ipi_cpu(hostcpu, vmm_ipinum);
			}
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}

void
vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg)
{
	int i;

	/*
	 * Enforce that this function is called without any locks held.
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));

restart:
	mtx_lock(&vm->rendezvous_mtx);
	if (vm->rendezvous_func != NULL) {
		/*
		 * If a rendezvous is already in progress then we need to
		 * call the rendezvous handler in case this 'vcpuid' is one
		 * of the targets of the rendezvous.
		 */
		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
		mtx_unlock(&vm->rendezvous_mtx);
		vm_handle_rendezvous(vm, vcpuid);
		goto restart;
	}
	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
	    "rendezvous is still in progress"));

	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
	vm->rendezvous_req_cpus = dest;
	CPU_ZERO(&vm->rendezvous_done_cpus);
	vm->rendezvous_arg = arg;
	vm_set_rendezvous_func(vm, func);
	mtx_unlock(&vm->rendezvous_mtx);

	/*
	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
	 * vcpus so they handle the rendezvous as soon as possible.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &dest))
			vcpu_notify_event(vm, i, false);
	}

	vm_handle_rendezvous(vm, vcpuid);
}
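
/*
 * Illustrative sketch (not compiled): using vm_smp_rendezvous() to run a
 * callback on every active vcpu while all of them are parked in the
 * hypervisor, e.g. to update per-vcpu interrupt state atomically with
 * respect to guest execution.  The example_* names and the
 * hypothetical_update_vcpu_state() helper are assumptions; the callback
 * signature matches how vm_handle_rendezvous() invokes rendezvous_func.
 */
#if 0
static void
example_rendezvous_cb(struct vm *vm, int vcpuid, void *arg)
{
	/* Invoked once per target vcpu from vm_handle_rendezvous(). */
	hypothetical_update_vcpu_state(vm, vcpuid, arg);
}

static void
example_rendezvous(struct vm *vm, int initiator_vcpuid, void *arg)
{
	cpuset_t dest;

	dest = vm_active_cpus(vm);
	vm_smp_rendezvous(vm, initiator_vcpuid, dest, example_rendezvous_cb,
	    arg);
}
#endif
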
struct vatpic *
vm_atpic(struct vm *vm)
{
	return (vm->vatpic);
}

struct vatpit *
vm_atpit(struct vm *vm)
{
	return (vm->vatpit);
}

enum vm_reg_name
vm_segment_name(int seg)
{
	static enum vm_reg_name seg_names[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS
	};

	KASSERT(seg >= 0 && seg < nitems(seg_names),
	    ("%s: invalid segment encoding %d", __func__, seg));
	return (seg_names[seg]);
}