/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

/*
 * Initialization:
 * (a) allocated when vcpu is created
 * (i) initialized when vcpu is created and when it is reinitialized
 * (o) initialized the first time the vcpu is created
 * (x) initialized before use
 */
struct vcpu {
	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
	enum vcpu_state	state;		/* (o) vcpu state */
	int		hostcpu;	/* (o) vcpu's host cpu */
	struct vlapic	*vlapic;	/* (i) APIC device model */
	enum x2apic_state x2apic_state;	/* (i) APIC mode */
	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
	int		nmi_pending;	/* (i) NMI pending */
	int		extint_pending;	/* (i) INTR pending */
	struct vm_exception exception;	/* (x) exception collateral */
	int		exception_pending; /* (i) exception pending */
	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
	void		*stats;		/* (a,i) statistics */
	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	void		*iommu;			/* (x) iommu-specific data */
	struct vhpet	*vhpet;			/* (i) virtual HPET */
	struct vioapic	*vioapic;		/* (i) virtual ioapic */
	struct vatpic	*vatpic;		/* (i) virtual atpic */
	struct vatpit	*vatpit;		/* (i) virtual atpit */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	int		suspend;		/* (i) stop VM execution */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
	vm_rendezvous_func_t rendezvous_func;
	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
	int		num_mem_segs;		/* (o) guest memory segments */
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

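/*
 * The wrappers below dispatch into the processor-specific backend
 * (vmm_ops_intel or vmm_ops_amd) selected in vmm_init(); they degrade
 * gracefully to NULL or ENXIO when no backend has been registered.
 */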
#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
	}
}

static void
vcpu_init(struct vm *vm, int vcpu_id, bool create)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
	    ("vcpu_init: invalid vcpu %d", vcpu_id));

	vcpu = &vm->vcpu[vcpu_id];

	if (create) {
		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
		    "initialized", vcpu_id));
		vcpu_lock_init(vcpu);
		vcpu->state = VCPU_IDLE;
		vcpu->hostcpu = NOCPU;
		vcpu->guestfpu = fpu_save_area_alloc();
		vcpu->stats = vmm_stat_alloc();
	}

	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->exitintinfo = 0;
	vcpu->nmi_pending = 0;
	vcpu->extint_pending = 0;
	vcpu->exception_pending = 0;
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
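	/*
	 * Fall back to the generic AST vector if a dedicated IPI vector
	 * could not be allocated for vcpu notifications.
	 */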
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
	vm->iommu = NULL;
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	CPU_ZERO(&vm->active_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_init(vm, i, create);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->num_mem_segs = 0;
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i, destroy);

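	/* Tear down the processor-specific per-VM state after the vcpus. */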
	VMCLEANUP(vm->cookie);

	if (destroy) {
		for (i = 0; i < vm->num_mem_segs; i++)
			vm_free_mem_seg(vm, &vm->mem_segs[i]);

		vm->num_mem_segs = 0;

		VMSPACE_FREE(vm->vmspace);
		vm->vmspace = NULL;
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

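	/* The base address and length must be page-aligned and non-zero. */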
	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}

static vm_paddr_t
vm_maxmem(struct vm *vm)
{
	int i;
	vm_paddr_t gpa, maxmem;

	maxmem = 0;
	for (i = 0; i < vm->num_mem_segs; i++) {
		gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
		if (gpa > maxmem)
			maxmem = gpa;
	}
	return (maxmem);
}

static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

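	/*
	 * Wire any memory segments that are not already wired.  Guest memory
	 * is wired before it is exposed to passthru devices through the
	 * iommu (see vm_assign_pptdev()).
	 */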
	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}

static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
			    &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vm_maxmem(vm);
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}

void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

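/*
 * Typical use of the hold/release pair (an illustrative sketch, not lifted
 * from an actual caller):
 *
 *	void *cookie, *hva;
 *	uint32_t val;
 *
 *	hva = vm_gpa_hold(vm, gpa, sizeof(val), VM_PROT_READ, &cookie);
 *	if (hva != NULL) {
 *		val = *(uint32_t *)hva;
 *		vm_gpa_release(cookie);
 *	}
 *
 * The requested range must not cross a page boundary and the cookie (the
 * held page) must be released exactly once when the mapping is done.
 */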
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
    struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
    vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

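/*
 * is_descriptor_table() and is_segment_register() identify the registers
 * that carry a segment descriptor; they gate vm_get_seg_desc() and
 * vm_set_seg_desc() below.
 */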
static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
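	/*
	 * 'hostcpu' is only meaningful while the vcpu is RUNNING, as the
	 * assertions earlier in this function expect.
	 */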
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int error, t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	/*
	 * The typical way to halt a cpu is to execute: "sti; hlt"
	 *
	 * STI sets RFLAGS.IF to enable interrupts. However, the processor
	 * remains in an "interrupt shadow" for an additional instruction
	 * following the STI. This guarantees that "sti; hlt" sequence is
	 * atomic and a pending interrupt will be recognized after the HLT.
	 *
	 * After the HLT emulation is done the vcpu is no longer in an
	 * interrupt shadow and a pending interrupt can be injected on
	 * the next entry into the guest.
	 */
	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
	KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
	    __func__, error));

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/* Don't go to sleep if the vcpu thread needs to yield */
		if (vcpu_should_yield(vm, vcpuid))
			break;

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

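		/*
		 * Put the vcpu to sleep; the time spent asleep is charged
		 * to the VCPU_IDLE_TICKS statistic when it wakes up.
		 */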
		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0) {
			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
			    ftype == VM_PROT_READ ? "accessed" : "dirty",
			    vme->u.paging.gpa);
			goto done;
		}
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}

static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	enum vm_cpu_mode cpu_mode;
	int cs_d, error;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cs_d = vme->u.inst_emul.cs_d;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;
	cpu_mode = paging->cpu_mode;

	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);

	vie_init(vie);

	/* Fetch, decode and emulate the faulting instruction */
	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
	    vme->inst_length, vie);
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
		return (EFAULT);

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
1276318224bbSNeel Natu } 1277318224bbSNeel Natu 1278d665d229SNeel Natu error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1279d665d229SNeel Natu mread, mwrite, retu); 1280318224bbSNeel Natu 1281318224bbSNeel Natu return (error); 1282318224bbSNeel Natu } 1283318224bbSNeel Natu 1284b15a09c0SNeel Natu static int 1285b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1286b15a09c0SNeel Natu { 1287b15a09c0SNeel Natu int i, done; 1288b15a09c0SNeel Natu struct vcpu *vcpu; 1289b15a09c0SNeel Natu 1290b15a09c0SNeel Natu done = 0; 1291b15a09c0SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1292b15a09c0SNeel Natu 1293b15a09c0SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1294b15a09c0SNeel Natu 1295b15a09c0SNeel Natu /* 1296b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1297b15a09c0SNeel Natu * 1298b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1299b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1300b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1301b15a09c0SNeel Natu */ 1302b15a09c0SNeel Natu vcpu_lock(vcpu); 1303b15a09c0SNeel Natu while (1) { 1304b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1305b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1306b15a09c0SNeel Natu break; 1307b15a09c0SNeel Natu } 1308b15a09c0SNeel Natu 1309b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 1310b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1311b15a09c0SNeel Natu vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1312b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1313b15a09c0SNeel Natu vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1314b15a09c0SNeel Natu } else { 1315b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1316b15a09c0SNeel Natu vcpu_unlock(vcpu); 1317b15a09c0SNeel Natu vm_handle_rendezvous(vm, vcpuid); 1318b15a09c0SNeel Natu vcpu_lock(vcpu); 1319b15a09c0SNeel Natu } 1320b15a09c0SNeel Natu } 1321b15a09c0SNeel Natu vcpu_unlock(vcpu); 1322b15a09c0SNeel Natu 1323b15a09c0SNeel Natu /* 1324b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1325b15a09c0SNeel Natu */ 1326b15a09c0SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1327b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 1328b15a09c0SNeel Natu vcpu_notify_event(vm, i, false); 1329b15a09c0SNeel Natu } 1330b15a09c0SNeel Natu } 1331b15a09c0SNeel Natu 1332b15a09c0SNeel Natu *retu = true; 1333b15a09c0SNeel Natu return (0); 1334b15a09c0SNeel Natu } 1335b15a09c0SNeel Natu 1336b15a09c0SNeel Natu int 1337f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1338b15a09c0SNeel Natu { 1339f0fdcfe2SNeel Natu int i; 1340b15a09c0SNeel Natu 1341f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1342f0fdcfe2SNeel Natu return (EINVAL); 1343f0fdcfe2SNeel Natu 1344f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1345f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1346f0fdcfe2SNeel Natu vm->suspend, how); 1347b15a09c0SNeel Natu return (EALREADY); 1348b15a09c0SNeel Natu } 1349f0fdcfe2SNeel Natu 1350f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1351f0fdcfe2SNeel Natu 1352f0fdcfe2SNeel Natu /* 1353f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 
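 * Each vcpu will observe 'vm->suspend' (passed to VMRUN as 'sptr'), exit
 * with VM_EXITCODE_SUSPENDED and end up in vm_handle_suspend() above.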
1354f0fdcfe2SNeel Natu */ 1355f0fdcfe2SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1356f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 1357f0fdcfe2SNeel Natu vcpu_notify_event(vm, i, false); 1358f0fdcfe2SNeel Natu } 1359f0fdcfe2SNeel Natu 1360f0fdcfe2SNeel Natu return (0); 1361f0fdcfe2SNeel Natu } 1362f0fdcfe2SNeel Natu 1363f0fdcfe2SNeel Natu void 1364f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1365f0fdcfe2SNeel Natu { 1366f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1367f0fdcfe2SNeel Natu 1368f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1369f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1370f0fdcfe2SNeel Natu 1371f0fdcfe2SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1372f0fdcfe2SNeel Natu vmexit->rip = rip; 1373f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1374f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1375f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1376b15a09c0SNeel Natu } 1377b15a09c0SNeel Natu 137840487465SNeel Natu void 137940487465SNeel Natu vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 138040487465SNeel Natu { 138140487465SNeel Natu struct vm_exit *vmexit; 138240487465SNeel Natu 138340487465SNeel Natu KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 138440487465SNeel Natu 138540487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 138640487465SNeel Natu vmexit->rip = rip; 138740487465SNeel Natu vmexit->inst_length = 0; 138840487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 138940487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 139040487465SNeel Natu } 139140487465SNeel Natu 139240487465SNeel Natu void 139340487465SNeel Natu vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 139440487465SNeel Natu { 139540487465SNeel Natu struct vm_exit *vmexit; 139640487465SNeel Natu 139740487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 139840487465SNeel Natu vmexit->rip = rip; 139940487465SNeel Natu vmexit->inst_length = 0; 140040487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 140140487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 140240487465SNeel Natu } 140340487465SNeel Natu 1404318224bbSNeel Natu int 1405318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun) 1406318224bbSNeel Natu { 1407318224bbSNeel Natu int error, vcpuid; 1408318224bbSNeel Natu struct vcpu *vcpu; 1409318224bbSNeel Natu struct pcb *pcb; 1410318224bbSNeel Natu uint64_t tscval, rip; 1411318224bbSNeel Natu struct vm_exit *vme; 1412becd9849SNeel Natu bool retu, intr_disabled; 1413318224bbSNeel Natu pmap_t pmap; 1414b15a09c0SNeel Natu void *rptr, *sptr; 1415318224bbSNeel Natu 1416318224bbSNeel Natu vcpuid = vmrun->cpuid; 1417318224bbSNeel Natu 1418318224bbSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1419318224bbSNeel Natu return (EINVAL); 1420318224bbSNeel Natu 142195ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 142295ebc360SNeel Natu return (EINVAL); 142395ebc360SNeel Natu 142495ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 142595ebc360SNeel Natu return (EINVAL); 142695ebc360SNeel Natu 1427b15a09c0SNeel Natu rptr = &vm->rendezvous_func; 1428b15a09c0SNeel Natu sptr = &vm->suspend; 1429318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1430318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1431318224bbSNeel Natu vme = &vcpu->exitinfo; 1432318224bbSNeel Natu rip = vmrun->rip; 1433318224bbSNeel Natu restart: 1434318224bbSNeel Natu critical_enter(); 
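	/*
	 * Everything between critical_enter() and critical_exit() sets up
	 * per-cpu host state (PCB flags, guest FPU state) for VMRUN and
	 * must not be preempted or migrated to another host cpu.
	 */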
1435318224bbSNeel Natu 1436318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1437318224bbSNeel Natu ("vm_run: absurd pm_active")); 1438318224bbSNeel Natu 1439318224bbSNeel Natu tscval = rdtsc(); 1440318224bbSNeel Natu 1441318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1442318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1443318224bbSNeel Natu 1444318224bbSNeel Natu restore_guest_fpustate(vcpu); 1445318224bbSNeel Natu 1446318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1447b15a09c0SNeel Natu error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); 1448318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1449318224bbSNeel Natu 1450318224bbSNeel Natu save_guest_fpustate(vcpu); 1451318224bbSNeel Natu 1452318224bbSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1453318224bbSNeel Natu 1454318224bbSNeel Natu critical_exit(); 1455318224bbSNeel Natu 1456318224bbSNeel Natu if (error == 0) { 1457becd9849SNeel Natu retu = false; 1458318224bbSNeel Natu switch (vme->exitcode) { 1459b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 1460b15a09c0SNeel Natu error = vm_handle_suspend(vm, vcpuid, &retu); 1461b15a09c0SNeel Natu break; 146230b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 146330b94db8SNeel Natu vioapic_process_eoi(vm, vcpuid, 146430b94db8SNeel Natu vme->u.ioapic_eoi.vector); 146530b94db8SNeel Natu break; 14665b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 14675b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 14685b8a8cd1SNeel Natu error = 0; 14695b8a8cd1SNeel Natu break; 1470318224bbSNeel Natu case VM_EXITCODE_HLT: 1471becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 14721c052192SNeel Natu error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1473318224bbSNeel Natu break; 1474318224bbSNeel Natu case VM_EXITCODE_PAGING: 1475318224bbSNeel Natu error = vm_handle_paging(vm, vcpuid, &retu); 1476318224bbSNeel Natu break; 1477318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 1478318224bbSNeel Natu error = vm_handle_inst_emul(vm, vcpuid, &retu); 1479318224bbSNeel Natu break; 1480d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1481d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 1482d17b5104SNeel Natu error = vm_handle_inout(vm, vcpuid, vme, &retu); 1483d17b5104SNeel Natu break; 148465145c7fSNeel Natu case VM_EXITCODE_MONITOR: 148565145c7fSNeel Natu case VM_EXITCODE_MWAIT: 148665145c7fSNeel Natu vm_inject_ud(vm, vcpuid); 148765145c7fSNeel Natu break; 1488318224bbSNeel Natu default: 1489becd9849SNeel Natu retu = true; /* handled in userland */ 1490318224bbSNeel Natu break; 1491318224bbSNeel Natu } 1492318224bbSNeel Natu } 1493318224bbSNeel Natu 1494becd9849SNeel Natu if (error == 0 && retu == false) { 1495f76fc5d4SNeel Natu rip = vme->rip + vme->inst_length; 1496f76fc5d4SNeel Natu goto restart; 1497f76fc5d4SNeel Natu } 1498f76fc5d4SNeel Natu 1499318224bbSNeel Natu /* copy the exit information */ 1500318224bbSNeel Natu bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1501366f6083SPeter Grehan return (error); 1502366f6083SPeter Grehan } 1503366f6083SPeter Grehan 1504366f6083SPeter Grehan int 1505091d4532SNeel Natu vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1506091d4532SNeel Natu { 1507091d4532SNeel Natu struct vcpu *vcpu; 1508091d4532SNeel Natu int type, vector; 1509091d4532SNeel Natu 1510091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1511091d4532SNeel Natu return (EINVAL); 1512091d4532SNeel Natu 1513091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1514091d4532SNeel Natu 1515091d4532SNeel Natu if 
(info & VM_INTINFO_VALID) { 1516091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1517091d4532SNeel Natu vector = info & 0xff; 1518091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1519091d4532SNeel Natu return (EINVAL); 1520091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1521091d4532SNeel Natu return (EINVAL); 1522091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1523091d4532SNeel Natu return (EINVAL); 1524091d4532SNeel Natu } else { 1525091d4532SNeel Natu info = 0; 1526091d4532SNeel Natu } 1527091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1528091d4532SNeel Natu vcpu->exitintinfo = info; 1529091d4532SNeel Natu return (0); 1530091d4532SNeel Natu } 1531091d4532SNeel Natu 1532091d4532SNeel Natu enum exc_class { 1533091d4532SNeel Natu EXC_BENIGN, 1534091d4532SNeel Natu EXC_CONTRIBUTORY, 1535091d4532SNeel Natu EXC_PAGEFAULT 1536091d4532SNeel Natu }; 1537091d4532SNeel Natu 1538091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1539091d4532SNeel Natu 1540091d4532SNeel Natu static enum exc_class 1541091d4532SNeel Natu exception_class(uint64_t info) 1542091d4532SNeel Natu { 1543091d4532SNeel Natu int type, vector; 1544091d4532SNeel Natu 1545091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1546091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1547091d4532SNeel Natu vector = info & 0xff; 1548091d4532SNeel Natu 1549091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1550091d4532SNeel Natu switch (type) { 1551091d4532SNeel Natu case VM_INTINFO_HWINTR: 1552091d4532SNeel Natu case VM_INTINFO_SWINTR: 1553091d4532SNeel Natu case VM_INTINFO_NMI: 1554091d4532SNeel Natu return (EXC_BENIGN); 1555091d4532SNeel Natu default: 1556091d4532SNeel Natu /* 1557091d4532SNeel Natu * Hardware exception. 1558091d4532SNeel Natu * 1559091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1560091d4532SNeel Natu * hardware interrupt and software interrupt. 1561091d4532SNeel Natu * 1562091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1563091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1564091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1565091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1566091d4532SNeel Natu * exception. 
1567091d4532SNeel Natu */ 1568091d4532SNeel Natu break; 1569091d4532SNeel Natu } 1570091d4532SNeel Natu 1571091d4532SNeel Natu switch (vector) { 1572091d4532SNeel Natu case IDT_PF: 1573091d4532SNeel Natu case IDT_VE: 1574091d4532SNeel Natu return (EXC_PAGEFAULT); 1575091d4532SNeel Natu case IDT_DE: 1576091d4532SNeel Natu case IDT_TS: 1577091d4532SNeel Natu case IDT_NP: 1578091d4532SNeel Natu case IDT_SS: 1579091d4532SNeel Natu case IDT_GP: 1580091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1581091d4532SNeel Natu default: 1582091d4532SNeel Natu return (EXC_BENIGN); 1583091d4532SNeel Natu } 1584091d4532SNeel Natu } 1585091d4532SNeel Natu 1586091d4532SNeel Natu static int 1587091d4532SNeel Natu nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1588091d4532SNeel Natu uint64_t *retinfo) 1589091d4532SNeel Natu { 1590091d4532SNeel Natu enum exc_class exc1, exc2; 1591091d4532SNeel Natu int type1, vector1; 1592091d4532SNeel Natu 1593091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1594091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1595091d4532SNeel Natu 1596091d4532SNeel Natu /* 1597091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1598091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 1599091d4532SNeel Natu */ 1600091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1601091d4532SNeel Natu vector1 = info1 & 0xff; 1602091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1603091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1604091d4532SNeel Natu info1, info2); 1605091d4532SNeel Natu vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1606091d4532SNeel Natu *retinfo = 0; 1607091d4532SNeel Natu return (0); 1608091d4532SNeel Natu } 1609091d4532SNeel Natu 1610091d4532SNeel Natu /* 1611091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1612091d4532SNeel Natu */ 1613091d4532SNeel Natu exc1 = exception_class(info1); 1614091d4532SNeel Natu exc2 = exception_class(info2); 1615091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1616091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1617091d4532SNeel Natu /* Convert nested fault into a double fault. 
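 * A double fault is always delivered with an error code of zero, so
 * the error code bits in 'retinfo' are left clear.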
*/ 1618091d4532SNeel Natu *retinfo = IDT_DF; 1619091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1620091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 1621091d4532SNeel Natu } else { 1622091d4532SNeel Natu /* Handle exceptions serially */ 1623091d4532SNeel Natu *retinfo = info2; 1624091d4532SNeel Natu } 1625091d4532SNeel Natu return (1); 1626091d4532SNeel Natu } 1627091d4532SNeel Natu 1628091d4532SNeel Natu static uint64_t 1629091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 1630091d4532SNeel Natu { 1631091d4532SNeel Natu uint64_t info = 0; 1632091d4532SNeel Natu 1633091d4532SNeel Natu if (vcpu->exception_pending) { 1634091d4532SNeel Natu info = vcpu->exception.vector & 0xff; 1635091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1636091d4532SNeel Natu if (vcpu->exception.error_code_valid) { 1637091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 1638091d4532SNeel Natu info |= (uint64_t)vcpu->exception.error_code << 32; 1639091d4532SNeel Natu } 1640091d4532SNeel Natu } 1641091d4532SNeel Natu return (info); 1642091d4532SNeel Natu } 1643091d4532SNeel Natu 1644091d4532SNeel Natu int 1645091d4532SNeel Natu vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1646091d4532SNeel Natu { 1647091d4532SNeel Natu struct vcpu *vcpu; 1648091d4532SNeel Natu uint64_t info1, info2; 1649091d4532SNeel Natu int valid; 1650091d4532SNeel Natu 1651091d4532SNeel Natu KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); 1652091d4532SNeel Natu 1653091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1654091d4532SNeel Natu 1655091d4532SNeel Natu info1 = vcpu->exitintinfo; 1656091d4532SNeel Natu vcpu->exitintinfo = 0; 1657091d4532SNeel Natu 1658091d4532SNeel Natu info2 = 0; 1659091d4532SNeel Natu if (vcpu->exception_pending) { 1660091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 1661091d4532SNeel Natu vcpu->exception_pending = 0; 1662091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 1663091d4532SNeel Natu vcpu->exception.vector, info2); 1664091d4532SNeel Natu } 1665091d4532SNeel Natu 1666091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 1667091d4532SNeel Natu valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 1668091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 1669091d4532SNeel Natu *retinfo = info1; 1670091d4532SNeel Natu valid = 1; 1671091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 1672091d4532SNeel Natu *retinfo = info2; 1673091d4532SNeel Natu valid = 1; 1674091d4532SNeel Natu } else { 1675091d4532SNeel Natu valid = 0; 1676091d4532SNeel Natu } 1677091d4532SNeel Natu 1678091d4532SNeel Natu if (valid) { 1679091d4532SNeel Natu VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 1680091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 1681091d4532SNeel Natu } 1682091d4532SNeel Natu 1683091d4532SNeel Natu return (valid); 1684091d4532SNeel Natu } 1685091d4532SNeel Natu 1686091d4532SNeel Natu int 1687091d4532SNeel Natu vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 1688091d4532SNeel Natu { 1689091d4532SNeel Natu struct vcpu *vcpu; 1690091d4532SNeel Natu 1691091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1692091d4532SNeel Natu return (EINVAL); 1693091d4532SNeel Natu 1694091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1695091d4532SNeel Natu *info1 = vcpu->exitintinfo; 1696091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 1697091d4532SNeel Natu return (0); 1698091d4532SNeel Natu } 
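/*
 * Illustrative sketch (not part of the original source): the 64-bit
 * 'intinfo' value consumed by vm_exit_intinfo() is laid out the same way
 * vcpu_exception_intinfo() builds it above.  For example, a #GP with an
 * error code of zero that was pending at the time of a VM exit could be
 * recorded as follows ('vm' and 'vcpuid' are assumed to be in scope):
 *
 *	uint64_t info;
 *	int error;
 *
 *	info = IDT_GP;					-- vector in bits 7:0
 *	info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
 *	info |= VM_INTINFO_DEL_ERRCODE;			-- error code is valid
 *	info |= (uint64_t)0 << 32;			-- error code value
 *	error = vm_exit_intinfo(vm, vcpuid, info);
 *
 * On the next guest entry vm_entry_intinfo() merges this with any
 * exception queued via vm_inject_exception(), applying the double fault
 * rules implemented by nested_fault().
 */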
1699091d4532SNeel Natu 1700091d4532SNeel Natu int 1701dc506506SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) 1702366f6083SPeter Grehan { 1703dc506506SNeel Natu struct vcpu *vcpu; 1704dc506506SNeel Natu 1705366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1706366f6083SPeter Grehan return (EINVAL); 1707366f6083SPeter Grehan 1708dc506506SNeel Natu if (exception->vector < 0 || exception->vector >= 32) 1709366f6083SPeter Grehan return (EINVAL); 1710366f6083SPeter Grehan 1711091d4532SNeel Natu /* 1712091d4532SNeel Natu * A double fault exception should never be injected directly into 1713091d4532SNeel Natu * the guest. It is a derived exception that results from specific 1714091d4532SNeel Natu * combinations of nested faults. 1715091d4532SNeel Natu */ 1716091d4532SNeel Natu if (exception->vector == IDT_DF) 1717091d4532SNeel Natu return (EINVAL); 1718091d4532SNeel Natu 1719dc506506SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1720366f6083SPeter Grehan 1721dc506506SNeel Natu if (vcpu->exception_pending) { 1722dc506506SNeel Natu VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 1723dc506506SNeel Natu "pending exception %d", exception->vector, 1724dc506506SNeel Natu vcpu->exception.vector); 1725dc506506SNeel Natu return (EBUSY); 1726dc506506SNeel Natu } 1727dc506506SNeel Natu 1728dc506506SNeel Natu vcpu->exception_pending = 1; 1729dc506506SNeel Natu vcpu->exception = *exception; 1730dc506506SNeel Natu VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector); 1731dc506506SNeel Natu return (0); 1732dc506506SNeel Natu } 1733dc506506SNeel Natu 1734d37f2adbSNeel Natu void 1735d37f2adbSNeel Natu vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1736d37f2adbSNeel Natu int errcode) 1737dc506506SNeel Natu { 1738d37f2adbSNeel Natu struct vm_exception exception; 1739dc506506SNeel Natu struct vm_exit *vmexit; 1740d37f2adbSNeel Natu struct vm *vm; 1741dc506506SNeel Natu int error; 1742dc506506SNeel Natu 1743d37f2adbSNeel Natu vm = vmarg; 1744d37f2adbSNeel Natu 1745d37f2adbSNeel Natu exception.vector = vector; 1746d37f2adbSNeel Natu exception.error_code = errcode; 1747d37f2adbSNeel Natu exception.error_code_valid = errcode_valid; 1748d37f2adbSNeel Natu error = vm_inject_exception(vm, vcpuid, &exception); 1749dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 1750dc506506SNeel Natu 1751dc506506SNeel Natu /* 1752dc506506SNeel Natu * A fault-like exception allows the instruction to be restarted 1753dc506506SNeel Natu * after the exception handler returns. 1754dc506506SNeel Natu * 1755dc506506SNeel Natu * By setting the inst_length to 0 we ensure that the instruction 1756dc506506SNeel Natu * pointer remains at the faulting instruction. 
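 * (vm_run() computes the restart address as 'rip + inst_length', so a
 * zero length leaves the guest %rip at the faulting instruction.)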
1757dc506506SNeel Natu */ 1758dc506506SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1759dc506506SNeel Natu vmexit->inst_length = 0; 1760dc506506SNeel Natu } 1761dc506506SNeel Natu 1762dc506506SNeel Natu void 1763d37f2adbSNeel Natu vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 1764fd949af6SNeel Natu { 1765d37f2adbSNeel Natu struct vm *vm; 176637a723a5SNeel Natu int error; 176737a723a5SNeel Natu 1768d37f2adbSNeel Natu vm = vmarg; 176937a723a5SNeel Natu VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 177037a723a5SNeel Natu error_code, cr2); 177137a723a5SNeel Natu 177237a723a5SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 177337a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 1774fd949af6SNeel Natu 1775d37f2adbSNeel Natu vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 1776d665d229SNeel Natu } 1777d665d229SNeel Natu 177861592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1779366f6083SPeter Grehan 1780f352ff0cSNeel Natu int 1781f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid) 1782f352ff0cSNeel Natu { 1783f352ff0cSNeel Natu struct vcpu *vcpu; 1784f352ff0cSNeel Natu 1785f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1786366f6083SPeter Grehan return (EINVAL); 1787366f6083SPeter Grehan 1788f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1789f352ff0cSNeel Natu 1790f352ff0cSNeel Natu vcpu->nmi_pending = 1; 1791de5ea6b6SNeel Natu vcpu_notify_event(vm, vcpuid, false); 1792f352ff0cSNeel Natu return (0); 1793f352ff0cSNeel Natu } 1794f352ff0cSNeel Natu 1795f352ff0cSNeel Natu int 1796f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid) 1797f352ff0cSNeel Natu { 1798f352ff0cSNeel Natu struct vcpu *vcpu; 1799f352ff0cSNeel Natu 1800f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1801f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1802f352ff0cSNeel Natu 1803f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1804f352ff0cSNeel Natu 1805f352ff0cSNeel Natu return (vcpu->nmi_pending); 1806f352ff0cSNeel Natu } 1807f352ff0cSNeel Natu 1808f352ff0cSNeel Natu void 1809f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid) 1810f352ff0cSNeel Natu { 1811f352ff0cSNeel Natu struct vcpu *vcpu; 1812f352ff0cSNeel Natu 1813f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1814f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1815f352ff0cSNeel Natu 1816f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1817f352ff0cSNeel Natu 1818f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 1819f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 1820f352ff0cSNeel Natu 1821f352ff0cSNeel Natu vcpu->nmi_pending = 0; 1822f352ff0cSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1823366f6083SPeter Grehan } 1824366f6083SPeter Grehan 18250775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 18260775fbb4STycho Nightingale 18270775fbb4STycho Nightingale int 18280775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid) 18290775fbb4STycho Nightingale { 18300775fbb4STycho Nightingale struct vcpu *vcpu; 18310775fbb4STycho Nightingale 18320775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 18330775fbb4STycho Nightingale return (EINVAL); 18340775fbb4STycho Nightingale 18350775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 18360775fbb4STycho Nightingale 18370775fbb4STycho Nightingale vcpu->extint_pending = 1; 18380775fbb4STycho Nightingale 
vcpu_notify_event(vm, vcpuid, false); 18390775fbb4STycho Nightingale return (0); 18400775fbb4STycho Nightingale } 18410775fbb4STycho Nightingale 18420775fbb4STycho Nightingale int 18430775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid) 18440775fbb4STycho Nightingale { 18450775fbb4STycho Nightingale struct vcpu *vcpu; 18460775fbb4STycho Nightingale 18470775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 18480775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 18490775fbb4STycho Nightingale 18500775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 18510775fbb4STycho Nightingale 18520775fbb4STycho Nightingale return (vcpu->extint_pending); 18530775fbb4STycho Nightingale } 18540775fbb4STycho Nightingale 18550775fbb4STycho Nightingale void 18560775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid) 18570775fbb4STycho Nightingale { 18580775fbb4STycho Nightingale struct vcpu *vcpu; 18590775fbb4STycho Nightingale 18600775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 18610775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 18620775fbb4STycho Nightingale 18630775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 18640775fbb4STycho Nightingale 18650775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 18660775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 18670775fbb4STycho Nightingale 18680775fbb4STycho Nightingale vcpu->extint_pending = 0; 18690775fbb4STycho Nightingale vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 18700775fbb4STycho Nightingale } 18710775fbb4STycho Nightingale 1872366f6083SPeter Grehan int 1873366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 1874366f6083SPeter Grehan { 1875366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1876366f6083SPeter Grehan return (EINVAL); 1877366f6083SPeter Grehan 1878366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 1879366f6083SPeter Grehan return (EINVAL); 1880366f6083SPeter Grehan 1881366f6083SPeter Grehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 1882366f6083SPeter Grehan } 1883366f6083SPeter Grehan 1884366f6083SPeter Grehan int 1885366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val) 1886366f6083SPeter Grehan { 1887366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1888366f6083SPeter Grehan return (EINVAL); 1889366f6083SPeter Grehan 1890366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 1891366f6083SPeter Grehan return (EINVAL); 1892366f6083SPeter Grehan 1893366f6083SPeter Grehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 1894366f6083SPeter Grehan } 1895366f6083SPeter Grehan 1896366f6083SPeter Grehan struct vlapic * 1897366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu) 1898366f6083SPeter Grehan { 1899366f6083SPeter Grehan return (vm->vcpu[cpu].vlapic); 1900366f6083SPeter Grehan } 1901366f6083SPeter Grehan 1902565bbb86SNeel Natu struct vioapic * 1903565bbb86SNeel Natu vm_ioapic(struct vm *vm) 1904565bbb86SNeel Natu { 1905565bbb86SNeel Natu 1906565bbb86SNeel Natu return (vm->vioapic); 1907565bbb86SNeel Natu } 1908565bbb86SNeel Natu 190908e3ff32SNeel Natu struct vhpet * 191008e3ff32SNeel Natu vm_hpet(struct vm *vm) 191108e3ff32SNeel Natu { 191208e3ff32SNeel Natu 191308e3ff32SNeel Natu return (vm->vhpet); 191408e3ff32SNeel Natu } 191508e3ff32SNeel Natu 1916366f6083SPeter Grehan boolean_t 1917366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 1918366f6083SPeter 
Grehan { 191907044a96SNeel Natu int found, i, n; 192007044a96SNeel Natu int b, s, f; 1921366f6083SPeter Grehan char *val, *cp, *cp2; 1922366f6083SPeter Grehan 1923366f6083SPeter Grehan /* 192407044a96SNeel Natu * XXX 192507044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 192607044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 192707044a96SNeel Natu * specified using a single environment variable. 192807044a96SNeel Natu * 192907044a96SNeel Natu * Work around this by scanning multiple environment variable 193007044a96SNeel Natu * names instead of a single one - yuck! 1931366f6083SPeter Grehan */ 193207044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 193307044a96SNeel Natu 193407044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 1935366f6083SPeter Grehan found = 0; 193607044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 1937*2be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 1938366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 1939366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 1940366f6083SPeter Grehan *cp2 = '\0'; 1941366f6083SPeter Grehan 1942366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 1943366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 1944366f6083SPeter Grehan found = 1; 1945366f6083SPeter Grehan break; 1946366f6083SPeter Grehan } 1947366f6083SPeter Grehan 1948366f6083SPeter Grehan if (cp2 != NULL) 1949366f6083SPeter Grehan *cp2++ = ' '; 1950366f6083SPeter Grehan 1951366f6083SPeter Grehan cp = cp2; 1952366f6083SPeter Grehan } 1953366f6083SPeter Grehan freeenv(val); 195407044a96SNeel Natu } 1955366f6083SPeter Grehan return (found); 1956366f6083SPeter Grehan } 1957366f6083SPeter Grehan 1958366f6083SPeter Grehan void * 1959366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 1960366f6083SPeter Grehan { 1961366f6083SPeter Grehan 1962366f6083SPeter Grehan return (vm->iommu); 1963366f6083SPeter Grehan } 1964366f6083SPeter Grehan 196575dd3366SNeel Natu int 1966f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 1967f80330a8SNeel Natu bool from_idle) 1968366f6083SPeter Grehan { 196975dd3366SNeel Natu int error; 1970366f6083SPeter Grehan struct vcpu *vcpu; 1971366f6083SPeter Grehan 1972366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1973366f6083SPeter Grehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 1974366f6083SPeter Grehan 1975366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 1976366f6083SPeter Grehan 197775dd3366SNeel Natu vcpu_lock(vcpu); 1978f80330a8SNeel Natu error = vcpu_set_state_locked(vcpu, newstate, from_idle); 197975dd3366SNeel Natu vcpu_unlock(vcpu); 198075dd3366SNeel Natu 198175dd3366SNeel Natu return (error); 198275dd3366SNeel Natu } 198375dd3366SNeel Natu 198475dd3366SNeel Natu enum vcpu_state 1985d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 1986366f6083SPeter Grehan { 1987366f6083SPeter Grehan struct vcpu *vcpu; 198875dd3366SNeel Natu enum vcpu_state state; 1989366f6083SPeter Grehan 1990366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1991366f6083SPeter Grehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 1992366f6083SPeter Grehan 1993366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 1994366f6083SPeter Grehan 199575dd3366SNeel Natu vcpu_lock(vcpu); 199675dd3366SNeel Natu state = vcpu->state; 1997d3c11f40SPeter Grehan if (hostcpu != NULL) 1998d3c11f40SPeter Grehan 
*hostcpu = vcpu->hostcpu; 199975dd3366SNeel Natu vcpu_unlock(vcpu); 2000366f6083SPeter Grehan 200175dd3366SNeel Natu return (state); 2002366f6083SPeter Grehan } 2003366f6083SPeter Grehan 200495ebc360SNeel Natu int 2005366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid) 2006366f6083SPeter Grehan { 2007366f6083SPeter Grehan 200895ebc360SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 200995ebc360SNeel Natu return (EINVAL); 201095ebc360SNeel Natu 201195ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->active_cpus)) 201295ebc360SNeel Natu return (EBUSY); 201322d822c6SNeel Natu 201422d822c6SNeel Natu VCPU_CTR0(vm, vcpuid, "activated"); 201522d822c6SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 201695ebc360SNeel Natu return (0); 2017366f6083SPeter Grehan } 2018366f6083SPeter Grehan 2019a5615c90SPeter Grehan cpuset_t 2020366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2021366f6083SPeter Grehan { 2022366f6083SPeter Grehan 2023366f6083SPeter Grehan return (vm->active_cpus); 2024366f6083SPeter Grehan } 2025366f6083SPeter Grehan 202695ebc360SNeel Natu cpuset_t 202795ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 202895ebc360SNeel Natu { 202995ebc360SNeel Natu 203095ebc360SNeel Natu return (vm->suspended_cpus); 203195ebc360SNeel Natu } 203295ebc360SNeel Natu 2033366f6083SPeter Grehan void * 2034366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid) 2035366f6083SPeter Grehan { 2036366f6083SPeter Grehan 2037366f6083SPeter Grehan return (vm->vcpu[vcpuid].stats); 2038366f6083SPeter Grehan } 2039e9027382SNeel Natu 2040e9027382SNeel Natu int 2041e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2042e9027382SNeel Natu { 2043e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2044e9027382SNeel Natu return (EINVAL); 2045e9027382SNeel Natu 2046e9027382SNeel Natu *state = vm->vcpu[vcpuid].x2apic_state; 2047e9027382SNeel Natu 2048e9027382SNeel Natu return (0); 2049e9027382SNeel Natu } 2050e9027382SNeel Natu 2051e9027382SNeel Natu int 2052e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2053e9027382SNeel Natu { 2054e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2055e9027382SNeel Natu return (EINVAL); 2056e9027382SNeel Natu 20573f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2058e9027382SNeel Natu return (EINVAL); 2059e9027382SNeel Natu 2060e9027382SNeel Natu vm->vcpu[vcpuid].x2apic_state = state; 2061e9027382SNeel Natu 206273820fb0SNeel Natu vlapic_set_x2apic_state(vm, vcpuid, state); 206373820fb0SNeel Natu 2064e9027382SNeel Natu return (0); 2065e9027382SNeel Natu } 206675dd3366SNeel Natu 206722821874SNeel Natu /* 206822821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 206922821874SNeel Natu * as soon as possible: 207022821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 207122821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 207222821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 
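 * - If the vcpu is running on the current host cpu then nothing needs to
 *   be done: the pending event is evaluated when the vcpu re-enters the
 *   guest.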
207322821874SNeel Natu */ 207475dd3366SNeel Natu void 2075de5ea6b6SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 207675dd3366SNeel Natu { 207775dd3366SNeel Natu int hostcpu; 207875dd3366SNeel Natu struct vcpu *vcpu; 207975dd3366SNeel Natu 208075dd3366SNeel Natu vcpu = &vm->vcpu[vcpuid]; 208175dd3366SNeel Natu 2082f76fc5d4SNeel Natu vcpu_lock(vcpu); 208375dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2084ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2085ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2086de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2087ef39d7e9SNeel Natu if (lapic_intr) { 2088add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2089add611fdSNeel Natu vmm_ipinum); 2090ef39d7e9SNeel Natu } else { 209175dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 209275dd3366SNeel Natu } 2093ef39d7e9SNeel Natu } else { 2094ef39d7e9SNeel Natu /* 2095ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2096ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2097ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2098ef39d7e9SNeel Natu * transitions back to guest context. 2099ef39d7e9SNeel Natu */ 2100ef39d7e9SNeel Natu } 2101ef39d7e9SNeel Natu } else { 2102ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2103ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2104ef39d7e9SNeel Natu if (vcpu->state == VCPU_SLEEPING) 2105ef39d7e9SNeel Natu wakeup_one(vcpu); 2106de5ea6b6SNeel Natu } 2107f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2108f76fc5d4SNeel Natu } 2109318224bbSNeel Natu 2110318224bbSNeel Natu struct vmspace * 2111318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2112318224bbSNeel Natu { 2113318224bbSNeel Natu 2114318224bbSNeel Natu return (vm->vmspace); 2115318224bbSNeel Natu } 2116565bbb86SNeel Natu 2117565bbb86SNeel Natu int 2118565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2119565bbb86SNeel Natu { 2120565bbb86SNeel Natu /* 2121565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2122565bbb86SNeel Natu */ 2123565bbb86SNeel Natu return (apicid); 2124565bbb86SNeel Natu } 21255b8a8cd1SNeel Natu 21265b8a8cd1SNeel Natu void 21275b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 21285b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 21295b8a8cd1SNeel Natu { 2130970955e4SNeel Natu int i; 2131970955e4SNeel Natu 21325b8a8cd1SNeel Natu /* 21335b8a8cd1SNeel Natu * Enforce that this function is called without any locks 21345b8a8cd1SNeel Natu */ 21355b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 21365b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 21375b8a8cd1SNeel Natu ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 21385b8a8cd1SNeel Natu 21395b8a8cd1SNeel Natu restart: 21405b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 21415b8a8cd1SNeel Natu if (vm->rendezvous_func != NULL) { 21425b8a8cd1SNeel Natu /* 21435b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 21445b8a8cd1SNeel Natu * call the rendezvous handler in case this 'vcpuid' is one 21455b8a8cd1SNeel Natu * of the targets of the rendezvous. 
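 * Once that rendezvous completes, loop back and try again to become
 * the initiator of a new rendezvous.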
21465b8a8cd1SNeel Natu */ 21475b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 21485b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 21495b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 21505b8a8cd1SNeel Natu goto restart; 21515b8a8cd1SNeel Natu } 21525b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 21535b8a8cd1SNeel Natu "rendezvous is still in progress")); 21545b8a8cd1SNeel Natu 21555b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 21565b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 21575b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 21585b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 21595b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, func); 21605b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 21615b8a8cd1SNeel Natu 2162970955e4SNeel Natu /* 2163970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2164970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2165970955e4SNeel Natu */ 2166970955e4SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 2167970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 2168970955e4SNeel Natu vcpu_notify_event(vm, i, false); 2169970955e4SNeel Natu } 2170970955e4SNeel Natu 21715b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 21725b8a8cd1SNeel Natu } 2173762fd208STycho Nightingale 2174762fd208STycho Nightingale struct vatpic * 2175762fd208STycho Nightingale vm_atpic(struct vm *vm) 2176762fd208STycho Nightingale { 2177762fd208STycho Nightingale return (vm->vatpic); 2178762fd208STycho Nightingale } 2179e883c9bbSTycho Nightingale 2180e883c9bbSTycho Nightingale struct vatpit * 2181e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2182e883c9bbSTycho Nightingale { 2183e883c9bbSTycho Nightingale return (vm->vatpit); 2184e883c9bbSTycho Nightingale } 2185d17b5104SNeel Natu 2186d17b5104SNeel Natu enum vm_reg_name 2187d17b5104SNeel Natu vm_segment_name(int seg) 2188d17b5104SNeel Natu { 2189d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2190d17b5104SNeel Natu VM_REG_GUEST_ES, 2191d17b5104SNeel Natu VM_REG_GUEST_CS, 2192d17b5104SNeel Natu VM_REG_GUEST_SS, 2193d17b5104SNeel Natu VM_REG_GUEST_DS, 2194d17b5104SNeel Natu VM_REG_GUEST_FS, 2195d17b5104SNeel Natu VM_REG_GUEST_GS 2196d17b5104SNeel Natu }; 2197d17b5104SNeel Natu 2198d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2199d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2200d17b5104SNeel Natu return (seg_names[seg]); 2201d17b5104SNeel Natu } 2202cf1d80d8SPeter Grehan 2203d665d229SNeel Natu void 2204d665d229SNeel Natu vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2205d665d229SNeel Natu int num_copyinfo) 2206d665d229SNeel Natu { 2207d665d229SNeel Natu int idx; 2208d665d229SNeel Natu 2209d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2210d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2211d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2212d665d229SNeel Natu } 2213d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2214d665d229SNeel Natu } 2215d665d229SNeel Natu 2216d665d229SNeel Natu int 2217d665d229SNeel Natu vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2218d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 2219d665d229SNeel Natu int num_copyinfo) 2220d665d229SNeel Natu { 2221d665d229SNeel Natu int error, idx, nused; 2222d665d229SNeel Natu size_t n, off, remaining; 
2223d665d229SNeel Natu void *hva, *cookie; 2224d665d229SNeel Natu uint64_t gpa; 2225d665d229SNeel Natu 2226d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2227d665d229SNeel Natu 2228d665d229SNeel Natu nused = 0; 2229d665d229SNeel Natu remaining = len; 2230d665d229SNeel Natu while (remaining > 0) { 2231d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 2232d665d229SNeel Natu error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); 2233d665d229SNeel Natu if (error) 2234d665d229SNeel Natu return (error); 2235d665d229SNeel Natu off = gpa & PAGE_MASK; 2236d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2237d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2238d665d229SNeel Natu copyinfo[nused].len = n; 2239d665d229SNeel Natu remaining -= n; 2240d665d229SNeel Natu gla += n; 2241d665d229SNeel Natu nused++; 2242d665d229SNeel Natu } 2243d665d229SNeel Natu 2244d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 2245d665d229SNeel Natu hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, 2246d665d229SNeel Natu prot, &cookie); 2247d665d229SNeel Natu if (hva == NULL) 2248d665d229SNeel Natu break; 2249d665d229SNeel Natu copyinfo[idx].hva = hva; 2250d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2251d665d229SNeel Natu } 2252d665d229SNeel Natu 2253d665d229SNeel Natu if (idx != nused) { 2254d665d229SNeel Natu vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 2255d665d229SNeel Natu return (-1); 2256d665d229SNeel Natu } else { 2257d665d229SNeel Natu return (0); 2258d665d229SNeel Natu } 2259d665d229SNeel Natu } 2260d665d229SNeel Natu 2261d665d229SNeel Natu void 2262d665d229SNeel Natu vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2263d665d229SNeel Natu size_t len) 2264d665d229SNeel Natu { 2265d665d229SNeel Natu char *dst; 2266d665d229SNeel Natu int idx; 2267d665d229SNeel Natu 2268d665d229SNeel Natu dst = kaddr; 2269d665d229SNeel Natu idx = 0; 2270d665d229SNeel Natu while (len > 0) { 2271d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2272d665d229SNeel Natu len -= copyinfo[idx].len; 2273d665d229SNeel Natu dst += copyinfo[idx].len; 2274d665d229SNeel Natu idx++; 2275d665d229SNeel Natu } 2276d665d229SNeel Natu } 2277d665d229SNeel Natu 2278d665d229SNeel Natu void 2279d665d229SNeel Natu vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2280d665d229SNeel Natu struct vm_copyinfo *copyinfo, size_t len) 2281d665d229SNeel Natu { 2282d665d229SNeel Natu const char *src; 2283d665d229SNeel Natu int idx; 2284d665d229SNeel Natu 2285d665d229SNeel Natu src = kaddr; 2286d665d229SNeel Natu idx = 0; 2287d665d229SNeel Natu while (len > 0) { 2288d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2289d665d229SNeel Natu len -= copyinfo[idx].len; 2290d665d229SNeel Natu src += copyinfo[idx].len; 2291d665d229SNeel Natu idx++; 2292d665d229SNeel Natu } 2293d665d229SNeel Natu } 2294cf1d80d8SPeter Grehan 2295cf1d80d8SPeter Grehan /* 2296cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. 
Since 2297cf1d80d8SPeter Grehan * these are global stats, only return the values for vCPU 0 2298cf1d80d8SPeter Grehan */ 2299cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2300cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2301cf1d80d8SPeter Grehan 2302cf1d80d8SPeter Grehan static void 2303cf1d80d8SPeter Grehan vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2304cf1d80d8SPeter Grehan { 2305cf1d80d8SPeter Grehan 2306cf1d80d8SPeter Grehan if (vcpu == 0) { 2307cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2308cf1d80d8SPeter Grehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2309cf1d80d8SPeter Grehan } 2310cf1d80d8SPeter Grehan } 2311cf1d80d8SPeter Grehan 2312cf1d80d8SPeter Grehan static void 2313cf1d80d8SPeter Grehan vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2314cf1d80d8SPeter Grehan { 2315cf1d80d8SPeter Grehan 2316cf1d80d8SPeter Grehan if (vcpu == 0) { 2317cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2318cf1d80d8SPeter Grehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2319cf1d80d8SPeter Grehan } 2320cf1d80d8SPeter Grehan } 2321cf1d80d8SPeter Grehan 2322cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2323cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2324
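/*
 * Illustrative sketch (not part of the original source): a typical user
 * of the guest copy helpers defined above -- for instance an emulation
 * path that must read guest memory by linear address -- would follow
 * roughly this pattern.  The variable names are hypothetical and the
 * protection value is assumed to be the VM_PROT_* constant matching the
 * access:
 *
 *	struct vm_copyinfo copyinfo[2];
 *	uint8_t buf[16];
 *	int error;
 *
 *	error = vm_copy_setup(vm, vcpuid, paging, gla, sizeof(buf),
 *	    VM_PROT_READ, copyinfo, nitems(copyinfo));
 *	if (error == 0) {
 *		vm_copyin(vm, vcpuid, copyinfo, buf, sizeof(buf));
 *		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
 *	}
 *
 * A non-zero return from vm_copy_setup() means the copy could not be set
 * up (the linear address did not translate or the backing pages could
 * not be held) and no teardown is required.
 */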