1366f6083SPeter Grehan /*- 2366f6083SPeter Grehan * Copyright (c) 2011 NetApp, Inc. 3366f6083SPeter Grehan * All rights reserved. 4366f6083SPeter Grehan * 5366f6083SPeter Grehan * Redistribution and use in source and binary forms, with or without 6366f6083SPeter Grehan * modification, are permitted provided that the following conditions 7366f6083SPeter Grehan * are met: 8366f6083SPeter Grehan * 1. Redistributions of source code must retain the above copyright 9366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer. 10366f6083SPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 11366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer in the 12366f6083SPeter Grehan * documentation and/or other materials provided with the distribution. 13366f6083SPeter Grehan * 14366f6083SPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15366f6083SPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16366f6083SPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17366f6083SPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18366f6083SPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19366f6083SPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20366f6083SPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21366f6083SPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22366f6083SPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23366f6083SPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24366f6083SPeter Grehan * SUCH DAMAGE. 25366f6083SPeter Grehan * 26366f6083SPeter Grehan * $FreeBSD$ 27366f6083SPeter Grehan */ 28366f6083SPeter Grehan 29366f6083SPeter Grehan #include <sys/cdefs.h> 30366f6083SPeter Grehan __FBSDID("$FreeBSD$"); 31366f6083SPeter Grehan 32366f6083SPeter Grehan #include <sys/param.h> 3338f1b189SPeter Grehan #include <sys/systm.h> 34366f6083SPeter Grehan #include <sys/kernel.h> 35366f6083SPeter Grehan #include <sys/module.h> 36366f6083SPeter Grehan #include <sys/sysctl.h> 37366f6083SPeter Grehan #include <sys/malloc.h> 38366f6083SPeter Grehan #include <sys/pcpu.h> 39366f6083SPeter Grehan #include <sys/lock.h> 40366f6083SPeter Grehan #include <sys/mutex.h> 41366f6083SPeter Grehan #include <sys/proc.h> 42318224bbSNeel Natu #include <sys/rwlock.h> 43366f6083SPeter Grehan #include <sys/sched.h> 44366f6083SPeter Grehan #include <sys/smp.h> 45366f6083SPeter Grehan #include <sys/systm.h> 46366f6083SPeter Grehan 47366f6083SPeter Grehan #include <vm/vm.h> 48318224bbSNeel Natu #include <vm/vm_object.h> 49318224bbSNeel Natu #include <vm/vm_page.h> 50318224bbSNeel Natu #include <vm/pmap.h> 51318224bbSNeel Natu #include <vm/vm_map.h> 52318224bbSNeel Natu #include <vm/vm_extern.h> 53318224bbSNeel Natu #include <vm/vm_param.h> 54366f6083SPeter Grehan 5563e62d39SJohn Baldwin #include <machine/cpu.h> 56366f6083SPeter Grehan #include <machine/pcb.h> 5775dd3366SNeel Natu #include <machine/smp.h> 581c052192SNeel Natu #include <x86/psl.h> 5934a6b2d6SJohn Baldwin #include <x86/apicreg.h> 60366f6083SPeter Grehan 61366f6083SPeter Grehan #include <machine/vmm.h> 62565bbb86SNeel Natu #include <machine/vmm_dev.h> 63e813a873SNeel Natu #include <machine/vmm_instruction_emul.h> 64565bbb86SNeel Natu 65d17b5104SNeel Natu #include "vmm_ioport.h" 66318224bbSNeel Natu #include "vmm_ktr.h" 67b01c2033SNeel Natu #include "vmm_host.h" 68366f6083SPeter Grehan #include "vmm_mem.h" 69366f6083SPeter Grehan #include "vmm_util.h" 70762fd208STycho Nightingale #include "vatpic.h" 71e883c9bbSTycho Nightingale #include "vatpit.h" 7208e3ff32SNeel Natu #include "vhpet.h" 73565bbb86SNeel Natu #include "vioapic.h" 74366f6083SPeter Grehan #include "vlapic.h" 75160ef77aSNeel Natu #include "vpmtmr.h" 760dafa5cdSNeel Natu #include "vrtc.h" 77366f6083SPeter Grehan #include "vmm_stat.h" 78f76fc5d4SNeel Natu #include "vmm_lapic.h" 79366f6083SPeter Grehan 80366f6083SPeter Grehan #include "io/ppt.h" 81366f6083SPeter Grehan #include "io/iommu.h" 82366f6083SPeter Grehan 83366f6083SPeter Grehan struct vlapic; 84366f6083SPeter Grehan 855fcf252fSNeel Natu /* 865fcf252fSNeel Natu * Initialization: 875fcf252fSNeel Natu * (a) allocated when vcpu is created 885fcf252fSNeel Natu * (i) initialized when vcpu is created and when it is reinitialized 895fcf252fSNeel Natu * (o) initialized the first time the vcpu is created 905fcf252fSNeel Natu * (x) initialized before use 915fcf252fSNeel Natu */ 92366f6083SPeter Grehan struct vcpu { 935fcf252fSNeel Natu struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 945fcf252fSNeel Natu enum vcpu_state state; /* (o) vcpu state */ 955fcf252fSNeel Natu int hostcpu; /* (o) vcpu's host cpu */ 96248e6799SNeel Natu int reqidle; /* (i) request vcpu to idle */ 975fcf252fSNeel Natu struct vlapic *vlapic; /* (i) APIC device model */ 985fcf252fSNeel Natu enum x2apic_state x2apic_state; /* (i) APIC mode */ 99091d4532SNeel Natu uint64_t exitintinfo; /* (i) events pending at VM exit */ 1005fcf252fSNeel Natu int nmi_pending; /* (i) NMI pending */ 1015fcf252fSNeel Natu int extint_pending; /* (i) INTR pending */ 1025fcf252fSNeel Natu int exception_pending; /* (i) exception pending */ 103c9c75df4SNeel Natu int exc_vector; /* (x) exception collateral */ 104c9c75df4SNeel Natu int exc_errcode_valid; 105c9c75df4SNeel Natu uint32_t exc_errcode; 1065fcf252fSNeel Natu struct savefpu *guestfpu; /* (a,i) guest fpu state */ 1075fcf252fSNeel Natu uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 1085fcf252fSNeel Natu void *stats; /* (a,i) statistics */ 1095fcf252fSNeel Natu struct vm_exit exitinfo; /* (x) exit reason and collateral */ 110d087a399SNeel Natu uint64_t nextrip; /* (x) next instruction to execute */ 111366f6083SPeter Grehan }; 112366f6083SPeter Grehan 1135fcf252fSNeel Natu #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 114f76fc5d4SNeel Natu #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 115f76fc5d4SNeel Natu #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 116f76fc5d4SNeel Natu #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 117318224bbSNeel Natu #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 11875dd3366SNeel Natu 119318224bbSNeel Natu struct mem_seg { 1209b1aa8d6SNeel Natu size_t len; 1219b1aa8d6SNeel Natu bool sysmem; 1229b1aa8d6SNeel Natu struct vm_object *object; 1239b1aa8d6SNeel Natu }; 1246bcf245eSMarcel Moolenaar #define VM_MAX_MEMSEGS 3 1259b1aa8d6SNeel Natu 1269b1aa8d6SNeel Natu struct mem_map { 127318224bbSNeel Natu vm_paddr_t gpa; 128318224bbSNeel Natu size_t len; 1299b1aa8d6SNeel Natu vm_ooffset_t segoff; 1309b1aa8d6SNeel Natu int segid; 1319b1aa8d6SNeel Natu int prot; 1329b1aa8d6SNeel Natu int flags; 133318224bbSNeel Natu }; 1349b1aa8d6SNeel Natu #define VM_MAX_MEMMAPS 4 135366f6083SPeter Grehan 136366f6083SPeter Grehan /* 1375fcf252fSNeel Natu * Initialization: 1385fcf252fSNeel Natu * (o) initialized the first time the VM is created 1395fcf252fSNeel Natu * (i) initialized when VM is created and when it is reinitialized 1405fcf252fSNeel Natu * (x) initialized before use 141366f6083SPeter Grehan */ 1425fcf252fSNeel Natu struct vm { 1435fcf252fSNeel Natu void *cookie; /* (i) cpu-specific data */ 1445fcf252fSNeel Natu void *iommu; /* (x) iommu-specific data */ 1455fcf252fSNeel Natu struct vhpet *vhpet; /* (i) virtual HPET */ 1465fcf252fSNeel Natu struct vioapic *vioapic; /* (i) virtual ioapic */ 1475fcf252fSNeel Natu struct vatpic *vatpic; /* (i) virtual atpic */ 1485fcf252fSNeel Natu struct vatpit *vatpit; /* (i) virtual atpit */ 149160ef77aSNeel Natu struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 1500dafa5cdSNeel Natu struct vrtc *vrtc; /* (o) virtual RTC */ 1515fcf252fSNeel Natu volatile cpuset_t active_cpus; /* (i) active vcpus */ 1525fcf252fSNeel Natu int suspend; /* (i) stop VM execution */ 1535fcf252fSNeel Natu volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 1545fcf252fSNeel Natu volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 1555fcf252fSNeel Natu cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ 1565fcf252fSNeel Natu cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ 1575fcf252fSNeel Natu void *rendezvous_arg; /* (x) rendezvous func/arg */ 1585b8a8cd1SNeel Natu vm_rendezvous_func_t rendezvous_func; 1595fcf252fSNeel Natu struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 1609b1aa8d6SNeel Natu struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ 1619b1aa8d6SNeel Natu struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ 1625fcf252fSNeel Natu struct vmspace *vmspace; /* (o) guest's address space */ 1635fcf252fSNeel Natu char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 1645fcf252fSNeel Natu struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 165366f6083SPeter Grehan }; 166366f6083SPeter Grehan 167d5408b1dSNeel Natu static int vmm_initialized; 168d5408b1dSNeel Natu 169366f6083SPeter Grehan static struct vmm_ops *ops; 170add611fdSNeel Natu #define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) 171366f6083SPeter Grehan #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 17263e62d39SJohn Baldwin #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 173366f6083SPeter Grehan 174318224bbSNeel Natu #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 175248e6799SNeel Natu #define VMRUN(vmi, vcpu, rip, pmap, evinfo) \ 176248e6799SNeel Natu (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO) 177366f6083SPeter Grehan #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 178318224bbSNeel Natu #define VMSPACE_ALLOC(min, max) \ 179318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 180318224bbSNeel Natu #define VMSPACE_FREE(vmspace) \ 181318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 182366f6083SPeter Grehan #define VMGETREG(vmi, vcpu, num, retval) \ 183366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 184366f6083SPeter Grehan #define VMSETREG(vmi, vcpu, num, val) \ 185366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 186366f6083SPeter Grehan #define VMGETDESC(vmi, vcpu, num, desc) \ 187366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 188366f6083SPeter Grehan #define VMSETDESC(vmi, vcpu, num, desc) \ 189366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 190366f6083SPeter Grehan #define VMGETCAP(vmi, vcpu, num, retval) \ 191366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 192366f6083SPeter Grehan #define VMSETCAP(vmi, vcpu, num, val) \ 193366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 194de5ea6b6SNeel Natu #define VLAPIC_INIT(vmi, vcpu) \ 195de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) 196de5ea6b6SNeel Natu #define VLAPIC_CLEANUP(vmi, vlapic) \ 197de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) 198366f6083SPeter Grehan 199014a52f3SNeel Natu #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 200014a52f3SNeel Natu #define fpu_stop_emulating() clts() 201366f6083SPeter Grehan 202366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm"); 203366f6083SPeter Grehan 204366f6083SPeter Grehan /* statistics */ 20561592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 206366f6083SPeter Grehan 207add611fdSNeel Natu SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 208add611fdSNeel Natu 209055fc2cbSNeel Natu /* 210055fc2cbSNeel Natu * Halt the guest if all vcpus are executing a HLT instruction with 211055fc2cbSNeel Natu * interrupts disabled. 212055fc2cbSNeel Natu */ 213055fc2cbSNeel Natu static int halt_detection_enabled = 1; 214055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 215055fc2cbSNeel Natu &halt_detection_enabled, 0, 216055fc2cbSNeel Natu "Halt VM if all vcpus execute HLT with interrupts disabled"); 217055fc2cbSNeel Natu 218add611fdSNeel Natu static int vmm_ipinum; 219add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 220add611fdSNeel Natu "IPI vector used for vcpu notifications"); 221add611fdSNeel Natu 222b0538143SNeel Natu static int trace_guest_exceptions; 223b0538143SNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 224b0538143SNeel Natu &trace_guest_exceptions, 0, 225b0538143SNeel Natu "Trap into hypervisor on all guest exceptions and reflect them back"); 226b0538143SNeel Natu 2279b1aa8d6SNeel Natu static void vm_free_memmap(struct vm *vm, int ident); 2289b1aa8d6SNeel Natu static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 229248e6799SNeel Natu static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); 230248e6799SNeel Natu 231248e6799SNeel Natu #ifdef KTR 232248e6799SNeel Natu static const char * 233248e6799SNeel Natu vcpu_state2str(enum vcpu_state state) 234248e6799SNeel Natu { 235248e6799SNeel Natu 236248e6799SNeel Natu switch (state) { 237248e6799SNeel Natu case VCPU_IDLE: 238248e6799SNeel Natu return ("idle"); 239248e6799SNeel Natu case VCPU_FROZEN: 240248e6799SNeel Natu return ("frozen"); 241248e6799SNeel Natu case VCPU_RUNNING: 242248e6799SNeel Natu return ("running"); 243248e6799SNeel Natu case VCPU_SLEEPING: 244248e6799SNeel Natu return ("sleeping"); 245248e6799SNeel Natu default: 246248e6799SNeel Natu return ("unknown"); 247248e6799SNeel Natu } 248248e6799SNeel Natu } 249248e6799SNeel Natu #endif 250248e6799SNeel Natu 251366f6083SPeter Grehan static void 2525fcf252fSNeel Natu vcpu_cleanup(struct vm *vm, int i, bool destroy) 253366f6083SPeter Grehan { 254de5ea6b6SNeel Natu struct vcpu *vcpu = &vm->vcpu[i]; 255de5ea6b6SNeel Natu 256de5ea6b6SNeel Natu VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); 2575fcf252fSNeel Natu if (destroy) { 258366f6083SPeter Grehan vmm_stat_free(vcpu->stats); 25938f1b189SPeter Grehan fpu_save_area_free(vcpu->guestfpu); 260366f6083SPeter Grehan } 2615fcf252fSNeel Natu } 262366f6083SPeter Grehan 263366f6083SPeter Grehan static void 2645fcf252fSNeel Natu vcpu_init(struct vm *vm, int vcpu_id, bool create) 265366f6083SPeter Grehan { 266366f6083SPeter Grehan struct vcpu *vcpu; 267366f6083SPeter Grehan 2685fcf252fSNeel Natu KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, 2695fcf252fSNeel Natu ("vcpu_init: invalid vcpu %d", vcpu_id)); 2705fcf252fSNeel Natu 271366f6083SPeter Grehan vcpu = &vm->vcpu[vcpu_id]; 272366f6083SPeter Grehan 2735fcf252fSNeel Natu if (create) { 2745fcf252fSNeel Natu KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 2755fcf252fSNeel Natu "initialized", vcpu_id)); 27675dd3366SNeel Natu vcpu_lock_init(vcpu); 2775fcf252fSNeel Natu vcpu->state = VCPU_IDLE; 27875dd3366SNeel Natu vcpu->hostcpu = NOCPU; 2795fcf252fSNeel Natu vcpu->guestfpu = fpu_save_area_alloc(); 2805fcf252fSNeel Natu vcpu->stats = vmm_stat_alloc(); 2815fcf252fSNeel Natu } 2825fcf252fSNeel Natu 283de5ea6b6SNeel Natu vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); 28452e5c8a2SNeel Natu vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); 285248e6799SNeel Natu vcpu->reqidle = 0; 286091d4532SNeel Natu vcpu->exitintinfo = 0; 2875fcf252fSNeel Natu vcpu->nmi_pending = 0; 2885fcf252fSNeel Natu vcpu->extint_pending = 0; 2895fcf252fSNeel Natu vcpu->exception_pending = 0; 290abb023fbSJohn Baldwin vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 29138f1b189SPeter Grehan fpu_save_area_reset(vcpu->guestfpu); 2925fcf252fSNeel Natu vmm_stat_init(vcpu->stats); 293366f6083SPeter Grehan } 294366f6083SPeter Grehan 295b0538143SNeel Natu int 296b0538143SNeel Natu vcpu_trace_exceptions(struct vm *vm, int vcpuid) 297b0538143SNeel Natu { 298b0538143SNeel Natu 299b0538143SNeel Natu return (trace_guest_exceptions); 300b0538143SNeel Natu } 301b0538143SNeel Natu 30298ed632cSNeel Natu struct vm_exit * 30398ed632cSNeel Natu vm_exitinfo(struct vm *vm, int cpuid) 30498ed632cSNeel Natu { 30598ed632cSNeel Natu struct vcpu *vcpu; 30698ed632cSNeel Natu 30798ed632cSNeel Natu if (cpuid < 0 || cpuid >= VM_MAXCPU) 30898ed632cSNeel Natu panic("vm_exitinfo: invalid cpuid %d", cpuid); 30998ed632cSNeel Natu 31098ed632cSNeel Natu vcpu = &vm->vcpu[cpuid]; 31198ed632cSNeel Natu 31298ed632cSNeel Natu return (&vcpu->exitinfo); 31398ed632cSNeel Natu } 31498ed632cSNeel Natu 31563e62d39SJohn Baldwin static void 31663e62d39SJohn Baldwin vmm_resume(void) 31763e62d39SJohn Baldwin { 31863e62d39SJohn Baldwin VMM_RESUME(); 31963e62d39SJohn Baldwin } 32063e62d39SJohn Baldwin 321366f6083SPeter Grehan static int 322366f6083SPeter Grehan vmm_init(void) 323366f6083SPeter Grehan { 324366f6083SPeter Grehan int error; 325366f6083SPeter Grehan 326b01c2033SNeel Natu vmm_host_state_init(); 327add611fdSNeel Natu 32818a2b08eSNeel Natu vmm_ipinum = lapic_ipi_alloc(&IDTVEC(justreturn)); 32918a2b08eSNeel Natu if (vmm_ipinum < 0) 330add611fdSNeel Natu vmm_ipinum = IPI_AST; 331366f6083SPeter Grehan 332366f6083SPeter Grehan error = vmm_mem_init(); 333366f6083SPeter Grehan if (error) 334366f6083SPeter Grehan return (error); 335366f6083SPeter Grehan 336366f6083SPeter Grehan if (vmm_is_intel()) 337366f6083SPeter Grehan ops = &vmm_ops_intel; 338366f6083SPeter Grehan else if (vmm_is_amd()) 339366f6083SPeter Grehan ops = &vmm_ops_amd; 340366f6083SPeter Grehan else 341366f6083SPeter Grehan return (ENXIO); 342366f6083SPeter Grehan 34363e62d39SJohn Baldwin vmm_resume_p = vmm_resume; 344366f6083SPeter Grehan 345add611fdSNeel Natu return (VMM_INIT(vmm_ipinum)); 346366f6083SPeter Grehan } 347366f6083SPeter Grehan 348366f6083SPeter Grehan static int 349366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg) 350366f6083SPeter Grehan { 351366f6083SPeter Grehan int error; 352366f6083SPeter Grehan 353366f6083SPeter Grehan switch (what) { 354366f6083SPeter Grehan case MOD_LOAD: 355366f6083SPeter Grehan vmmdev_init(); 356366f6083SPeter Grehan error = vmm_init(); 357d5408b1dSNeel Natu if (error == 0) 358d5408b1dSNeel Natu vmm_initialized = 1; 359366f6083SPeter Grehan break; 360366f6083SPeter Grehan case MOD_UNLOAD: 361cdc5b9e7SNeel Natu error = vmmdev_cleanup(); 362cdc5b9e7SNeel Natu if (error == 0) { 36363e62d39SJohn Baldwin vmm_resume_p = NULL; 364366f6083SPeter Grehan iommu_cleanup(); 365add611fdSNeel Natu if (vmm_ipinum != IPI_AST) 36618a2b08eSNeel Natu lapic_ipi_free(vmm_ipinum); 367366f6083SPeter Grehan error = VMM_CLEANUP(); 36881ef6611SPeter Grehan /* 36981ef6611SPeter Grehan * Something bad happened - prevent new 37081ef6611SPeter Grehan * VMs from being created 37181ef6611SPeter Grehan */ 37281ef6611SPeter Grehan if (error) 373d5408b1dSNeel Natu vmm_initialized = 0; 37481ef6611SPeter Grehan } 375366f6083SPeter Grehan break; 376366f6083SPeter Grehan default: 377366f6083SPeter Grehan error = 0; 378366f6083SPeter Grehan break; 379366f6083SPeter Grehan } 380366f6083SPeter Grehan return (error); 381366f6083SPeter Grehan } 382366f6083SPeter Grehan 383366f6083SPeter Grehan static moduledata_t vmm_kmod = { 384366f6083SPeter Grehan "vmm", 385366f6083SPeter Grehan vmm_handler, 386366f6083SPeter Grehan NULL 387366f6083SPeter Grehan }; 388366f6083SPeter Grehan 389366f6083SPeter Grehan /* 390e3f0800bSNeel Natu * vmm initialization has the following dependencies: 391e3f0800bSNeel Natu * 392e3f0800bSNeel Natu * - VT-x initialization requires smp_rendezvous() and therefore must happen 393e3f0800bSNeel Natu * after SMP is fully functional (after SI_SUB_SMP). 394366f6083SPeter Grehan */ 395e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 396366f6083SPeter Grehan MODULE_VERSION(vmm, 1); 397366f6083SPeter Grehan 3985fcf252fSNeel Natu static void 3995fcf252fSNeel Natu vm_init(struct vm *vm, bool create) 4005fcf252fSNeel Natu { 4015fcf252fSNeel Natu int i; 4025fcf252fSNeel Natu 4035fcf252fSNeel Natu vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); 4045fcf252fSNeel Natu vm->iommu = NULL; 4055fcf252fSNeel Natu vm->vioapic = vioapic_init(vm); 4065fcf252fSNeel Natu vm->vhpet = vhpet_init(vm); 4075fcf252fSNeel Natu vm->vatpic = vatpic_init(vm); 4085fcf252fSNeel Natu vm->vatpit = vatpit_init(vm); 409160ef77aSNeel Natu vm->vpmtmr = vpmtmr_init(vm); 4100dafa5cdSNeel Natu if (create) 4110dafa5cdSNeel Natu vm->vrtc = vrtc_init(vm); 4125fcf252fSNeel Natu 4135fcf252fSNeel Natu CPU_ZERO(&vm->active_cpus); 4145fcf252fSNeel Natu 4155fcf252fSNeel Natu vm->suspend = 0; 4165fcf252fSNeel Natu CPU_ZERO(&vm->suspended_cpus); 4175fcf252fSNeel Natu 4185fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4195fcf252fSNeel Natu vcpu_init(vm, i, create); 4205fcf252fSNeel Natu } 4215fcf252fSNeel Natu 422d5408b1dSNeel Natu int 423d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm) 424366f6083SPeter Grehan { 425366f6083SPeter Grehan struct vm *vm; 426318224bbSNeel Natu struct vmspace *vmspace; 427366f6083SPeter Grehan 428d5408b1dSNeel Natu /* 429d5408b1dSNeel Natu * If vmm.ko could not be successfully initialized then don't attempt 430d5408b1dSNeel Natu * to create the virtual machine. 431d5408b1dSNeel Natu */ 432d5408b1dSNeel Natu if (!vmm_initialized) 433d5408b1dSNeel Natu return (ENXIO); 434d5408b1dSNeel Natu 435366f6083SPeter Grehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 436d5408b1dSNeel Natu return (EINVAL); 437366f6083SPeter Grehan 438526c8885SPeter Grehan vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); 439318224bbSNeel Natu if (vmspace == NULL) 440318224bbSNeel Natu return (ENOMEM); 441318224bbSNeel Natu 442366f6083SPeter Grehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 443366f6083SPeter Grehan strcpy(vm->name, name); 44488c4b8d1SNeel Natu vm->vmspace = vmspace; 4455b8a8cd1SNeel Natu mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 446366f6083SPeter Grehan 4475fcf252fSNeel Natu vm_init(vm, true); 448366f6083SPeter Grehan 449d5408b1dSNeel Natu *retvm = vm; 450d5408b1dSNeel Natu return (0); 451366f6083SPeter Grehan } 452366f6083SPeter Grehan 453f7d51510SNeel Natu static void 4545fcf252fSNeel Natu vm_cleanup(struct vm *vm, bool destroy) 455366f6083SPeter Grehan { 4569b1aa8d6SNeel Natu struct mem_map *mm; 457366f6083SPeter Grehan int i; 458366f6083SPeter Grehan 459366f6083SPeter Grehan ppt_unassign_all(vm); 460366f6083SPeter Grehan 461318224bbSNeel Natu if (vm->iommu != NULL) 462318224bbSNeel Natu iommu_destroy_domain(vm->iommu); 463318224bbSNeel Natu 4640dafa5cdSNeel Natu if (destroy) 4650dafa5cdSNeel Natu vrtc_cleanup(vm->vrtc); 4660dafa5cdSNeel Natu else 4670dafa5cdSNeel Natu vrtc_reset(vm->vrtc); 468160ef77aSNeel Natu vpmtmr_cleanup(vm->vpmtmr); 469e883c9bbSTycho Nightingale vatpit_cleanup(vm->vatpit); 47008e3ff32SNeel Natu vhpet_cleanup(vm->vhpet); 471762fd208STycho Nightingale vatpic_cleanup(vm->vatpic); 47208e3ff32SNeel Natu vioapic_cleanup(vm->vioapic); 47308e3ff32SNeel Natu 4745fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4755fcf252fSNeel Natu vcpu_cleanup(vm, i, destroy); 4765fcf252fSNeel Natu 4775fcf252fSNeel Natu VMCLEANUP(vm->cookie); 4785fcf252fSNeel Natu 4799b1aa8d6SNeel Natu /* 4809b1aa8d6SNeel Natu * System memory is removed from the guest address space only when 4819b1aa8d6SNeel Natu * the VM is destroyed. This is because the mapping remains the same 4829b1aa8d6SNeel Natu * across VM reset. 4839b1aa8d6SNeel Natu * 4849b1aa8d6SNeel Natu * Device memory can be relocated by the guest (e.g. using PCI BARs) 4859b1aa8d6SNeel Natu * so those mappings are removed on a VM reset. 4869b1aa8d6SNeel Natu */ 4879b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 4889b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 4899b1aa8d6SNeel Natu if (destroy || !sysmem_mapping(vm, mm)) 4909b1aa8d6SNeel Natu vm_free_memmap(vm, i); 4919b1aa8d6SNeel Natu } 492f7d51510SNeel Natu 4939b1aa8d6SNeel Natu if (destroy) { 4949b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMSEGS; i++) 4959b1aa8d6SNeel Natu vm_free_memseg(vm, i); 496366f6083SPeter Grehan 497318224bbSNeel Natu VMSPACE_FREE(vm->vmspace); 4985fcf252fSNeel Natu vm->vmspace = NULL; 4995fcf252fSNeel Natu } 5005fcf252fSNeel Natu } 501366f6083SPeter Grehan 5025fcf252fSNeel Natu void 5035fcf252fSNeel Natu vm_destroy(struct vm *vm) 5045fcf252fSNeel Natu { 5055fcf252fSNeel Natu vm_cleanup(vm, true); 506366f6083SPeter Grehan free(vm, M_VM); 507366f6083SPeter Grehan } 508366f6083SPeter Grehan 5095fcf252fSNeel Natu int 5105fcf252fSNeel Natu vm_reinit(struct vm *vm) 5115fcf252fSNeel Natu { 5125fcf252fSNeel Natu int error; 5135fcf252fSNeel Natu 5145fcf252fSNeel Natu /* 5155fcf252fSNeel Natu * A virtual machine can be reset only if all vcpus are suspended. 5165fcf252fSNeel Natu */ 5175fcf252fSNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 5185fcf252fSNeel Natu vm_cleanup(vm, false); 5195fcf252fSNeel Natu vm_init(vm, false); 5205fcf252fSNeel Natu error = 0; 5215fcf252fSNeel Natu } else { 5225fcf252fSNeel Natu error = EBUSY; 5235fcf252fSNeel Natu } 5245fcf252fSNeel Natu 5255fcf252fSNeel Natu return (error); 5265fcf252fSNeel Natu } 5275fcf252fSNeel Natu 528366f6083SPeter Grehan const char * 529366f6083SPeter Grehan vm_name(struct vm *vm) 530366f6083SPeter Grehan { 531366f6083SPeter Grehan return (vm->name); 532366f6083SPeter Grehan } 533366f6083SPeter Grehan 534366f6083SPeter Grehan int 535366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 536366f6083SPeter Grehan { 537318224bbSNeel Natu vm_object_t obj; 538366f6083SPeter Grehan 539318224bbSNeel Natu if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 540318224bbSNeel Natu return (ENOMEM); 541318224bbSNeel Natu else 542318224bbSNeel Natu return (0); 543366f6083SPeter Grehan } 544366f6083SPeter Grehan 545366f6083SPeter Grehan int 546366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 547366f6083SPeter Grehan { 548366f6083SPeter Grehan 549318224bbSNeel Natu vmm_mmio_free(vm->vmspace, gpa, len); 550318224bbSNeel Natu return (0); 551366f6083SPeter Grehan } 552366f6083SPeter Grehan 5539b1aa8d6SNeel Natu /* 5549b1aa8d6SNeel Natu * Return 'true' if 'gpa' is allocated in the guest address space. 5559b1aa8d6SNeel Natu * 5569b1aa8d6SNeel Natu * This function is called in the context of a running vcpu which acts as 5579b1aa8d6SNeel Natu * an implicit lock on 'vm->mem_maps[]'. 5589b1aa8d6SNeel Natu */ 5599b1aa8d6SNeel Natu bool 5609b1aa8d6SNeel Natu vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa) 561366f6083SPeter Grehan { 5629b1aa8d6SNeel Natu struct mem_map *mm; 563341f19c9SNeel Natu int i; 564341f19c9SNeel Natu 5659b1aa8d6SNeel Natu #ifdef INVARIANTS 5669b1aa8d6SNeel Natu int hostcpu, state; 5679b1aa8d6SNeel Natu state = vcpu_get_state(vm, vcpuid, &hostcpu); 5689b1aa8d6SNeel Natu KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 5699b1aa8d6SNeel Natu ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 5709b1aa8d6SNeel Natu #endif 5719b1aa8d6SNeel Natu 5729b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 5739b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 5749b1aa8d6SNeel Natu if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 5759b1aa8d6SNeel Natu return (true); /* 'gpa' is sysmem or devmem */ 576341f19c9SNeel Natu } 577341f19c9SNeel Natu 578318224bbSNeel Natu if (ppt_is_mmio(vm, gpa)) 5799b1aa8d6SNeel Natu return (true); /* 'gpa' is pci passthru mmio */ 580318224bbSNeel Natu 5819b1aa8d6SNeel Natu return (false); 582341f19c9SNeel Natu } 583341f19c9SNeel Natu 584341f19c9SNeel Natu int 5859b1aa8d6SNeel Natu vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 586341f19c9SNeel Natu { 587318224bbSNeel Natu struct mem_seg *seg; 5889b1aa8d6SNeel Natu vm_object_t obj; 589366f6083SPeter Grehan 5909b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 591341f19c9SNeel Natu return (EINVAL); 592341f19c9SNeel Natu 5939b1aa8d6SNeel Natu if (len == 0 || (len & PAGE_MASK)) 5949b1aa8d6SNeel Natu return (EINVAL); 595341f19c9SNeel Natu 5969b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 5979b1aa8d6SNeel Natu if (seg->object != NULL) { 5989b1aa8d6SNeel Natu if (seg->len == len && seg->sysmem == sysmem) 5999b1aa8d6SNeel Natu return (EEXIST); 6009b1aa8d6SNeel Natu else 6019b1aa8d6SNeel Natu return (EINVAL); 602341f19c9SNeel Natu } 603341f19c9SNeel Natu 6049b1aa8d6SNeel Natu obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); 6059b1aa8d6SNeel Natu if (obj == NULL) 606318224bbSNeel Natu return (ENOMEM); 607318224bbSNeel Natu 608318224bbSNeel Natu seg->len = len; 6099b1aa8d6SNeel Natu seg->object = obj; 6109b1aa8d6SNeel Natu seg->sysmem = sysmem; 611366f6083SPeter Grehan return (0); 612366f6083SPeter Grehan } 613366f6083SPeter Grehan 6149b1aa8d6SNeel Natu int 6159b1aa8d6SNeel Natu vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 6169b1aa8d6SNeel Natu vm_object_t *objptr) 617477867a0SNeel Natu { 6189b1aa8d6SNeel Natu struct mem_seg *seg; 619477867a0SNeel Natu 6209b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 6219b1aa8d6SNeel Natu return (EINVAL); 6229b1aa8d6SNeel Natu 6239b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 6249b1aa8d6SNeel Natu if (len) 6259b1aa8d6SNeel Natu *len = seg->len; 6269b1aa8d6SNeel Natu if (sysmem) 6279b1aa8d6SNeel Natu *sysmem = seg->sysmem; 6289b1aa8d6SNeel Natu if (objptr) 6299b1aa8d6SNeel Natu *objptr = seg->object; 6309b1aa8d6SNeel Natu return (0); 631477867a0SNeel Natu } 6329b1aa8d6SNeel Natu 6339b1aa8d6SNeel Natu void 6349b1aa8d6SNeel Natu vm_free_memseg(struct vm *vm, int ident) 6359b1aa8d6SNeel Natu { 6369b1aa8d6SNeel Natu struct mem_seg *seg; 6379b1aa8d6SNeel Natu 6389b1aa8d6SNeel Natu KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 6399b1aa8d6SNeel Natu ("%s: invalid memseg ident %d", __func__, ident)); 6409b1aa8d6SNeel Natu 6419b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 6429b1aa8d6SNeel Natu if (seg->object != NULL) { 6439b1aa8d6SNeel Natu vm_object_deallocate(seg->object); 6449b1aa8d6SNeel Natu bzero(seg, sizeof(struct mem_seg)); 6459b1aa8d6SNeel Natu } 6469b1aa8d6SNeel Natu } 6479b1aa8d6SNeel Natu 6489b1aa8d6SNeel Natu int 6499b1aa8d6SNeel Natu vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 6509b1aa8d6SNeel Natu size_t len, int prot, int flags) 6519b1aa8d6SNeel Natu { 6529b1aa8d6SNeel Natu struct mem_seg *seg; 6539b1aa8d6SNeel Natu struct mem_map *m, *map; 6549b1aa8d6SNeel Natu vm_ooffset_t last; 6559b1aa8d6SNeel Natu int i, error; 6569b1aa8d6SNeel Natu 6579b1aa8d6SNeel Natu if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 6589b1aa8d6SNeel Natu return (EINVAL); 6599b1aa8d6SNeel Natu 6609b1aa8d6SNeel Natu if (flags & ~VM_MEMMAP_F_WIRED) 6619b1aa8d6SNeel Natu return (EINVAL); 6629b1aa8d6SNeel Natu 6639b1aa8d6SNeel Natu if (segid < 0 || segid >= VM_MAX_MEMSEGS) 6649b1aa8d6SNeel Natu return (EINVAL); 6659b1aa8d6SNeel Natu 6669b1aa8d6SNeel Natu seg = &vm->mem_segs[segid]; 6679b1aa8d6SNeel Natu if (seg->object == NULL) 6689b1aa8d6SNeel Natu return (EINVAL); 6699b1aa8d6SNeel Natu 6709b1aa8d6SNeel Natu last = first + len; 6719b1aa8d6SNeel Natu if (first < 0 || first >= last || last > seg->len) 6729b1aa8d6SNeel Natu return (EINVAL); 6739b1aa8d6SNeel Natu 6749b1aa8d6SNeel Natu if ((gpa | first | last) & PAGE_MASK) 6759b1aa8d6SNeel Natu return (EINVAL); 6769b1aa8d6SNeel Natu 6779b1aa8d6SNeel Natu map = NULL; 6789b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 6799b1aa8d6SNeel Natu m = &vm->mem_maps[i]; 6809b1aa8d6SNeel Natu if (m->len == 0) { 6819b1aa8d6SNeel Natu map = m; 6829b1aa8d6SNeel Natu break; 6839b1aa8d6SNeel Natu } 6849b1aa8d6SNeel Natu } 6859b1aa8d6SNeel Natu 6869b1aa8d6SNeel Natu if (map == NULL) 6879b1aa8d6SNeel Natu return (ENOSPC); 6889b1aa8d6SNeel Natu 6899b1aa8d6SNeel Natu error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 6909b1aa8d6SNeel Natu len, 0, VMFS_NO_SPACE, prot, prot, 0); 6919b1aa8d6SNeel Natu if (error != KERN_SUCCESS) 6929b1aa8d6SNeel Natu return (EFAULT); 6939b1aa8d6SNeel Natu 6949b1aa8d6SNeel Natu vm_object_reference(seg->object); 6959b1aa8d6SNeel Natu 6969b1aa8d6SNeel Natu if (flags & VM_MEMMAP_F_WIRED) { 6979b1aa8d6SNeel Natu error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 6989b1aa8d6SNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 6999b1aa8d6SNeel Natu if (error != KERN_SUCCESS) { 7009b1aa8d6SNeel Natu vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 7019b1aa8d6SNeel Natu return (EFAULT); 7029b1aa8d6SNeel Natu } 7039b1aa8d6SNeel Natu } 7049b1aa8d6SNeel Natu 7059b1aa8d6SNeel Natu map->gpa = gpa; 7069b1aa8d6SNeel Natu map->len = len; 7079b1aa8d6SNeel Natu map->segoff = first; 7089b1aa8d6SNeel Natu map->segid = segid; 7099b1aa8d6SNeel Natu map->prot = prot; 7109b1aa8d6SNeel Natu map->flags = flags; 7119b1aa8d6SNeel Natu return (0); 7129b1aa8d6SNeel Natu } 7139b1aa8d6SNeel Natu 7149b1aa8d6SNeel Natu int 7159b1aa8d6SNeel Natu vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 7169b1aa8d6SNeel Natu vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 7179b1aa8d6SNeel Natu { 7189b1aa8d6SNeel Natu struct mem_map *mm, *mmnext; 7199b1aa8d6SNeel Natu int i; 7209b1aa8d6SNeel Natu 7219b1aa8d6SNeel Natu mmnext = NULL; 7229b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7239b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7249b1aa8d6SNeel Natu if (mm->len == 0 || mm->gpa < *gpa) 7259b1aa8d6SNeel Natu continue; 7269b1aa8d6SNeel Natu if (mmnext == NULL || mm->gpa < mmnext->gpa) 7279b1aa8d6SNeel Natu mmnext = mm; 7289b1aa8d6SNeel Natu } 7299b1aa8d6SNeel Natu 7309b1aa8d6SNeel Natu if (mmnext != NULL) { 7319b1aa8d6SNeel Natu *gpa = mmnext->gpa; 7329b1aa8d6SNeel Natu if (segid) 7339b1aa8d6SNeel Natu *segid = mmnext->segid; 7349b1aa8d6SNeel Natu if (segoff) 7359b1aa8d6SNeel Natu *segoff = mmnext->segoff; 7369b1aa8d6SNeel Natu if (len) 7379b1aa8d6SNeel Natu *len = mmnext->len; 7389b1aa8d6SNeel Natu if (prot) 7399b1aa8d6SNeel Natu *prot = mmnext->prot; 7409b1aa8d6SNeel Natu if (flags) 7419b1aa8d6SNeel Natu *flags = mmnext->flags; 7429b1aa8d6SNeel Natu return (0); 7439b1aa8d6SNeel Natu } else { 7449b1aa8d6SNeel Natu return (ENOENT); 7459b1aa8d6SNeel Natu } 746477867a0SNeel Natu } 747477867a0SNeel Natu 748318224bbSNeel Natu static void 7499b1aa8d6SNeel Natu vm_free_memmap(struct vm *vm, int ident) 750366f6083SPeter Grehan { 7519b1aa8d6SNeel Natu struct mem_map *mm; 7529b1aa8d6SNeel Natu int error; 7534db4fb2cSNeel Natu 7549b1aa8d6SNeel Natu mm = &vm->mem_maps[ident]; 7559b1aa8d6SNeel Natu if (mm->len) { 7569b1aa8d6SNeel Natu error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 7579b1aa8d6SNeel Natu mm->gpa + mm->len); 7589b1aa8d6SNeel Natu KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 7599b1aa8d6SNeel Natu __func__, error)); 7609b1aa8d6SNeel Natu bzero(mm, sizeof(struct mem_map)); 761318224bbSNeel Natu } 762318224bbSNeel Natu } 763318224bbSNeel Natu 7649b1aa8d6SNeel Natu static __inline bool 7659b1aa8d6SNeel Natu sysmem_mapping(struct vm *vm, struct mem_map *mm) 766318224bbSNeel Natu { 767318224bbSNeel Natu 7689b1aa8d6SNeel Natu if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 7699b1aa8d6SNeel Natu return (true); 7709b1aa8d6SNeel Natu else 7719b1aa8d6SNeel Natu return (false); 772318224bbSNeel Natu } 773318224bbSNeel Natu 7749b1aa8d6SNeel Natu static vm_paddr_t 7759b1aa8d6SNeel Natu sysmem_maxaddr(struct vm *vm) 7769b1aa8d6SNeel Natu { 7779b1aa8d6SNeel Natu struct mem_map *mm; 7789b1aa8d6SNeel Natu vm_paddr_t maxaddr; 7799b1aa8d6SNeel Natu int i; 780318224bbSNeel Natu 7819b1aa8d6SNeel Natu maxaddr = 0; 7829b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7839b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7849b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm)) { 7859b1aa8d6SNeel Natu if (maxaddr < mm->gpa + mm->len) 7869b1aa8d6SNeel Natu maxaddr = mm->gpa + mm->len; 7879b1aa8d6SNeel Natu } 7889b1aa8d6SNeel Natu } 7899b1aa8d6SNeel Natu return (maxaddr); 790318224bbSNeel Natu } 791318224bbSNeel Natu 792318224bbSNeel Natu static void 793318224bbSNeel Natu vm_iommu_modify(struct vm *vm, boolean_t map) 794318224bbSNeel Natu { 795318224bbSNeel Natu int i, sz; 796318224bbSNeel Natu vm_paddr_t gpa, hpa; 7979b1aa8d6SNeel Natu struct mem_map *mm; 798318224bbSNeel Natu void *vp, *cookie, *host_domain; 799318224bbSNeel Natu 800318224bbSNeel Natu sz = PAGE_SIZE; 801318224bbSNeel Natu host_domain = iommu_host_domain(); 802318224bbSNeel Natu 8039b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 8049b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 8059b1aa8d6SNeel Natu if (!sysmem_mapping(vm, mm)) 8069b1aa8d6SNeel Natu continue; 807318224bbSNeel Natu 8089b1aa8d6SNeel Natu if (map) { 8099b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, 8109b1aa8d6SNeel Natu ("iommu map found invalid memmap %#lx/%#lx/%#x", 8119b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 8129b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) 8139b1aa8d6SNeel Natu continue; 8149b1aa8d6SNeel Natu mm->flags |= VM_MEMMAP_F_IOMMU; 8159b1aa8d6SNeel Natu } else { 8169b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) 8179b1aa8d6SNeel Natu continue; 8189b1aa8d6SNeel Natu mm->flags &= ~VM_MEMMAP_F_IOMMU; 8199b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, 8209b1aa8d6SNeel Natu ("iommu unmap found invalid memmap %#lx/%#lx/%#x", 8219b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 8229b1aa8d6SNeel Natu } 8239b1aa8d6SNeel Natu 8249b1aa8d6SNeel Natu gpa = mm->gpa; 8259b1aa8d6SNeel Natu while (gpa < mm->gpa + mm->len) { 8269b1aa8d6SNeel Natu vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE, 827318224bbSNeel Natu &cookie); 828318224bbSNeel Natu KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 829318224bbSNeel Natu vm_name(vm), gpa)); 830318224bbSNeel Natu 831318224bbSNeel Natu vm_gpa_release(cookie); 832318224bbSNeel Natu 833318224bbSNeel Natu hpa = DMAP_TO_PHYS((uintptr_t)vp); 834318224bbSNeel Natu if (map) { 835318224bbSNeel Natu iommu_create_mapping(vm->iommu, gpa, hpa, sz); 836318224bbSNeel Natu iommu_remove_mapping(host_domain, hpa, sz); 837318224bbSNeel Natu } else { 838318224bbSNeel Natu iommu_remove_mapping(vm->iommu, gpa, sz); 839318224bbSNeel Natu iommu_create_mapping(host_domain, hpa, hpa, sz); 840318224bbSNeel Natu } 841318224bbSNeel Natu 842318224bbSNeel Natu gpa += PAGE_SIZE; 843318224bbSNeel Natu } 844318224bbSNeel Natu } 845318224bbSNeel Natu 846318224bbSNeel Natu /* 847318224bbSNeel Natu * Invalidate the cached translations associated with the domain 848318224bbSNeel Natu * from which pages were removed. 849318224bbSNeel Natu */ 850318224bbSNeel Natu if (map) 851318224bbSNeel Natu iommu_invalidate_tlb(host_domain); 852318224bbSNeel Natu else 853318224bbSNeel Natu iommu_invalidate_tlb(vm->iommu); 854318224bbSNeel Natu } 855318224bbSNeel Natu 856318224bbSNeel Natu #define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 857318224bbSNeel Natu #define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 858318224bbSNeel Natu 859318224bbSNeel Natu int 860318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 861318224bbSNeel Natu { 862318224bbSNeel Natu int error; 863318224bbSNeel Natu 864318224bbSNeel Natu error = ppt_unassign_device(vm, bus, slot, func); 865318224bbSNeel Natu if (error) 866318224bbSNeel Natu return (error); 867318224bbSNeel Natu 8689b1aa8d6SNeel Natu if (ppt_assigned_devices(vm) == 0) 869318224bbSNeel Natu vm_iommu_unmap(vm); 8709b1aa8d6SNeel Natu 871318224bbSNeel Natu return (0); 872318224bbSNeel Natu } 873318224bbSNeel Natu 874318224bbSNeel Natu int 875318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 876318224bbSNeel Natu { 877318224bbSNeel Natu int error; 878318224bbSNeel Natu vm_paddr_t maxaddr; 879318224bbSNeel Natu 8809b1aa8d6SNeel Natu /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ 88151f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 882318224bbSNeel Natu KASSERT(vm->iommu == NULL, 883318224bbSNeel Natu ("vm_assign_pptdev: iommu must be NULL")); 8849b1aa8d6SNeel Natu maxaddr = sysmem_maxaddr(vm); 885318224bbSNeel Natu vm->iommu = iommu_create_domain(maxaddr); 886*ffe1b10dSJohn Baldwin if (vm->iommu == NULL) 887*ffe1b10dSJohn Baldwin return (ENXIO); 888318224bbSNeel Natu vm_iommu_map(vm); 889318224bbSNeel Natu } 890318224bbSNeel Natu 891318224bbSNeel Natu error = ppt_assign_device(vm, bus, slot, func); 892318224bbSNeel Natu return (error); 893318224bbSNeel Natu } 894318224bbSNeel Natu 895318224bbSNeel Natu void * 8969b1aa8d6SNeel Natu vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, 897318224bbSNeel Natu void **cookie) 898318224bbSNeel Natu { 8999b1aa8d6SNeel Natu int i, count, pageoff; 9009b1aa8d6SNeel Natu struct mem_map *mm; 901318224bbSNeel Natu vm_page_t m; 9029b1aa8d6SNeel Natu #ifdef INVARIANTS 9039b1aa8d6SNeel Natu /* 9049b1aa8d6SNeel Natu * All vcpus are frozen by ioctls that modify the memory map 9059b1aa8d6SNeel Natu * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is 9069b1aa8d6SNeel Natu * guaranteed if at least one vcpu is in the VCPU_FROZEN state. 9079b1aa8d6SNeel Natu */ 9089b1aa8d6SNeel Natu int state; 9092de70600SJohn Baldwin KASSERT(vcpuid >= -1 && vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d", 9109b1aa8d6SNeel Natu __func__, vcpuid)); 9119b1aa8d6SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 9129b1aa8d6SNeel Natu if (vcpuid != -1 && vcpuid != i) 9139b1aa8d6SNeel Natu continue; 9149b1aa8d6SNeel Natu state = vcpu_get_state(vm, i, NULL); 9159b1aa8d6SNeel Natu KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 9169b1aa8d6SNeel Natu __func__, state)); 9179b1aa8d6SNeel Natu } 9189b1aa8d6SNeel Natu #endif 919318224bbSNeel Natu pageoff = gpa & PAGE_MASK; 920318224bbSNeel Natu if (len > PAGE_SIZE - pageoff) 921318224bbSNeel Natu panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 922318224bbSNeel Natu 9239b1aa8d6SNeel Natu count = 0; 9249b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9259b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 9269b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 9279b1aa8d6SNeel Natu gpa < mm->gpa + mm->len) { 928318224bbSNeel Natu count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 929318224bbSNeel Natu trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 9309b1aa8d6SNeel Natu break; 9319b1aa8d6SNeel Natu } 9329b1aa8d6SNeel Natu } 933318224bbSNeel Natu 934318224bbSNeel Natu if (count == 1) { 935318224bbSNeel Natu *cookie = m; 936318224bbSNeel Natu return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 937318224bbSNeel Natu } else { 938318224bbSNeel Natu *cookie = NULL; 939318224bbSNeel Natu return (NULL); 940318224bbSNeel Natu } 941318224bbSNeel Natu } 942318224bbSNeel Natu 943318224bbSNeel Natu void 944318224bbSNeel Natu vm_gpa_release(void *cookie) 945318224bbSNeel Natu { 946318224bbSNeel Natu vm_page_t m = cookie; 947318224bbSNeel Natu 948318224bbSNeel Natu vm_page_lock(m); 949318224bbSNeel Natu vm_page_unhold(m); 950318224bbSNeel Natu vm_page_unlock(m); 951366f6083SPeter Grehan } 952366f6083SPeter Grehan 953366f6083SPeter Grehan int 954366f6083SPeter Grehan vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 955366f6083SPeter Grehan { 956366f6083SPeter Grehan 957366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 958366f6083SPeter Grehan return (EINVAL); 959366f6083SPeter Grehan 960366f6083SPeter Grehan if (reg >= VM_REG_LAST) 961366f6083SPeter Grehan return (EINVAL); 962366f6083SPeter Grehan 963366f6083SPeter Grehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 964366f6083SPeter Grehan } 965366f6083SPeter Grehan 966366f6083SPeter Grehan int 967d087a399SNeel Natu vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) 968366f6083SPeter Grehan { 969d087a399SNeel Natu struct vcpu *vcpu; 970d087a399SNeel Natu int error; 971366f6083SPeter Grehan 972d087a399SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 973366f6083SPeter Grehan return (EINVAL); 974366f6083SPeter Grehan 975366f6083SPeter Grehan if (reg >= VM_REG_LAST) 976366f6083SPeter Grehan return (EINVAL); 977366f6083SPeter Grehan 978d087a399SNeel Natu error = VMSETREG(vm->cookie, vcpuid, reg, val); 979d087a399SNeel Natu if (error || reg != VM_REG_GUEST_RIP) 980d087a399SNeel Natu return (error); 981d087a399SNeel Natu 982d087a399SNeel Natu /* Set 'nextrip' to match the value of %rip */ 983d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); 984d087a399SNeel Natu vcpu = &vm->vcpu[vcpuid]; 985d087a399SNeel Natu vcpu->nextrip = val; 986d087a399SNeel Natu return (0); 987366f6083SPeter Grehan } 988366f6083SPeter Grehan 989366f6083SPeter Grehan static boolean_t 990366f6083SPeter Grehan is_descriptor_table(int reg) 991366f6083SPeter Grehan { 992366f6083SPeter Grehan 993366f6083SPeter Grehan switch (reg) { 994366f6083SPeter Grehan case VM_REG_GUEST_IDTR: 995366f6083SPeter Grehan case VM_REG_GUEST_GDTR: 996366f6083SPeter Grehan return (TRUE); 997366f6083SPeter Grehan default: 998366f6083SPeter Grehan return (FALSE); 999366f6083SPeter Grehan } 1000366f6083SPeter Grehan } 1001366f6083SPeter Grehan 1002366f6083SPeter Grehan static boolean_t 1003366f6083SPeter Grehan is_segment_register(int reg) 1004366f6083SPeter Grehan { 1005366f6083SPeter Grehan 1006366f6083SPeter Grehan switch (reg) { 1007366f6083SPeter Grehan case VM_REG_GUEST_ES: 1008366f6083SPeter Grehan case VM_REG_GUEST_CS: 1009366f6083SPeter Grehan case VM_REG_GUEST_SS: 1010366f6083SPeter Grehan case VM_REG_GUEST_DS: 1011366f6083SPeter Grehan case VM_REG_GUEST_FS: 1012366f6083SPeter Grehan case VM_REG_GUEST_GS: 1013366f6083SPeter Grehan case VM_REG_GUEST_TR: 1014366f6083SPeter Grehan case VM_REG_GUEST_LDTR: 1015366f6083SPeter Grehan return (TRUE); 1016366f6083SPeter Grehan default: 1017366f6083SPeter Grehan return (FALSE); 1018366f6083SPeter Grehan } 1019366f6083SPeter Grehan } 1020366f6083SPeter Grehan 1021366f6083SPeter Grehan int 1022366f6083SPeter Grehan vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 1023366f6083SPeter Grehan struct seg_desc *desc) 1024366f6083SPeter Grehan { 1025366f6083SPeter Grehan 1026366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1027366f6083SPeter Grehan return (EINVAL); 1028366f6083SPeter Grehan 1029366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1030366f6083SPeter Grehan return (EINVAL); 1031366f6083SPeter Grehan 1032366f6083SPeter Grehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 1033366f6083SPeter Grehan } 1034366f6083SPeter Grehan 1035366f6083SPeter Grehan int 1036366f6083SPeter Grehan vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 1037366f6083SPeter Grehan struct seg_desc *desc) 1038366f6083SPeter Grehan { 1039366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1040366f6083SPeter Grehan return (EINVAL); 1041366f6083SPeter Grehan 1042366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1043366f6083SPeter Grehan return (EINVAL); 1044366f6083SPeter Grehan 1045366f6083SPeter Grehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 1046366f6083SPeter Grehan } 1047366f6083SPeter Grehan 1048366f6083SPeter Grehan static void 1049366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu) 1050366f6083SPeter Grehan { 1051366f6083SPeter Grehan 105238f1b189SPeter Grehan /* flush host state to the pcb */ 105338f1b189SPeter Grehan fpuexit(curthread); 1054bd8572e0SNeel Natu 1055bd8572e0SNeel Natu /* restore guest FPU state */ 1056366f6083SPeter Grehan fpu_stop_emulating(); 105738f1b189SPeter Grehan fpurestore(vcpu->guestfpu); 1058bd8572e0SNeel Natu 1059abb023fbSJohn Baldwin /* restore guest XCR0 if XSAVE is enabled in the host */ 1060abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) 1061abb023fbSJohn Baldwin load_xcr(0, vcpu->guest_xcr0); 1062abb023fbSJohn Baldwin 1063bd8572e0SNeel Natu /* 1064bd8572e0SNeel Natu * The FPU is now "dirty" with the guest's state so turn on emulation 1065bd8572e0SNeel Natu * to trap any access to the FPU by the host. 1066bd8572e0SNeel Natu */ 1067bd8572e0SNeel Natu fpu_start_emulating(); 1068366f6083SPeter Grehan } 1069366f6083SPeter Grehan 1070366f6083SPeter Grehan static void 1071366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu) 1072366f6083SPeter Grehan { 1073366f6083SPeter Grehan 1074bd8572e0SNeel Natu if ((rcr0() & CR0_TS) == 0) 1075bd8572e0SNeel Natu panic("fpu emulation not enabled in host!"); 1076bd8572e0SNeel Natu 1077abb023fbSJohn Baldwin /* save guest XCR0 and restore host XCR0 */ 1078abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) { 1079abb023fbSJohn Baldwin vcpu->guest_xcr0 = rxcr(0); 1080abb023fbSJohn Baldwin load_xcr(0, vmm_get_host_xcr0()); 1081abb023fbSJohn Baldwin } 1082abb023fbSJohn Baldwin 1083bd8572e0SNeel Natu /* save guest FPU state */ 1084bd8572e0SNeel Natu fpu_stop_emulating(); 108538f1b189SPeter Grehan fpusave(vcpu->guestfpu); 1086366f6083SPeter Grehan fpu_start_emulating(); 1087366f6083SPeter Grehan } 1088366f6083SPeter Grehan 108961592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 1090f76fc5d4SNeel Natu 1091318224bbSNeel Natu static int 1092248e6799SNeel Natu vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, 1093f80330a8SNeel Natu bool from_idle) 1094366f6083SPeter Grehan { 1095248e6799SNeel Natu struct vcpu *vcpu; 1096318224bbSNeel Natu int error; 1097366f6083SPeter Grehan 1098248e6799SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1099318224bbSNeel Natu vcpu_assert_locked(vcpu); 1100366f6083SPeter Grehan 1101f76fc5d4SNeel Natu /* 1102f80330a8SNeel Natu * State transitions from the vmmdev_ioctl() must always begin from 1103f80330a8SNeel Natu * the VCPU_IDLE state. This guarantees that there is only a single 1104f80330a8SNeel Natu * ioctl() operating on a vcpu at any point. 1105f80330a8SNeel Natu */ 1106f80330a8SNeel Natu if (from_idle) { 1107248e6799SNeel Natu while (vcpu->state != VCPU_IDLE) { 1108248e6799SNeel Natu vcpu->reqidle = 1; 1109248e6799SNeel Natu vcpu_notify_event_locked(vcpu, false); 1110248e6799SNeel Natu VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to " 1111248e6799SNeel Natu "idle requested", vcpu_state2str(vcpu->state)); 1112f80330a8SNeel Natu msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1113248e6799SNeel Natu } 1114f80330a8SNeel Natu } else { 1115f80330a8SNeel Natu KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1116f80330a8SNeel Natu "vcpu idle state")); 1117f80330a8SNeel Natu } 1118f80330a8SNeel Natu 1119ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 1120ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1121ef39d7e9SNeel Natu "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1122ef39d7e9SNeel Natu } else { 1123ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1124ef39d7e9SNeel Natu "vcpu that is not running", vcpu->hostcpu)); 1125ef39d7e9SNeel Natu } 1126ef39d7e9SNeel Natu 1127f80330a8SNeel Natu /* 1128318224bbSNeel Natu * The following state transitions are allowed: 1129318224bbSNeel Natu * IDLE -> FROZEN -> IDLE 1130318224bbSNeel Natu * FROZEN -> RUNNING -> FROZEN 1131318224bbSNeel Natu * FROZEN -> SLEEPING -> FROZEN 1132f76fc5d4SNeel Natu */ 1133318224bbSNeel Natu switch (vcpu->state) { 1134318224bbSNeel Natu case VCPU_IDLE: 1135318224bbSNeel Natu case VCPU_RUNNING: 1136318224bbSNeel Natu case VCPU_SLEEPING: 1137318224bbSNeel Natu error = (newstate != VCPU_FROZEN); 1138318224bbSNeel Natu break; 1139318224bbSNeel Natu case VCPU_FROZEN: 1140318224bbSNeel Natu error = (newstate == VCPU_FROZEN); 1141318224bbSNeel Natu break; 1142318224bbSNeel Natu default: 1143318224bbSNeel Natu error = 1; 1144318224bbSNeel Natu break; 1145318224bbSNeel Natu } 1146318224bbSNeel Natu 1147f80330a8SNeel Natu if (error) 1148f80330a8SNeel Natu return (EBUSY); 1149318224bbSNeel Natu 1150248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s", 1151248e6799SNeel Natu vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); 1152248e6799SNeel Natu 1153f80330a8SNeel Natu vcpu->state = newstate; 1154ef39d7e9SNeel Natu if (newstate == VCPU_RUNNING) 1155ef39d7e9SNeel Natu vcpu->hostcpu = curcpu; 1156ef39d7e9SNeel Natu else 1157ef39d7e9SNeel Natu vcpu->hostcpu = NOCPU; 1158ef39d7e9SNeel Natu 1159f80330a8SNeel Natu if (newstate == VCPU_IDLE) 1160f80330a8SNeel Natu wakeup(&vcpu->state); 1161f80330a8SNeel Natu 1162f80330a8SNeel Natu return (0); 1163318224bbSNeel Natu } 1164318224bbSNeel Natu 1165318224bbSNeel Natu static void 1166318224bbSNeel Natu vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1167318224bbSNeel Natu { 1168318224bbSNeel Natu int error; 1169318224bbSNeel Natu 1170f80330a8SNeel Natu if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) 1171318224bbSNeel Natu panic("Error %d setting state to %d\n", error, newstate); 1172318224bbSNeel Natu } 1173318224bbSNeel Natu 1174318224bbSNeel Natu static void 1175248e6799SNeel Natu vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1176318224bbSNeel Natu { 1177318224bbSNeel Natu int error; 1178318224bbSNeel Natu 1179248e6799SNeel Natu if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0) 1180318224bbSNeel Natu panic("Error %d setting state to %d", error, newstate); 1181318224bbSNeel Natu } 1182318224bbSNeel Natu 11835b8a8cd1SNeel Natu static void 11845b8a8cd1SNeel Natu vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) 11855b8a8cd1SNeel Natu { 11865b8a8cd1SNeel Natu 11875b8a8cd1SNeel Natu KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); 11885b8a8cd1SNeel Natu 11895b8a8cd1SNeel Natu /* 11905b8a8cd1SNeel Natu * Update 'rendezvous_func' and execute a write memory barrier to 11915b8a8cd1SNeel Natu * ensure that it is visible across all host cpus. This is not needed 11925b8a8cd1SNeel Natu * for correctness but it does ensure that all the vcpus will notice 11935b8a8cd1SNeel Natu * that the rendezvous is requested immediately. 11945b8a8cd1SNeel Natu */ 11955b8a8cd1SNeel Natu vm->rendezvous_func = func; 11965b8a8cd1SNeel Natu wmb(); 11975b8a8cd1SNeel Natu } 11985b8a8cd1SNeel Natu 11995b8a8cd1SNeel Natu #define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ 12005b8a8cd1SNeel Natu do { \ 12015b8a8cd1SNeel Natu if (vcpuid >= 0) \ 12025b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, fmt); \ 12035b8a8cd1SNeel Natu else \ 12045b8a8cd1SNeel Natu VM_CTR0(vm, fmt); \ 12055b8a8cd1SNeel Natu } while (0) 12065b8a8cd1SNeel Natu 12075b8a8cd1SNeel Natu static void 12085b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid) 12095b8a8cd1SNeel Natu { 12105b8a8cd1SNeel Natu 12115b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 12125b8a8cd1SNeel Natu ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); 12135b8a8cd1SNeel Natu 12145b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 12155b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 121622d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 121722d822c6SNeel Natu CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); 121822d822c6SNeel Natu 12195b8a8cd1SNeel Natu if (vcpuid != -1 && 122022d822c6SNeel Natu CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 122122d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 12225b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); 12235b8a8cd1SNeel Natu (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); 12245b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 12255b8a8cd1SNeel Natu } 12265b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 12275b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 12285b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); 12295b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, NULL); 12305b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 12315b8a8cd1SNeel Natu break; 12325b8a8cd1SNeel Natu } 12335b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); 12345b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 12355b8a8cd1SNeel Natu "vmrndv", 0); 12365b8a8cd1SNeel Natu } 12375b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 12385b8a8cd1SNeel Natu } 12395b8a8cd1SNeel Natu 1240318224bbSNeel Natu /* 1241318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 1242318224bbSNeel Natu */ 1243318224bbSNeel Natu static int 1244becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) 1245318224bbSNeel Natu { 1246318224bbSNeel Natu struct vcpu *vcpu; 1247c6a0cc2eSNeel Natu const char *wmesg; 12482ce12423SNeel Natu int t, vcpu_halted, vm_halted; 1249e50ce2aaSNeel Natu 1250e50ce2aaSNeel Natu KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 1251318224bbSNeel Natu 1252318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1253e50ce2aaSNeel Natu vcpu_halted = 0; 1254e50ce2aaSNeel Natu vm_halted = 0; 1255318224bbSNeel Natu 1256f76fc5d4SNeel Natu vcpu_lock(vcpu); 1257c6a0cc2eSNeel Natu while (1) { 1258f76fc5d4SNeel Natu /* 1259f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1260c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1261c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1262f76fc5d4SNeel Natu * 1263c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 1264c6a0cc2eSNeel Natu * vcpu returned from VMRUN() and before it acquired the 1265c6a0cc2eSNeel Natu * vcpu lock above. 1266f76fc5d4SNeel Natu */ 1267248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1268c6a0cc2eSNeel Natu break; 1269c6a0cc2eSNeel Natu if (vm_nmi_pending(vm, vcpuid)) 1270c6a0cc2eSNeel Natu break; 1271c6a0cc2eSNeel Natu if (!intr_disabled) { 1272c6a0cc2eSNeel Natu if (vm_extint_pending(vm, vcpuid) || 1273c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1274c6a0cc2eSNeel Natu break; 1275c6a0cc2eSNeel Natu } 1276c6a0cc2eSNeel Natu } 1277c6a0cc2eSNeel Natu 1278f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 1279f008d157SNeel Natu if (vcpu_should_yield(vm, vcpuid)) 1280f008d157SNeel Natu break; 1281f008d157SNeel Natu 1282e50ce2aaSNeel Natu /* 1283e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1284e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1285e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1286e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 1287e50ce2aaSNeel Natu */ 1288e50ce2aaSNeel Natu if (intr_disabled) { 1289c6a0cc2eSNeel Natu wmesg = "vmhalt"; 1290e50ce2aaSNeel Natu VCPU_CTR0(vm, vcpuid, "Halted"); 1291055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1292e50ce2aaSNeel Natu vcpu_halted = 1; 1293e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1294e50ce2aaSNeel Natu } 1295e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1296e50ce2aaSNeel Natu vm_halted = 1; 1297e50ce2aaSNeel Natu break; 1298e50ce2aaSNeel Natu } 1299e50ce2aaSNeel Natu } else { 1300e50ce2aaSNeel Natu wmesg = "vmidle"; 1301e50ce2aaSNeel Natu } 1302c6a0cc2eSNeel Natu 1303f76fc5d4SNeel Natu t = ticks; 1304248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1305f008d157SNeel Natu /* 1306f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1307f008d157SNeel Natu * wake up periodically to check pending signals. 1308f008d157SNeel Natu */ 1309f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 1310248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1311f76fc5d4SNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 1312f76fc5d4SNeel Natu } 1313e50ce2aaSNeel Natu 1314e50ce2aaSNeel Natu if (vcpu_halted) 1315e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1316e50ce2aaSNeel Natu 1317f76fc5d4SNeel Natu vcpu_unlock(vcpu); 1318f76fc5d4SNeel Natu 1319e50ce2aaSNeel Natu if (vm_halted) 1320e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1321e50ce2aaSNeel Natu 1322318224bbSNeel Natu return (0); 1323318224bbSNeel Natu } 1324318224bbSNeel Natu 1325318224bbSNeel Natu static int 1326becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) 1327318224bbSNeel Natu { 1328318224bbSNeel Natu int rv, ftype; 1329318224bbSNeel Natu struct vm_map *map; 1330318224bbSNeel Natu struct vcpu *vcpu; 1331318224bbSNeel Natu struct vm_exit *vme; 1332318224bbSNeel Natu 1333318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1334318224bbSNeel Natu vme = &vcpu->exitinfo; 1335318224bbSNeel Natu 1336d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1337d087a399SNeel Natu __func__, vme->inst_length)); 1338d087a399SNeel Natu 1339318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1340318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1341318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1342318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1343318224bbSNeel Natu 1344318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1345318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1346318224bbSNeel Natu vme->u.paging.gpa, ftype); 13479d8d8e3eSNeel Natu if (rv == 0) { 13489d8d8e3eSNeel Natu VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", 13499d8d8e3eSNeel Natu ftype == VM_PROT_READ ? "accessed" : "dirty", 13509d8d8e3eSNeel Natu vme->u.paging.gpa); 1351318224bbSNeel Natu goto done; 1352318224bbSNeel Natu } 13539d8d8e3eSNeel Natu } 1354318224bbSNeel Natu 1355318224bbSNeel Natu map = &vm->vmspace->vm_map; 1356318224bbSNeel Natu rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 1357318224bbSNeel Natu 1358513c8d33SNeel Natu VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 1359513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1360318224bbSNeel Natu 1361318224bbSNeel Natu if (rv != KERN_SUCCESS) 1362318224bbSNeel Natu return (EFAULT); 1363318224bbSNeel Natu done: 1364318224bbSNeel Natu return (0); 1365318224bbSNeel Natu } 1366318224bbSNeel Natu 1367318224bbSNeel Natu static int 1368becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) 1369318224bbSNeel Natu { 1370318224bbSNeel Natu struct vie *vie; 1371318224bbSNeel Natu struct vcpu *vcpu; 1372318224bbSNeel Natu struct vm_exit *vme; 1373e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1374e813a873SNeel Natu struct vm_guest_paging *paging; 1375565bbb86SNeel Natu mem_region_read_t mread; 1376565bbb86SNeel Natu mem_region_write_t mwrite; 1377f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 13781c73ea3eSNeel Natu int cs_d, error, fault; 1379318224bbSNeel Natu 1380318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1381318224bbSNeel Natu vme = &vcpu->exitinfo; 1382318224bbSNeel Natu 13831c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 13841c73ea3eSNeel Natu __func__, vme->inst_length)); 13851c73ea3eSNeel Natu 1386318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1387318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1388e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1389f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1390318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1391e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1392f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1393318224bbSNeel Natu 13949d8d8e3eSNeel Natu VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); 13959d8d8e3eSNeel Natu 1396318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1397c2a875f9SNeel Natu if (vie->num_valid == 0) { 1398e4f605eeSTycho Nightingale error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + 13991c73ea3eSNeel Natu cs_base, VIE_INST_SIZE, vie, &fault); 1400c2a875f9SNeel Natu } else { 1401c2a875f9SNeel Natu /* 1402c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1403c2a875f9SNeel Natu */ 14049c4d5478SNeel Natu error = fault = 0; 1405c2a875f9SNeel Natu } 14069c4d5478SNeel Natu if (error || fault) 14079c4d5478SNeel Natu return (error); 1408318224bbSNeel Natu 1409c07a0648SNeel Natu if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { 1410c07a0648SNeel Natu VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", 1411c07a0648SNeel Natu vme->rip + cs_base); 1412c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1413c07a0648SNeel Natu return (0); 1414c07a0648SNeel Natu } 1415318224bbSNeel Natu 1416a0b78f09SPeter Grehan /* 14171c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 1418a0b78f09SPeter Grehan */ 1419a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1420d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 14211c73ea3eSNeel Natu VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " 14221c73ea3eSNeel Natu "decoding", vcpu->nextrip); 1423a0b78f09SPeter Grehan 142408e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1425565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1426565bbb86SNeel Natu mread = lapic_mmio_read; 1427565bbb86SNeel Natu mwrite = lapic_mmio_write; 1428565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1429565bbb86SNeel Natu mread = vioapic_mmio_read; 1430565bbb86SNeel Natu mwrite = vioapic_mmio_write; 143108e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 143208e3ff32SNeel Natu mread = vhpet_mmio_read; 143308e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1434565bbb86SNeel Natu } else { 1435becd9849SNeel Natu *retu = true; 1436318224bbSNeel Natu return (0); 1437318224bbSNeel Natu } 1438318224bbSNeel Natu 1439d665d229SNeel Natu error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1440d665d229SNeel Natu mread, mwrite, retu); 1441318224bbSNeel Natu 1442318224bbSNeel Natu return (error); 1443318224bbSNeel Natu } 1444318224bbSNeel Natu 1445b15a09c0SNeel Natu static int 1446b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1447b15a09c0SNeel Natu { 1448b15a09c0SNeel Natu int i, done; 1449b15a09c0SNeel Natu struct vcpu *vcpu; 1450b15a09c0SNeel Natu 1451b15a09c0SNeel Natu done = 0; 1452b15a09c0SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1453b15a09c0SNeel Natu 1454b15a09c0SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1455b15a09c0SNeel Natu 1456b15a09c0SNeel Natu /* 1457b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1458b15a09c0SNeel Natu * 1459b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1460b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1461b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1462b15a09c0SNeel Natu */ 1463b15a09c0SNeel Natu vcpu_lock(vcpu); 1464b15a09c0SNeel Natu while (1) { 1465b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1466b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1467b15a09c0SNeel Natu break; 1468b15a09c0SNeel Natu } 1469b15a09c0SNeel Natu 1470b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 1471b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1472248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1473b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1474248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1475b15a09c0SNeel Natu } else { 1476b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1477b15a09c0SNeel Natu vcpu_unlock(vcpu); 1478b15a09c0SNeel Natu vm_handle_rendezvous(vm, vcpuid); 1479b15a09c0SNeel Natu vcpu_lock(vcpu); 1480b15a09c0SNeel Natu } 1481b15a09c0SNeel Natu } 1482b15a09c0SNeel Natu vcpu_unlock(vcpu); 1483b15a09c0SNeel Natu 1484b15a09c0SNeel Natu /* 1485b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1486b15a09c0SNeel Natu */ 1487b15a09c0SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1488b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 1489b15a09c0SNeel Natu vcpu_notify_event(vm, i, false); 1490b15a09c0SNeel Natu } 1491b15a09c0SNeel Natu } 1492b15a09c0SNeel Natu 1493b15a09c0SNeel Natu *retu = true; 1494b15a09c0SNeel Natu return (0); 1495b15a09c0SNeel Natu } 1496b15a09c0SNeel Natu 1497248e6799SNeel Natu static int 1498248e6799SNeel Natu vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) 1499248e6799SNeel Natu { 1500248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 1501248e6799SNeel Natu 1502248e6799SNeel Natu vcpu_lock(vcpu); 1503248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1504248e6799SNeel Natu vcpu->reqidle = 0; 1505248e6799SNeel Natu vcpu_unlock(vcpu); 1506248e6799SNeel Natu *retu = true; 1507248e6799SNeel Natu return (0); 1508248e6799SNeel Natu } 1509248e6799SNeel Natu 1510b15a09c0SNeel Natu int 1511f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1512b15a09c0SNeel Natu { 1513f0fdcfe2SNeel Natu int i; 1514b15a09c0SNeel Natu 1515f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1516f0fdcfe2SNeel Natu return (EINVAL); 1517f0fdcfe2SNeel Natu 1518f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1519f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1520f0fdcfe2SNeel Natu vm->suspend, how); 1521b15a09c0SNeel Natu return (EALREADY); 1522b15a09c0SNeel Natu } 1523f0fdcfe2SNeel Natu 1524f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1525f0fdcfe2SNeel Natu 1526f0fdcfe2SNeel Natu /* 1527f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1528f0fdcfe2SNeel Natu */ 1529f0fdcfe2SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1530f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 1531f0fdcfe2SNeel Natu vcpu_notify_event(vm, i, false); 1532f0fdcfe2SNeel Natu } 1533f0fdcfe2SNeel Natu 1534f0fdcfe2SNeel Natu return (0); 1535f0fdcfe2SNeel Natu } 1536f0fdcfe2SNeel Natu 1537f0fdcfe2SNeel Natu void 1538f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1539f0fdcfe2SNeel Natu { 1540f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1541f0fdcfe2SNeel Natu 1542f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1543f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1544f0fdcfe2SNeel Natu 1545f0fdcfe2SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1546f0fdcfe2SNeel Natu vmexit->rip = rip; 1547f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1548f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1549f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1550b15a09c0SNeel Natu } 1551b15a09c0SNeel Natu 155240487465SNeel Natu void 155340487465SNeel Natu vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 155440487465SNeel Natu { 155540487465SNeel Natu struct vm_exit *vmexit; 155640487465SNeel Natu 155740487465SNeel Natu KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 155840487465SNeel Natu 155940487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 156040487465SNeel Natu vmexit->rip = rip; 156140487465SNeel Natu vmexit->inst_length = 0; 156240487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 156340487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 156440487465SNeel Natu } 156540487465SNeel Natu 156640487465SNeel Natu void 1567248e6799SNeel Natu vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) 1568248e6799SNeel Natu { 1569248e6799SNeel Natu struct vm_exit *vmexit; 1570248e6799SNeel Natu 1571248e6799SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1572248e6799SNeel Natu vmexit->rip = rip; 1573248e6799SNeel Natu vmexit->inst_length = 0; 1574248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 1575248e6799SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); 1576248e6799SNeel Natu } 1577248e6799SNeel Natu 1578248e6799SNeel Natu void 157940487465SNeel Natu vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 158040487465SNeel Natu { 158140487465SNeel Natu struct vm_exit *vmexit; 158240487465SNeel Natu 158340487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 158440487465SNeel Natu vmexit->rip = rip; 158540487465SNeel Natu vmexit->inst_length = 0; 158640487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 158740487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 158840487465SNeel Natu } 158940487465SNeel Natu 1590318224bbSNeel Natu int 1591318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun) 1592318224bbSNeel Natu { 1593248e6799SNeel Natu struct vm_eventinfo evinfo; 1594318224bbSNeel Natu int error, vcpuid; 1595318224bbSNeel Natu struct vcpu *vcpu; 1596318224bbSNeel Natu struct pcb *pcb; 1597d087a399SNeel Natu uint64_t tscval; 1598318224bbSNeel Natu struct vm_exit *vme; 1599becd9849SNeel Natu bool retu, intr_disabled; 1600318224bbSNeel Natu pmap_t pmap; 1601318224bbSNeel Natu 1602318224bbSNeel Natu vcpuid = vmrun->cpuid; 1603318224bbSNeel Natu 1604318224bbSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1605318224bbSNeel Natu return (EINVAL); 1606318224bbSNeel Natu 160795ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 160895ebc360SNeel Natu return (EINVAL); 160995ebc360SNeel Natu 161095ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 161195ebc360SNeel Natu return (EINVAL); 161295ebc360SNeel Natu 1613318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1614318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1615318224bbSNeel Natu vme = &vcpu->exitinfo; 1616248e6799SNeel Natu evinfo.rptr = &vm->rendezvous_func; 1617248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1618248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1619318224bbSNeel Natu restart: 1620318224bbSNeel Natu critical_enter(); 1621318224bbSNeel Natu 1622318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1623318224bbSNeel Natu ("vm_run: absurd pm_active")); 1624318224bbSNeel Natu 1625318224bbSNeel Natu tscval = rdtsc(); 1626318224bbSNeel Natu 1627318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1628318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1629318224bbSNeel Natu 1630318224bbSNeel Natu restore_guest_fpustate(vcpu); 1631318224bbSNeel Natu 1632318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1633248e6799SNeel Natu error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); 1634318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1635318224bbSNeel Natu 1636318224bbSNeel Natu save_guest_fpustate(vcpu); 1637318224bbSNeel Natu 1638318224bbSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1639318224bbSNeel Natu 1640318224bbSNeel Natu critical_exit(); 1641318224bbSNeel Natu 1642318224bbSNeel Natu if (error == 0) { 1643becd9849SNeel Natu retu = false; 1644d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1645318224bbSNeel Natu switch (vme->exitcode) { 1646248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 1647248e6799SNeel Natu error = vm_handle_reqidle(vm, vcpuid, &retu); 1648248e6799SNeel Natu break; 1649b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 1650b15a09c0SNeel Natu error = vm_handle_suspend(vm, vcpuid, &retu); 1651b15a09c0SNeel Natu break; 165230b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 165330b94db8SNeel Natu vioapic_process_eoi(vm, vcpuid, 165430b94db8SNeel Natu vme->u.ioapic_eoi.vector); 165530b94db8SNeel Natu break; 16565b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 16575b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 16585b8a8cd1SNeel Natu error = 0; 16595b8a8cd1SNeel Natu break; 1660318224bbSNeel Natu case VM_EXITCODE_HLT: 1661becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 16621c052192SNeel Natu error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1663318224bbSNeel Natu break; 1664318224bbSNeel Natu case VM_EXITCODE_PAGING: 1665318224bbSNeel Natu error = vm_handle_paging(vm, vcpuid, &retu); 1666318224bbSNeel Natu break; 1667318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 1668318224bbSNeel Natu error = vm_handle_inst_emul(vm, vcpuid, &retu); 1669318224bbSNeel Natu break; 1670d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1671d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 1672d17b5104SNeel Natu error = vm_handle_inout(vm, vcpuid, vme, &retu); 1673d17b5104SNeel Natu break; 167465145c7fSNeel Natu case VM_EXITCODE_MONITOR: 167565145c7fSNeel Natu case VM_EXITCODE_MWAIT: 167665145c7fSNeel Natu vm_inject_ud(vm, vcpuid); 167765145c7fSNeel Natu break; 1678318224bbSNeel Natu default: 1679becd9849SNeel Natu retu = true; /* handled in userland */ 1680318224bbSNeel Natu break; 1681318224bbSNeel Natu } 1682318224bbSNeel Natu } 1683318224bbSNeel Natu 1684d087a399SNeel Natu if (error == 0 && retu == false) 1685f76fc5d4SNeel Natu goto restart; 1686f76fc5d4SNeel Natu 1687248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); 1688248e6799SNeel Natu 1689318224bbSNeel Natu /* copy the exit information */ 1690318224bbSNeel Natu bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1691366f6083SPeter Grehan return (error); 1692366f6083SPeter Grehan } 1693366f6083SPeter Grehan 1694366f6083SPeter Grehan int 1695c9c75df4SNeel Natu vm_restart_instruction(void *arg, int vcpuid) 1696c9c75df4SNeel Natu { 1697d087a399SNeel Natu struct vm *vm; 1698c9c75df4SNeel Natu struct vcpu *vcpu; 1699d087a399SNeel Natu enum vcpu_state state; 1700d087a399SNeel Natu uint64_t rip; 1701d087a399SNeel Natu int error; 1702c9c75df4SNeel Natu 1703d087a399SNeel Natu vm = arg; 1704c9c75df4SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1705c9c75df4SNeel Natu return (EINVAL); 1706c9c75df4SNeel Natu 1707c9c75df4SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1708d087a399SNeel Natu state = vcpu_get_state(vm, vcpuid, NULL); 1709d087a399SNeel Natu if (state == VCPU_RUNNING) { 1710d087a399SNeel Natu /* 1711d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1712d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1713d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1714d087a399SNeel Natu * instruction to be restarted. 1715d087a399SNeel Natu */ 1716c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1717d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " 1718d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1719d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1720d087a399SNeel Natu /* 1721d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 1722d087a399SNeel Natu * around VMRUN() and 'nextrip' points to the next instruction. 1723d087a399SNeel Natu * Thus instruction restart is achieved by setting 'nextrip' 1724d087a399SNeel Natu * to the vcpu's %rip. 1725d087a399SNeel Natu */ 1726d087a399SNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); 1727d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1728d087a399SNeel Natu VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " 1729d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1730d087a399SNeel Natu vcpu->nextrip = rip; 1731d087a399SNeel Natu } else { 1732d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1733d087a399SNeel Natu } 1734c9c75df4SNeel Natu return (0); 1735c9c75df4SNeel Natu } 1736c9c75df4SNeel Natu 1737c9c75df4SNeel Natu int 1738091d4532SNeel Natu vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1739091d4532SNeel Natu { 1740091d4532SNeel Natu struct vcpu *vcpu; 1741091d4532SNeel Natu int type, vector; 1742091d4532SNeel Natu 1743091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1744091d4532SNeel Natu return (EINVAL); 1745091d4532SNeel Natu 1746091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1747091d4532SNeel Natu 1748091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1749091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1750091d4532SNeel Natu vector = info & 0xff; 1751091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1752091d4532SNeel Natu return (EINVAL); 1753091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1754091d4532SNeel Natu return (EINVAL); 1755091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1756091d4532SNeel Natu return (EINVAL); 1757091d4532SNeel Natu } else { 1758091d4532SNeel Natu info = 0; 1759091d4532SNeel Natu } 1760091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1761091d4532SNeel Natu vcpu->exitintinfo = info; 1762091d4532SNeel Natu return (0); 1763091d4532SNeel Natu } 1764091d4532SNeel Natu 1765091d4532SNeel Natu enum exc_class { 1766091d4532SNeel Natu EXC_BENIGN, 1767091d4532SNeel Natu EXC_CONTRIBUTORY, 1768091d4532SNeel Natu EXC_PAGEFAULT 1769091d4532SNeel Natu }; 1770091d4532SNeel Natu 1771091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1772091d4532SNeel Natu 1773091d4532SNeel Natu static enum exc_class 1774091d4532SNeel Natu exception_class(uint64_t info) 1775091d4532SNeel Natu { 1776091d4532SNeel Natu int type, vector; 1777091d4532SNeel Natu 1778091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1779091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1780091d4532SNeel Natu vector = info & 0xff; 1781091d4532SNeel Natu 1782091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1783091d4532SNeel Natu switch (type) { 1784091d4532SNeel Natu case VM_INTINFO_HWINTR: 1785091d4532SNeel Natu case VM_INTINFO_SWINTR: 1786091d4532SNeel Natu case VM_INTINFO_NMI: 1787091d4532SNeel Natu return (EXC_BENIGN); 1788091d4532SNeel Natu default: 1789091d4532SNeel Natu /* 1790091d4532SNeel Natu * Hardware exception. 1791091d4532SNeel Natu * 1792091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1793091d4532SNeel Natu * hardware interrupt and software interrupt. 1794091d4532SNeel Natu * 1795091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1796091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1797091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1798091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1799091d4532SNeel Natu * exception. 1800091d4532SNeel Natu */ 1801091d4532SNeel Natu break; 1802091d4532SNeel Natu } 1803091d4532SNeel Natu 1804091d4532SNeel Natu switch (vector) { 1805091d4532SNeel Natu case IDT_PF: 1806091d4532SNeel Natu case IDT_VE: 1807091d4532SNeel Natu return (EXC_PAGEFAULT); 1808091d4532SNeel Natu case IDT_DE: 1809091d4532SNeel Natu case IDT_TS: 1810091d4532SNeel Natu case IDT_NP: 1811091d4532SNeel Natu case IDT_SS: 1812091d4532SNeel Natu case IDT_GP: 1813091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1814091d4532SNeel Natu default: 1815091d4532SNeel Natu return (EXC_BENIGN); 1816091d4532SNeel Natu } 1817091d4532SNeel Natu } 1818091d4532SNeel Natu 1819091d4532SNeel Natu static int 1820091d4532SNeel Natu nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1821091d4532SNeel Natu uint64_t *retinfo) 1822091d4532SNeel Natu { 1823091d4532SNeel Natu enum exc_class exc1, exc2; 1824091d4532SNeel Natu int type1, vector1; 1825091d4532SNeel Natu 1826091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1827091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1828091d4532SNeel Natu 1829091d4532SNeel Natu /* 1830091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1831091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 1832091d4532SNeel Natu */ 1833091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1834091d4532SNeel Natu vector1 = info1 & 0xff; 1835091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1836091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1837091d4532SNeel Natu info1, info2); 1838091d4532SNeel Natu vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1839091d4532SNeel Natu *retinfo = 0; 1840091d4532SNeel Natu return (0); 1841091d4532SNeel Natu } 1842091d4532SNeel Natu 1843091d4532SNeel Natu /* 1844091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1845091d4532SNeel Natu */ 1846091d4532SNeel Natu exc1 = exception_class(info1); 1847091d4532SNeel Natu exc2 = exception_class(info2); 1848091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1849091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1850091d4532SNeel Natu /* Convert nested fault into a double fault. */ 1851091d4532SNeel Natu *retinfo = IDT_DF; 1852091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1853091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 1854091d4532SNeel Natu } else { 1855091d4532SNeel Natu /* Handle exceptions serially */ 1856091d4532SNeel Natu *retinfo = info2; 1857091d4532SNeel Natu } 1858091d4532SNeel Natu return (1); 1859091d4532SNeel Natu } 1860091d4532SNeel Natu 1861091d4532SNeel Natu static uint64_t 1862091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 1863091d4532SNeel Natu { 1864091d4532SNeel Natu uint64_t info = 0; 1865091d4532SNeel Natu 1866091d4532SNeel Natu if (vcpu->exception_pending) { 1867c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 1868091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1869c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 1870091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 1871c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 1872091d4532SNeel Natu } 1873091d4532SNeel Natu } 1874091d4532SNeel Natu return (info); 1875091d4532SNeel Natu } 1876091d4532SNeel Natu 1877091d4532SNeel Natu int 1878091d4532SNeel Natu vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1879091d4532SNeel Natu { 1880091d4532SNeel Natu struct vcpu *vcpu; 1881091d4532SNeel Natu uint64_t info1, info2; 1882091d4532SNeel Natu int valid; 1883091d4532SNeel Natu 1884091d4532SNeel Natu KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); 1885091d4532SNeel Natu 1886091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1887091d4532SNeel Natu 1888091d4532SNeel Natu info1 = vcpu->exitintinfo; 1889091d4532SNeel Natu vcpu->exitintinfo = 0; 1890091d4532SNeel Natu 1891091d4532SNeel Natu info2 = 0; 1892091d4532SNeel Natu if (vcpu->exception_pending) { 1893091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 1894091d4532SNeel Natu vcpu->exception_pending = 0; 1895091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 1896c9c75df4SNeel Natu vcpu->exc_vector, info2); 1897091d4532SNeel Natu } 1898091d4532SNeel Natu 1899091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 1900091d4532SNeel Natu valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 1901091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 1902091d4532SNeel Natu *retinfo = info1; 1903091d4532SNeel Natu valid = 1; 1904091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 1905091d4532SNeel Natu *retinfo = info2; 1906091d4532SNeel Natu valid = 1; 1907091d4532SNeel Natu } else { 1908091d4532SNeel Natu valid = 0; 1909091d4532SNeel Natu } 1910091d4532SNeel Natu 1911091d4532SNeel Natu if (valid) { 1912091d4532SNeel Natu VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 1913091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 1914091d4532SNeel Natu } 1915091d4532SNeel Natu 1916091d4532SNeel Natu return (valid); 1917091d4532SNeel Natu } 1918091d4532SNeel Natu 1919091d4532SNeel Natu int 1920091d4532SNeel Natu vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 1921091d4532SNeel Natu { 1922091d4532SNeel Natu struct vcpu *vcpu; 1923091d4532SNeel Natu 1924091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1925091d4532SNeel Natu return (EINVAL); 1926091d4532SNeel Natu 1927091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1928091d4532SNeel Natu *info1 = vcpu->exitintinfo; 1929091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 1930091d4532SNeel Natu return (0); 1931091d4532SNeel Natu } 1932091d4532SNeel Natu 1933091d4532SNeel Natu int 1934c9c75df4SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, 1935c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 1936366f6083SPeter Grehan { 1937dc506506SNeel Natu struct vcpu *vcpu; 193847b9935dSNeel Natu uint64_t regval; 19392ce12423SNeel Natu int error; 1940dc506506SNeel Natu 1941366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1942366f6083SPeter Grehan return (EINVAL); 1943366f6083SPeter Grehan 1944c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 1945366f6083SPeter Grehan return (EINVAL); 1946366f6083SPeter Grehan 1947091d4532SNeel Natu /* 1948091d4532SNeel Natu * A double fault exception should never be injected directly into 1949091d4532SNeel Natu * the guest. It is a derived exception that results from specific 1950091d4532SNeel Natu * combinations of nested faults. 1951091d4532SNeel Natu */ 1952c9c75df4SNeel Natu if (vector == IDT_DF) 1953091d4532SNeel Natu return (EINVAL); 1954091d4532SNeel Natu 1955dc506506SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1956366f6083SPeter Grehan 1957dc506506SNeel Natu if (vcpu->exception_pending) { 1958dc506506SNeel Natu VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 1959c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 1960dc506506SNeel Natu return (EBUSY); 1961dc506506SNeel Natu } 1962dc506506SNeel Natu 196347b9935dSNeel Natu if (errcode_valid) { 196447b9935dSNeel Natu /* 196547b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 196647b9935dSNeel Natu */ 196747b9935dSNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, ®val); 196847b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 196947b9935dSNeel Natu if (!(regval & CR0_PE)) 197047b9935dSNeel Natu errcode_valid = 0; 197147b9935dSNeel Natu } 197247b9935dSNeel Natu 19732ce12423SNeel Natu /* 19742ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 19752ce12423SNeel Natu * 19762ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 19772ce12423SNeel Natu * one instruction or incurs an exception. 19782ce12423SNeel Natu */ 19792ce12423SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); 19802ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 19812ce12423SNeel Natu __func__, error)); 19822ce12423SNeel Natu 1983c9c75df4SNeel Natu if (restart_instruction) 1984c9c75df4SNeel Natu vm_restart_instruction(vm, vcpuid); 1985c9c75df4SNeel Natu 1986dc506506SNeel Natu vcpu->exception_pending = 1; 1987c9c75df4SNeel Natu vcpu->exc_vector = vector; 1988c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 1989c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 1990c9c75df4SNeel Natu VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); 1991dc506506SNeel Natu return (0); 1992dc506506SNeel Natu } 1993dc506506SNeel Natu 1994d37f2adbSNeel Natu void 1995d37f2adbSNeel Natu vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1996d37f2adbSNeel Natu int errcode) 1997dc506506SNeel Natu { 1998d37f2adbSNeel Natu struct vm *vm; 1999c9c75df4SNeel Natu int error, restart_instruction; 2000dc506506SNeel Natu 2001d37f2adbSNeel Natu vm = vmarg; 2002c9c75df4SNeel Natu restart_instruction = 1; 2003d37f2adbSNeel Natu 2004c9c75df4SNeel Natu error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, 2005c9c75df4SNeel Natu errcode, restart_instruction); 2006dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 2007dc506506SNeel Natu } 2008dc506506SNeel Natu 2009dc506506SNeel Natu void 2010d37f2adbSNeel Natu vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 2011fd949af6SNeel Natu { 2012d37f2adbSNeel Natu struct vm *vm; 201337a723a5SNeel Natu int error; 201437a723a5SNeel Natu 2015d37f2adbSNeel Natu vm = vmarg; 201637a723a5SNeel Natu VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 201737a723a5SNeel Natu error_code, cr2); 201837a723a5SNeel Natu 201937a723a5SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 202037a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 2021fd949af6SNeel Natu 2022d37f2adbSNeel Natu vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 2023366f6083SPeter Grehan } 2024366f6083SPeter Grehan 202561592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 2026366f6083SPeter Grehan 2027f352ff0cSNeel Natu int 2028f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid) 2029f352ff0cSNeel Natu { 2030f352ff0cSNeel Natu struct vcpu *vcpu; 2031f352ff0cSNeel Natu 2032f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2033366f6083SPeter Grehan return (EINVAL); 2034366f6083SPeter Grehan 2035f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2036f352ff0cSNeel Natu 2037f352ff0cSNeel Natu vcpu->nmi_pending = 1; 2038de5ea6b6SNeel Natu vcpu_notify_event(vm, vcpuid, false); 2039f352ff0cSNeel Natu return (0); 2040f352ff0cSNeel Natu } 2041f352ff0cSNeel Natu 2042f352ff0cSNeel Natu int 2043f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid) 2044f352ff0cSNeel Natu { 2045f352ff0cSNeel Natu struct vcpu *vcpu; 2046f352ff0cSNeel Natu 2047f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2048f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2049f352ff0cSNeel Natu 2050f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2051f352ff0cSNeel Natu 2052f352ff0cSNeel Natu return (vcpu->nmi_pending); 2053f352ff0cSNeel Natu } 2054f352ff0cSNeel Natu 2055f352ff0cSNeel Natu void 2056f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid) 2057f352ff0cSNeel Natu { 2058f352ff0cSNeel Natu struct vcpu *vcpu; 2059f352ff0cSNeel Natu 2060f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2061f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2062f352ff0cSNeel Natu 2063f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2064f352ff0cSNeel Natu 2065f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 2066f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 2067f352ff0cSNeel Natu 2068f352ff0cSNeel Natu vcpu->nmi_pending = 0; 2069f352ff0cSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 2070366f6083SPeter Grehan } 2071366f6083SPeter Grehan 20720775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 20730775fbb4STycho Nightingale 20740775fbb4STycho Nightingale int 20750775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid) 20760775fbb4STycho Nightingale { 20770775fbb4STycho Nightingale struct vcpu *vcpu; 20780775fbb4STycho Nightingale 20790775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20800775fbb4STycho Nightingale return (EINVAL); 20810775fbb4STycho Nightingale 20820775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 20830775fbb4STycho Nightingale 20840775fbb4STycho Nightingale vcpu->extint_pending = 1; 20850775fbb4STycho Nightingale vcpu_notify_event(vm, vcpuid, false); 20860775fbb4STycho Nightingale return (0); 20870775fbb4STycho Nightingale } 20880775fbb4STycho Nightingale 20890775fbb4STycho Nightingale int 20900775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid) 20910775fbb4STycho Nightingale { 20920775fbb4STycho Nightingale struct vcpu *vcpu; 20930775fbb4STycho Nightingale 20940775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20950775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 20960775fbb4STycho Nightingale 20970775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 20980775fbb4STycho Nightingale 20990775fbb4STycho Nightingale return (vcpu->extint_pending); 21000775fbb4STycho Nightingale } 21010775fbb4STycho Nightingale 21020775fbb4STycho Nightingale void 21030775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid) 21040775fbb4STycho Nightingale { 21050775fbb4STycho Nightingale struct vcpu *vcpu; 21060775fbb4STycho Nightingale 21070775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 21080775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 21090775fbb4STycho Nightingale 21100775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 21110775fbb4STycho Nightingale 21120775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 21130775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 21140775fbb4STycho Nightingale 21150775fbb4STycho Nightingale vcpu->extint_pending = 0; 21160775fbb4STycho Nightingale vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 21170775fbb4STycho Nightingale } 21180775fbb4STycho Nightingale 2119366f6083SPeter Grehan int 2120366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 2121366f6083SPeter Grehan { 2122366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2123366f6083SPeter Grehan return (EINVAL); 2124366f6083SPeter Grehan 2125366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2126366f6083SPeter Grehan return (EINVAL); 2127366f6083SPeter Grehan 2128366f6083SPeter Grehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 2129366f6083SPeter Grehan } 2130366f6083SPeter Grehan 2131366f6083SPeter Grehan int 2132366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val) 2133366f6083SPeter Grehan { 2134366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2135366f6083SPeter Grehan return (EINVAL); 2136366f6083SPeter Grehan 2137366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2138366f6083SPeter Grehan return (EINVAL); 2139366f6083SPeter Grehan 2140366f6083SPeter Grehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 2141366f6083SPeter Grehan } 2142366f6083SPeter Grehan 2143366f6083SPeter Grehan struct vlapic * 2144366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu) 2145366f6083SPeter Grehan { 2146366f6083SPeter Grehan return (vm->vcpu[cpu].vlapic); 2147366f6083SPeter Grehan } 2148366f6083SPeter Grehan 2149565bbb86SNeel Natu struct vioapic * 2150565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2151565bbb86SNeel Natu { 2152565bbb86SNeel Natu 2153565bbb86SNeel Natu return (vm->vioapic); 2154565bbb86SNeel Natu } 2155565bbb86SNeel Natu 215608e3ff32SNeel Natu struct vhpet * 215708e3ff32SNeel Natu vm_hpet(struct vm *vm) 215808e3ff32SNeel Natu { 215908e3ff32SNeel Natu 216008e3ff32SNeel Natu return (vm->vhpet); 216108e3ff32SNeel Natu } 216208e3ff32SNeel Natu 2163366f6083SPeter Grehan boolean_t 2164366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2165366f6083SPeter Grehan { 216607044a96SNeel Natu int found, i, n; 216707044a96SNeel Natu int b, s, f; 2168366f6083SPeter Grehan char *val, *cp, *cp2; 2169366f6083SPeter Grehan 2170366f6083SPeter Grehan /* 217107044a96SNeel Natu * XXX 217207044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 217307044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 217407044a96SNeel Natu * specified using a single environment variable. 217507044a96SNeel Natu * 217607044a96SNeel Natu * Work around this by scanning multiple environment variable 217707044a96SNeel Natu * names instead of a single one - yuck! 2178366f6083SPeter Grehan */ 217907044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 218007044a96SNeel Natu 218107044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2182366f6083SPeter Grehan found = 0; 218307044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 21842be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2185366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2186366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2187366f6083SPeter Grehan *cp2 = '\0'; 2188366f6083SPeter Grehan 2189366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2190366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2191366f6083SPeter Grehan found = 1; 2192366f6083SPeter Grehan break; 2193366f6083SPeter Grehan } 2194366f6083SPeter Grehan 2195366f6083SPeter Grehan if (cp2 != NULL) 2196366f6083SPeter Grehan *cp2++ = ' '; 2197366f6083SPeter Grehan 2198366f6083SPeter Grehan cp = cp2; 2199366f6083SPeter Grehan } 2200366f6083SPeter Grehan freeenv(val); 220107044a96SNeel Natu } 2202366f6083SPeter Grehan return (found); 2203366f6083SPeter Grehan } 2204366f6083SPeter Grehan 2205366f6083SPeter Grehan void * 2206366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2207366f6083SPeter Grehan { 2208366f6083SPeter Grehan 2209366f6083SPeter Grehan return (vm->iommu); 2210366f6083SPeter Grehan } 2211366f6083SPeter Grehan 221275dd3366SNeel Natu int 2213f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 2214f80330a8SNeel Natu bool from_idle) 2215366f6083SPeter Grehan { 221675dd3366SNeel Natu int error; 2217366f6083SPeter Grehan struct vcpu *vcpu; 2218366f6083SPeter Grehan 2219366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2220366f6083SPeter Grehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 2221366f6083SPeter Grehan 2222366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2223366f6083SPeter Grehan 222475dd3366SNeel Natu vcpu_lock(vcpu); 2225248e6799SNeel Natu error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); 222675dd3366SNeel Natu vcpu_unlock(vcpu); 222775dd3366SNeel Natu 222875dd3366SNeel Natu return (error); 222975dd3366SNeel Natu } 223075dd3366SNeel Natu 223175dd3366SNeel Natu enum vcpu_state 2232d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 2233366f6083SPeter Grehan { 2234366f6083SPeter Grehan struct vcpu *vcpu; 223575dd3366SNeel Natu enum vcpu_state state; 2236366f6083SPeter Grehan 2237366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2238366f6083SPeter Grehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 2239366f6083SPeter Grehan 2240366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2241366f6083SPeter Grehan 224275dd3366SNeel Natu vcpu_lock(vcpu); 224375dd3366SNeel Natu state = vcpu->state; 2244d3c11f40SPeter Grehan if (hostcpu != NULL) 2245d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 224675dd3366SNeel Natu vcpu_unlock(vcpu); 2247366f6083SPeter Grehan 224875dd3366SNeel Natu return (state); 2249366f6083SPeter Grehan } 2250366f6083SPeter Grehan 225195ebc360SNeel Natu int 2252366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid) 2253366f6083SPeter Grehan { 2254366f6083SPeter Grehan 225595ebc360SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 225695ebc360SNeel Natu return (EINVAL); 225795ebc360SNeel Natu 225895ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->active_cpus)) 225995ebc360SNeel Natu return (EBUSY); 226022d822c6SNeel Natu 226122d822c6SNeel Natu VCPU_CTR0(vm, vcpuid, "activated"); 226222d822c6SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 226395ebc360SNeel Natu return (0); 2264366f6083SPeter Grehan } 2265366f6083SPeter Grehan 2266a5615c90SPeter Grehan cpuset_t 2267366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2268366f6083SPeter Grehan { 2269366f6083SPeter Grehan 2270366f6083SPeter Grehan return (vm->active_cpus); 2271366f6083SPeter Grehan } 2272366f6083SPeter Grehan 227395ebc360SNeel Natu cpuset_t 227495ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 227595ebc360SNeel Natu { 227695ebc360SNeel Natu 227795ebc360SNeel Natu return (vm->suspended_cpus); 227895ebc360SNeel Natu } 227995ebc360SNeel Natu 2280366f6083SPeter Grehan void * 2281366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid) 2282366f6083SPeter Grehan { 2283366f6083SPeter Grehan 2284366f6083SPeter Grehan return (vm->vcpu[vcpuid].stats); 2285366f6083SPeter Grehan } 2286e9027382SNeel Natu 2287e9027382SNeel Natu int 2288e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2289e9027382SNeel Natu { 2290e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2291e9027382SNeel Natu return (EINVAL); 2292e9027382SNeel Natu 2293e9027382SNeel Natu *state = vm->vcpu[vcpuid].x2apic_state; 2294e9027382SNeel Natu 2295e9027382SNeel Natu return (0); 2296e9027382SNeel Natu } 2297e9027382SNeel Natu 2298e9027382SNeel Natu int 2299e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2300e9027382SNeel Natu { 2301e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2302e9027382SNeel Natu return (EINVAL); 2303e9027382SNeel Natu 23043f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2305e9027382SNeel Natu return (EINVAL); 2306e9027382SNeel Natu 2307e9027382SNeel Natu vm->vcpu[vcpuid].x2apic_state = state; 2308e9027382SNeel Natu 230973820fb0SNeel Natu vlapic_set_x2apic_state(vm, vcpuid, state); 231073820fb0SNeel Natu 2311e9027382SNeel Natu return (0); 2312e9027382SNeel Natu } 231375dd3366SNeel Natu 231422821874SNeel Natu /* 231522821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 231622821874SNeel Natu * as soon as possible: 231722821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 231822821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 231922821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 232022821874SNeel Natu */ 2321248e6799SNeel Natu static void 2322248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 232375dd3366SNeel Natu { 232475dd3366SNeel Natu int hostcpu; 232575dd3366SNeel Natu 232675dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2327ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2328ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2329de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2330ef39d7e9SNeel Natu if (lapic_intr) { 2331add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2332add611fdSNeel Natu vmm_ipinum); 2333ef39d7e9SNeel Natu } else { 233475dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 233575dd3366SNeel Natu } 2336ef39d7e9SNeel Natu } else { 2337ef39d7e9SNeel Natu /* 2338ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2339ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2340ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2341ef39d7e9SNeel Natu * transitions back to guest context. 2342ef39d7e9SNeel Natu */ 2343ef39d7e9SNeel Natu } 2344ef39d7e9SNeel Natu } else { 2345ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2346ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2347366f6083SPeter Grehan if (vcpu->state == VCPU_SLEEPING) 2348366f6083SPeter Grehan wakeup_one(vcpu); 2349366f6083SPeter Grehan } 2350248e6799SNeel Natu } 2351248e6799SNeel Natu 2352248e6799SNeel Natu void 2353248e6799SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 2354248e6799SNeel Natu { 2355248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 2356248e6799SNeel Natu 2357248e6799SNeel Natu vcpu_lock(vcpu); 2358248e6799SNeel Natu vcpu_notify_event_locked(vcpu, lapic_intr); 2359f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2360f76fc5d4SNeel Natu } 2361318224bbSNeel Natu 2362318224bbSNeel Natu struct vmspace * 2363318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2364318224bbSNeel Natu { 2365318224bbSNeel Natu 2366318224bbSNeel Natu return (vm->vmspace); 2367318224bbSNeel Natu } 2368565bbb86SNeel Natu 2369565bbb86SNeel Natu int 2370565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2371565bbb86SNeel Natu { 2372565bbb86SNeel Natu /* 2373565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2374565bbb86SNeel Natu */ 2375565bbb86SNeel Natu return (apicid); 2376565bbb86SNeel Natu } 23775b8a8cd1SNeel Natu 23785b8a8cd1SNeel Natu void 23795b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 23805b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 23815b8a8cd1SNeel Natu { 2382970955e4SNeel Natu int i; 2383970955e4SNeel Natu 23845b8a8cd1SNeel Natu /* 23855b8a8cd1SNeel Natu * Enforce that this function is called without any locks 23865b8a8cd1SNeel Natu */ 23875b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 23885b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 23895b8a8cd1SNeel Natu ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 23905b8a8cd1SNeel Natu 23915b8a8cd1SNeel Natu restart: 23925b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 23935b8a8cd1SNeel Natu if (vm->rendezvous_func != NULL) { 23945b8a8cd1SNeel Natu /* 23955b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 23965b8a8cd1SNeel Natu * call the rendezvous handler in case this 'vcpuid' is one 23975b8a8cd1SNeel Natu * of the targets of the rendezvous. 23985b8a8cd1SNeel Natu */ 23995b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 24005b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 24015b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 24025b8a8cd1SNeel Natu goto restart; 24035b8a8cd1SNeel Natu } 24045b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 24055b8a8cd1SNeel Natu "rendezvous is still in progress")); 24065b8a8cd1SNeel Natu 24075b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 24085b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 24095b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 24105b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 24115b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, func); 24125b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 24135b8a8cd1SNeel Natu 2414970955e4SNeel Natu /* 2415970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2416970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2417970955e4SNeel Natu */ 2418970955e4SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 2419970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 2420970955e4SNeel Natu vcpu_notify_event(vm, i, false); 2421970955e4SNeel Natu } 2422970955e4SNeel Natu 24235b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 24245b8a8cd1SNeel Natu } 2425762fd208STycho Nightingale 2426762fd208STycho Nightingale struct vatpic * 2427762fd208STycho Nightingale vm_atpic(struct vm *vm) 2428762fd208STycho Nightingale { 2429762fd208STycho Nightingale return (vm->vatpic); 2430762fd208STycho Nightingale } 2431e883c9bbSTycho Nightingale 2432e883c9bbSTycho Nightingale struct vatpit * 2433e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2434e883c9bbSTycho Nightingale { 2435e883c9bbSTycho Nightingale return (vm->vatpit); 2436e883c9bbSTycho Nightingale } 2437d17b5104SNeel Natu 2438160ef77aSNeel Natu struct vpmtmr * 2439160ef77aSNeel Natu vm_pmtmr(struct vm *vm) 2440160ef77aSNeel Natu { 2441160ef77aSNeel Natu 2442160ef77aSNeel Natu return (vm->vpmtmr); 2443160ef77aSNeel Natu } 2444160ef77aSNeel Natu 24450dafa5cdSNeel Natu struct vrtc * 24460dafa5cdSNeel Natu vm_rtc(struct vm *vm) 24470dafa5cdSNeel Natu { 24480dafa5cdSNeel Natu 24490dafa5cdSNeel Natu return (vm->vrtc); 24500dafa5cdSNeel Natu } 24510dafa5cdSNeel Natu 2452d17b5104SNeel Natu enum vm_reg_name 2453d17b5104SNeel Natu vm_segment_name(int seg) 2454d17b5104SNeel Natu { 2455d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2456d17b5104SNeel Natu VM_REG_GUEST_ES, 2457d17b5104SNeel Natu VM_REG_GUEST_CS, 2458d17b5104SNeel Natu VM_REG_GUEST_SS, 2459d17b5104SNeel Natu VM_REG_GUEST_DS, 2460d17b5104SNeel Natu VM_REG_GUEST_FS, 2461d17b5104SNeel Natu VM_REG_GUEST_GS 2462d17b5104SNeel Natu }; 2463d17b5104SNeel Natu 2464d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2465d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2466d17b5104SNeel Natu return (seg_names[seg]); 2467d17b5104SNeel Natu } 2468cf1d80d8SPeter Grehan 2469d665d229SNeel Natu void 2470d665d229SNeel Natu vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2471d665d229SNeel Natu int num_copyinfo) 2472d665d229SNeel Natu { 2473d665d229SNeel Natu int idx; 2474d665d229SNeel Natu 2475d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2476d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2477d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2478d665d229SNeel Natu } 2479d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2480d665d229SNeel Natu } 2481d665d229SNeel Natu 2482d665d229SNeel Natu int 2483d665d229SNeel Natu vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2484d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 24859c4d5478SNeel Natu int num_copyinfo, int *fault) 2486d665d229SNeel Natu { 2487d665d229SNeel Natu int error, idx, nused; 2488d665d229SNeel Natu size_t n, off, remaining; 2489d665d229SNeel Natu void *hva, *cookie; 2490d665d229SNeel Natu uint64_t gpa; 2491d665d229SNeel Natu 2492d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2493d665d229SNeel Natu 2494d665d229SNeel Natu nused = 0; 2495d665d229SNeel Natu remaining = len; 2496d665d229SNeel Natu while (remaining > 0) { 2497d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 24989c4d5478SNeel Natu error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); 24999c4d5478SNeel Natu if (error || *fault) 2500d665d229SNeel Natu return (error); 2501d665d229SNeel Natu off = gpa & PAGE_MASK; 2502d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2503d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2504d665d229SNeel Natu copyinfo[nused].len = n; 2505d665d229SNeel Natu remaining -= n; 2506d665d229SNeel Natu gla += n; 2507d665d229SNeel Natu nused++; 2508d665d229SNeel Natu } 2509d665d229SNeel Natu 2510d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 25119b1aa8d6SNeel Natu hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa, 25129b1aa8d6SNeel Natu copyinfo[idx].len, prot, &cookie); 2513d665d229SNeel Natu if (hva == NULL) 2514d665d229SNeel Natu break; 2515d665d229SNeel Natu copyinfo[idx].hva = hva; 2516d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2517d665d229SNeel Natu } 2518d665d229SNeel Natu 2519d665d229SNeel Natu if (idx != nused) { 2520d665d229SNeel Natu vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 25219c4d5478SNeel Natu return (EFAULT); 2522d665d229SNeel Natu } else { 25239c4d5478SNeel Natu *fault = 0; 2524d665d229SNeel Natu return (0); 2525d665d229SNeel Natu } 2526d665d229SNeel Natu } 2527d665d229SNeel Natu 2528d665d229SNeel Natu void 2529d665d229SNeel Natu vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2530d665d229SNeel Natu size_t len) 2531d665d229SNeel Natu { 2532d665d229SNeel Natu char *dst; 2533d665d229SNeel Natu int idx; 2534d665d229SNeel Natu 2535d665d229SNeel Natu dst = kaddr; 2536d665d229SNeel Natu idx = 0; 2537d665d229SNeel Natu while (len > 0) { 2538d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2539d665d229SNeel Natu len -= copyinfo[idx].len; 2540d665d229SNeel Natu dst += copyinfo[idx].len; 2541d665d229SNeel Natu idx++; 2542d665d229SNeel Natu } 2543d665d229SNeel Natu } 2544d665d229SNeel Natu 2545d665d229SNeel Natu void 2546d665d229SNeel Natu vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2547d665d229SNeel Natu struct vm_copyinfo *copyinfo, size_t len) 2548d665d229SNeel Natu { 2549d665d229SNeel Natu const char *src; 2550d665d229SNeel Natu int idx; 2551d665d229SNeel Natu 2552d665d229SNeel Natu src = kaddr; 2553d665d229SNeel Natu idx = 0; 2554d665d229SNeel Natu while (len > 0) { 2555d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2556d665d229SNeel Natu len -= copyinfo[idx].len; 2557d665d229SNeel Natu src += copyinfo[idx].len; 2558d665d229SNeel Natu idx++; 2559d665d229SNeel Natu } 2560d665d229SNeel Natu } 2561cf1d80d8SPeter Grehan 2562cf1d80d8SPeter Grehan /* 2563cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. Since 2564cf1d80d8SPeter Grehan * these are global stats, only return the values with for vCPU 0 2565cf1d80d8SPeter Grehan */ 2566cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2567cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2568cf1d80d8SPeter Grehan 2569cf1d80d8SPeter Grehan static void 2570cf1d80d8SPeter Grehan vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2571cf1d80d8SPeter Grehan { 2572cf1d80d8SPeter Grehan 2573cf1d80d8SPeter Grehan if (vcpu == 0) { 2574cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2575cf1d80d8SPeter Grehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2576cf1d80d8SPeter Grehan } 2577cf1d80d8SPeter Grehan } 2578cf1d80d8SPeter Grehan 2579cf1d80d8SPeter Grehan static void 2580cf1d80d8SPeter Grehan vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2581cf1d80d8SPeter Grehan { 2582cf1d80d8SPeter Grehan 2583cf1d80d8SPeter Grehan if (vcpu == 0) { 2584cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2585cf1d80d8SPeter Grehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2586cf1d80d8SPeter Grehan } 2587cf1d80d8SPeter Grehan } 2588cf1d80d8SPeter Grehan 2589cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2590cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2591