1366f6083SPeter Grehan /*- 2366f6083SPeter Grehan * Copyright (c) 2011 NetApp, Inc. 3366f6083SPeter Grehan * All rights reserved. 4366f6083SPeter Grehan * 5366f6083SPeter Grehan * Redistribution and use in source and binary forms, with or without 6366f6083SPeter Grehan * modification, are permitted provided that the following conditions 7366f6083SPeter Grehan * are met: 8366f6083SPeter Grehan * 1. Redistributions of source code must retain the above copyright 9366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer. 10366f6083SPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 11366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer in the 12366f6083SPeter Grehan * documentation and/or other materials provided with the distribution. 13366f6083SPeter Grehan * 14366f6083SPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15366f6083SPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16366f6083SPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17366f6083SPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18366f6083SPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19366f6083SPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20366f6083SPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21366f6083SPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22366f6083SPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23366f6083SPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24366f6083SPeter Grehan * SUCH DAMAGE. 25366f6083SPeter Grehan * 26366f6083SPeter Grehan * $FreeBSD$ 27366f6083SPeter Grehan */ 28366f6083SPeter Grehan 29366f6083SPeter Grehan #include <sys/cdefs.h> 30366f6083SPeter Grehan __FBSDID("$FreeBSD$"); 31366f6083SPeter Grehan 32366f6083SPeter Grehan #include <sys/param.h> 3338f1b189SPeter Grehan #include <sys/systm.h> 34366f6083SPeter Grehan #include <sys/kernel.h> 35366f6083SPeter Grehan #include <sys/module.h> 36366f6083SPeter Grehan #include <sys/sysctl.h> 37366f6083SPeter Grehan #include <sys/malloc.h> 38366f6083SPeter Grehan #include <sys/pcpu.h> 39366f6083SPeter Grehan #include <sys/lock.h> 40366f6083SPeter Grehan #include <sys/mutex.h> 41366f6083SPeter Grehan #include <sys/proc.h> 42318224bbSNeel Natu #include <sys/rwlock.h> 43366f6083SPeter Grehan #include <sys/sched.h> 44366f6083SPeter Grehan #include <sys/smp.h> 45366f6083SPeter Grehan #include <sys/systm.h> 46366f6083SPeter Grehan 47366f6083SPeter Grehan #include <vm/vm.h> 48318224bbSNeel Natu #include <vm/vm_object.h> 49318224bbSNeel Natu #include <vm/vm_page.h> 50318224bbSNeel Natu #include <vm/pmap.h> 51318224bbSNeel Natu #include <vm/vm_map.h> 52318224bbSNeel Natu #include <vm/vm_extern.h> 53318224bbSNeel Natu #include <vm/vm_param.h> 54366f6083SPeter Grehan 5563e62d39SJohn Baldwin #include <machine/cpu.h> 56366f6083SPeter Grehan #include <machine/vm.h> 57366f6083SPeter Grehan #include <machine/pcb.h> 5875dd3366SNeel Natu #include <machine/smp.h> 591c052192SNeel Natu #include <x86/psl.h> 6034a6b2d6SJohn Baldwin #include <x86/apicreg.h> 61318224bbSNeel Natu #include <machine/vmparam.h> 62366f6083SPeter Grehan 63366f6083SPeter Grehan #include <machine/vmm.h> 64565bbb86SNeel Natu #include <machine/vmm_dev.h> 65e813a873SNeel Natu #include <machine/vmm_instruction_emul.h> 66565bbb86SNeel Natu 67d17b5104SNeel Natu #include "vmm_ioport.h" 68318224bbSNeel Natu #include "vmm_ktr.h" 69b01c2033SNeel Natu #include "vmm_host.h" 70366f6083SPeter Grehan #include "vmm_mem.h" 71366f6083SPeter Grehan #include "vmm_util.h" 72762fd208STycho Nightingale #include "vatpic.h" 73e883c9bbSTycho Nightingale #include "vatpit.h" 7408e3ff32SNeel Natu #include "vhpet.h" 75565bbb86SNeel Natu #include "vioapic.h" 76366f6083SPeter Grehan #include "vlapic.h" 77160ef77aSNeel Natu #include "vpmtmr.h" 780dafa5cdSNeel Natu #include "vrtc.h" 79366f6083SPeter Grehan #include "vmm_stat.h" 80f76fc5d4SNeel Natu #include "vmm_lapic.h" 81366f6083SPeter Grehan 82366f6083SPeter Grehan #include "io/ppt.h" 83366f6083SPeter Grehan #include "io/iommu.h" 84366f6083SPeter Grehan 85366f6083SPeter Grehan struct vlapic; 86366f6083SPeter Grehan 875fcf252fSNeel Natu /* 885fcf252fSNeel Natu * Initialization: 895fcf252fSNeel Natu * (a) allocated when vcpu is created 905fcf252fSNeel Natu * (i) initialized when vcpu is created and when it is reinitialized 915fcf252fSNeel Natu * (o) initialized the first time the vcpu is created 925fcf252fSNeel Natu * (x) initialized before use 935fcf252fSNeel Natu */ 94366f6083SPeter Grehan struct vcpu { 955fcf252fSNeel Natu struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 965fcf252fSNeel Natu enum vcpu_state state; /* (o) vcpu state */ 975fcf252fSNeel Natu int hostcpu; /* (o) vcpu's host cpu */ 98*248e6799SNeel Natu int reqidle; /* (i) request vcpu to idle */ 995fcf252fSNeel Natu struct vlapic *vlapic; /* (i) APIC device model */ 1005fcf252fSNeel Natu enum x2apic_state x2apic_state; /* (i) APIC mode */ 101091d4532SNeel Natu uint64_t exitintinfo; /* (i) events pending at VM exit */ 1025fcf252fSNeel Natu int nmi_pending; /* (i) NMI pending */ 1035fcf252fSNeel Natu int extint_pending; /* (i) INTR pending */ 1045fcf252fSNeel Natu int exception_pending; /* (i) exception pending */ 105c9c75df4SNeel Natu int exc_vector; /* (x) exception collateral */ 106c9c75df4SNeel Natu int exc_errcode_valid; 107c9c75df4SNeel Natu uint32_t exc_errcode; 1085fcf252fSNeel Natu struct savefpu *guestfpu; /* (a,i) guest fpu state */ 1095fcf252fSNeel Natu uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 1105fcf252fSNeel Natu void *stats; /* (a,i) statistics */ 1115fcf252fSNeel Natu struct vm_exit exitinfo; /* (x) exit reason and collateral */ 112d087a399SNeel Natu uint64_t nextrip; /* (x) next instruction to execute */ 113366f6083SPeter Grehan }; 114366f6083SPeter Grehan 1155fcf252fSNeel Natu #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 116f76fc5d4SNeel Natu #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 117f76fc5d4SNeel Natu #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 118f76fc5d4SNeel Natu #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 119318224bbSNeel Natu #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 12075dd3366SNeel Natu 121318224bbSNeel Natu struct mem_seg { 122318224bbSNeel Natu vm_paddr_t gpa; 123318224bbSNeel Natu size_t len; 124318224bbSNeel Natu boolean_t wired; 125318224bbSNeel Natu vm_object_t object; 126318224bbSNeel Natu }; 127366f6083SPeter Grehan #define VM_MAX_MEMORY_SEGMENTS 2 128366f6083SPeter Grehan 129366f6083SPeter Grehan /* 1305fcf252fSNeel Natu * Initialization: 1315fcf252fSNeel Natu * (o) initialized the first time the VM is created 1325fcf252fSNeel Natu * (i) initialized when VM is created and when it is reinitialized 1335fcf252fSNeel Natu * (x) initialized before use 134366f6083SPeter Grehan */ 1355fcf252fSNeel Natu struct vm { 1365fcf252fSNeel Natu void *cookie; /* (i) cpu-specific data */ 1375fcf252fSNeel Natu void *iommu; /* (x) iommu-specific data */ 1385fcf252fSNeel Natu struct vhpet *vhpet; /* (i) virtual HPET */ 1395fcf252fSNeel Natu struct vioapic *vioapic; /* (i) virtual ioapic */ 1405fcf252fSNeel Natu struct vatpic *vatpic; /* (i) virtual atpic */ 1415fcf252fSNeel Natu struct vatpit *vatpit; /* (i) virtual atpit */ 142160ef77aSNeel Natu struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 1430dafa5cdSNeel Natu struct vrtc *vrtc; /* (o) virtual RTC */ 1445fcf252fSNeel Natu volatile cpuset_t active_cpus; /* (i) active vcpus */ 1455fcf252fSNeel Natu int suspend; /* (i) stop VM execution */ 1465fcf252fSNeel Natu volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 1475fcf252fSNeel Natu volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 1485fcf252fSNeel Natu cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ 1495fcf252fSNeel Natu cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ 1505fcf252fSNeel Natu void *rendezvous_arg; /* (x) rendezvous func/arg */ 1515b8a8cd1SNeel Natu vm_rendezvous_func_t rendezvous_func; 1525fcf252fSNeel Natu struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 1535fcf252fSNeel Natu int num_mem_segs; /* (o) guest memory segments */ 1545fcf252fSNeel Natu struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; 1555fcf252fSNeel Natu struct vmspace *vmspace; /* (o) guest's address space */ 1565fcf252fSNeel Natu char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 1575fcf252fSNeel Natu struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 158366f6083SPeter Grehan }; 159366f6083SPeter Grehan 160d5408b1dSNeel Natu static int vmm_initialized; 161d5408b1dSNeel Natu 162366f6083SPeter Grehan static struct vmm_ops *ops; 163add611fdSNeel Natu #define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) 164366f6083SPeter Grehan #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 16563e62d39SJohn Baldwin #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 166366f6083SPeter Grehan 167318224bbSNeel Natu #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 168*248e6799SNeel Natu #define VMRUN(vmi, vcpu, rip, pmap, evinfo) \ 169*248e6799SNeel Natu (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO) 170366f6083SPeter Grehan #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 171318224bbSNeel Natu #define VMSPACE_ALLOC(min, max) \ 172318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 173318224bbSNeel Natu #define VMSPACE_FREE(vmspace) \ 174318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 175366f6083SPeter Grehan #define VMGETREG(vmi, vcpu, num, retval) \ 176366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 177366f6083SPeter Grehan #define VMSETREG(vmi, vcpu, num, val) \ 178366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 179366f6083SPeter Grehan #define VMGETDESC(vmi, vcpu, num, desc) \ 180366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 181366f6083SPeter Grehan #define VMSETDESC(vmi, vcpu, num, desc) \ 182366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 183366f6083SPeter Grehan #define VMGETCAP(vmi, vcpu, num, retval) \ 184366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 185366f6083SPeter Grehan #define VMSETCAP(vmi, vcpu, num, val) \ 186366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 187de5ea6b6SNeel Natu #define VLAPIC_INIT(vmi, vcpu) \ 188de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) 189de5ea6b6SNeel Natu #define VLAPIC_CLEANUP(vmi, vlapic) \ 190de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) 191366f6083SPeter Grehan 192014a52f3SNeel Natu #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 193014a52f3SNeel Natu #define fpu_stop_emulating() clts() 194366f6083SPeter Grehan 195366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm"); 196366f6083SPeter Grehan 197366f6083SPeter Grehan /* statistics */ 19861592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 199366f6083SPeter Grehan 200add611fdSNeel Natu SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 201add611fdSNeel Natu 202055fc2cbSNeel Natu /* 203055fc2cbSNeel Natu * Halt the guest if all vcpus are executing a HLT instruction with 204055fc2cbSNeel Natu * interrupts disabled. 205055fc2cbSNeel Natu */ 206055fc2cbSNeel Natu static int halt_detection_enabled = 1; 207055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 208055fc2cbSNeel Natu &halt_detection_enabled, 0, 209055fc2cbSNeel Natu "Halt VM if all vcpus execute HLT with interrupts disabled"); 210055fc2cbSNeel Natu 211add611fdSNeel Natu static int vmm_ipinum; 212add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 213add611fdSNeel Natu "IPI vector used for vcpu notifications"); 214add611fdSNeel Natu 215b0538143SNeel Natu static int trace_guest_exceptions; 216b0538143SNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 217b0538143SNeel Natu &trace_guest_exceptions, 0, 218b0538143SNeel Natu "Trap into hypervisor on all guest exceptions and reflect them back"); 219b0538143SNeel Natu 220a15f820aSRyan Stone static int vmm_force_iommu = 0; 221a15f820aSRyan Stone TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu); 222a15f820aSRyan Stone SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, 223a15f820aSRyan Stone "Force use of I/O MMU even if no passthrough devices were found."); 224a15f820aSRyan Stone 225*248e6799SNeel Natu static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); 226*248e6799SNeel Natu 227*248e6799SNeel Natu #ifdef KTR 228*248e6799SNeel Natu static const char * 229*248e6799SNeel Natu vcpu_state2str(enum vcpu_state state) 230*248e6799SNeel Natu { 231*248e6799SNeel Natu 232*248e6799SNeel Natu switch (state) { 233*248e6799SNeel Natu case VCPU_IDLE: 234*248e6799SNeel Natu return ("idle"); 235*248e6799SNeel Natu case VCPU_FROZEN: 236*248e6799SNeel Natu return ("frozen"); 237*248e6799SNeel Natu case VCPU_RUNNING: 238*248e6799SNeel Natu return ("running"); 239*248e6799SNeel Natu case VCPU_SLEEPING: 240*248e6799SNeel Natu return ("sleeping"); 241*248e6799SNeel Natu default: 242*248e6799SNeel Natu return ("unknown"); 243*248e6799SNeel Natu } 244*248e6799SNeel Natu } 245*248e6799SNeel Natu #endif 246*248e6799SNeel Natu 247366f6083SPeter Grehan static void 2485fcf252fSNeel Natu vcpu_cleanup(struct vm *vm, int i, bool destroy) 249366f6083SPeter Grehan { 250de5ea6b6SNeel Natu struct vcpu *vcpu = &vm->vcpu[i]; 251de5ea6b6SNeel Natu 252de5ea6b6SNeel Natu VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); 2535fcf252fSNeel Natu if (destroy) { 254366f6083SPeter Grehan vmm_stat_free(vcpu->stats); 25538f1b189SPeter Grehan fpu_save_area_free(vcpu->guestfpu); 256366f6083SPeter Grehan } 2575fcf252fSNeel Natu } 258366f6083SPeter Grehan 259366f6083SPeter Grehan static void 2605fcf252fSNeel Natu vcpu_init(struct vm *vm, int vcpu_id, bool create) 261366f6083SPeter Grehan { 262366f6083SPeter Grehan struct vcpu *vcpu; 263366f6083SPeter Grehan 2645fcf252fSNeel Natu KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, 2655fcf252fSNeel Natu ("vcpu_init: invalid vcpu %d", vcpu_id)); 2665fcf252fSNeel Natu 267366f6083SPeter Grehan vcpu = &vm->vcpu[vcpu_id]; 268366f6083SPeter Grehan 2695fcf252fSNeel Natu if (create) { 2705fcf252fSNeel Natu KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 2715fcf252fSNeel Natu "initialized", vcpu_id)); 27275dd3366SNeel Natu vcpu_lock_init(vcpu); 2735fcf252fSNeel Natu vcpu->state = VCPU_IDLE; 27475dd3366SNeel Natu vcpu->hostcpu = NOCPU; 2755fcf252fSNeel Natu vcpu->guestfpu = fpu_save_area_alloc(); 2765fcf252fSNeel Natu vcpu->stats = vmm_stat_alloc(); 2775fcf252fSNeel Natu } 2785fcf252fSNeel Natu 279de5ea6b6SNeel Natu vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); 28052e5c8a2SNeel Natu vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); 281*248e6799SNeel Natu vcpu->reqidle = 0; 282091d4532SNeel Natu vcpu->exitintinfo = 0; 2835fcf252fSNeel Natu vcpu->nmi_pending = 0; 2845fcf252fSNeel Natu vcpu->extint_pending = 0; 2855fcf252fSNeel Natu vcpu->exception_pending = 0; 286abb023fbSJohn Baldwin vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 28738f1b189SPeter Grehan fpu_save_area_reset(vcpu->guestfpu); 2885fcf252fSNeel Natu vmm_stat_init(vcpu->stats); 289366f6083SPeter Grehan } 290366f6083SPeter Grehan 291b0538143SNeel Natu int 292b0538143SNeel Natu vcpu_trace_exceptions(struct vm *vm, int vcpuid) 293b0538143SNeel Natu { 294b0538143SNeel Natu 295b0538143SNeel Natu return (trace_guest_exceptions); 296b0538143SNeel Natu } 297b0538143SNeel Natu 29898ed632cSNeel Natu struct vm_exit * 29998ed632cSNeel Natu vm_exitinfo(struct vm *vm, int cpuid) 30098ed632cSNeel Natu { 30198ed632cSNeel Natu struct vcpu *vcpu; 30298ed632cSNeel Natu 30398ed632cSNeel Natu if (cpuid < 0 || cpuid >= VM_MAXCPU) 30498ed632cSNeel Natu panic("vm_exitinfo: invalid cpuid %d", cpuid); 30598ed632cSNeel Natu 30698ed632cSNeel Natu vcpu = &vm->vcpu[cpuid]; 30798ed632cSNeel Natu 30898ed632cSNeel Natu return (&vcpu->exitinfo); 30998ed632cSNeel Natu } 31098ed632cSNeel Natu 31163e62d39SJohn Baldwin static void 31263e62d39SJohn Baldwin vmm_resume(void) 31363e62d39SJohn Baldwin { 31463e62d39SJohn Baldwin VMM_RESUME(); 31563e62d39SJohn Baldwin } 31663e62d39SJohn Baldwin 317366f6083SPeter Grehan static int 318366f6083SPeter Grehan vmm_init(void) 319366f6083SPeter Grehan { 320366f6083SPeter Grehan int error; 321366f6083SPeter Grehan 322b01c2033SNeel Natu vmm_host_state_init(); 323add611fdSNeel Natu 32418a2b08eSNeel Natu vmm_ipinum = lapic_ipi_alloc(&IDTVEC(justreturn)); 32518a2b08eSNeel Natu if (vmm_ipinum < 0) 326add611fdSNeel Natu vmm_ipinum = IPI_AST; 327366f6083SPeter Grehan 328366f6083SPeter Grehan error = vmm_mem_init(); 329366f6083SPeter Grehan if (error) 330366f6083SPeter Grehan return (error); 331366f6083SPeter Grehan 332366f6083SPeter Grehan if (vmm_is_intel()) 333366f6083SPeter Grehan ops = &vmm_ops_intel; 334366f6083SPeter Grehan else if (vmm_is_amd()) 335366f6083SPeter Grehan ops = &vmm_ops_amd; 336366f6083SPeter Grehan else 337366f6083SPeter Grehan return (ENXIO); 338366f6083SPeter Grehan 33963e62d39SJohn Baldwin vmm_resume_p = vmm_resume; 340366f6083SPeter Grehan 341add611fdSNeel Natu return (VMM_INIT(vmm_ipinum)); 342366f6083SPeter Grehan } 343366f6083SPeter Grehan 344366f6083SPeter Grehan static int 345366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg) 346366f6083SPeter Grehan { 347366f6083SPeter Grehan int error; 348366f6083SPeter Grehan 349366f6083SPeter Grehan switch (what) { 350366f6083SPeter Grehan case MOD_LOAD: 351366f6083SPeter Grehan vmmdev_init(); 352a15f820aSRyan Stone if (vmm_force_iommu || ppt_avail_devices() > 0) 353366f6083SPeter Grehan iommu_init(); 354366f6083SPeter Grehan error = vmm_init(); 355d5408b1dSNeel Natu if (error == 0) 356d5408b1dSNeel Natu vmm_initialized = 1; 357366f6083SPeter Grehan break; 358366f6083SPeter Grehan case MOD_UNLOAD: 359cdc5b9e7SNeel Natu error = vmmdev_cleanup(); 360cdc5b9e7SNeel Natu if (error == 0) { 36163e62d39SJohn Baldwin vmm_resume_p = NULL; 362366f6083SPeter Grehan iommu_cleanup(); 363add611fdSNeel Natu if (vmm_ipinum != IPI_AST) 36418a2b08eSNeel Natu lapic_ipi_free(vmm_ipinum); 365366f6083SPeter Grehan error = VMM_CLEANUP(); 36681ef6611SPeter Grehan /* 36781ef6611SPeter Grehan * Something bad happened - prevent new 36881ef6611SPeter Grehan * VMs from being created 36981ef6611SPeter Grehan */ 37081ef6611SPeter Grehan if (error) 371d5408b1dSNeel Natu vmm_initialized = 0; 37281ef6611SPeter Grehan } 373366f6083SPeter Grehan break; 374366f6083SPeter Grehan default: 375366f6083SPeter Grehan error = 0; 376366f6083SPeter Grehan break; 377366f6083SPeter Grehan } 378366f6083SPeter Grehan return (error); 379366f6083SPeter Grehan } 380366f6083SPeter Grehan 381366f6083SPeter Grehan static moduledata_t vmm_kmod = { 382366f6083SPeter Grehan "vmm", 383366f6083SPeter Grehan vmm_handler, 384366f6083SPeter Grehan NULL 385366f6083SPeter Grehan }; 386366f6083SPeter Grehan 387366f6083SPeter Grehan /* 388e3f0800bSNeel Natu * vmm initialization has the following dependencies: 389e3f0800bSNeel Natu * 390e3f0800bSNeel Natu * - iommu initialization must happen after the pci passthru driver has had 391e3f0800bSNeel Natu * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 392e3f0800bSNeel Natu * 393e3f0800bSNeel Natu * - VT-x initialization requires smp_rendezvous() and therefore must happen 394e3f0800bSNeel Natu * after SMP is fully functional (after SI_SUB_SMP). 395366f6083SPeter Grehan */ 396e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 397366f6083SPeter Grehan MODULE_VERSION(vmm, 1); 398366f6083SPeter Grehan 3995fcf252fSNeel Natu static void 4005fcf252fSNeel Natu vm_init(struct vm *vm, bool create) 4015fcf252fSNeel Natu { 4025fcf252fSNeel Natu int i; 4035fcf252fSNeel Natu 4045fcf252fSNeel Natu vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); 4055fcf252fSNeel Natu vm->iommu = NULL; 4065fcf252fSNeel Natu vm->vioapic = vioapic_init(vm); 4075fcf252fSNeel Natu vm->vhpet = vhpet_init(vm); 4085fcf252fSNeel Natu vm->vatpic = vatpic_init(vm); 4095fcf252fSNeel Natu vm->vatpit = vatpit_init(vm); 410160ef77aSNeel Natu vm->vpmtmr = vpmtmr_init(vm); 4110dafa5cdSNeel Natu if (create) 4120dafa5cdSNeel Natu vm->vrtc = vrtc_init(vm); 4135fcf252fSNeel Natu 4145fcf252fSNeel Natu CPU_ZERO(&vm->active_cpus); 4155fcf252fSNeel Natu 4165fcf252fSNeel Natu vm->suspend = 0; 4175fcf252fSNeel Natu CPU_ZERO(&vm->suspended_cpus); 4185fcf252fSNeel Natu 4195fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4205fcf252fSNeel Natu vcpu_init(vm, i, create); 4215fcf252fSNeel Natu } 4225fcf252fSNeel Natu 423d5408b1dSNeel Natu int 424d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm) 425366f6083SPeter Grehan { 426366f6083SPeter Grehan struct vm *vm; 427318224bbSNeel Natu struct vmspace *vmspace; 428366f6083SPeter Grehan 429d5408b1dSNeel Natu /* 430d5408b1dSNeel Natu * If vmm.ko could not be successfully initialized then don't attempt 431d5408b1dSNeel Natu * to create the virtual machine. 432d5408b1dSNeel Natu */ 433d5408b1dSNeel Natu if (!vmm_initialized) 434d5408b1dSNeel Natu return (ENXIO); 435d5408b1dSNeel Natu 436366f6083SPeter Grehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 437d5408b1dSNeel Natu return (EINVAL); 438366f6083SPeter Grehan 439526c8885SPeter Grehan vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); 440318224bbSNeel Natu if (vmspace == NULL) 441318224bbSNeel Natu return (ENOMEM); 442318224bbSNeel Natu 443366f6083SPeter Grehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 444366f6083SPeter Grehan strcpy(vm->name, name); 4455fcf252fSNeel Natu vm->num_mem_segs = 0; 44688c4b8d1SNeel Natu vm->vmspace = vmspace; 4475b8a8cd1SNeel Natu mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 448366f6083SPeter Grehan 4495fcf252fSNeel Natu vm_init(vm, true); 450366f6083SPeter Grehan 451d5408b1dSNeel Natu *retvm = vm; 452d5408b1dSNeel Natu return (0); 453366f6083SPeter Grehan } 454366f6083SPeter Grehan 455f7d51510SNeel Natu static void 456318224bbSNeel Natu vm_free_mem_seg(struct vm *vm, struct mem_seg *seg) 457f7d51510SNeel Natu { 4587ce04d0aSNeel Natu 459318224bbSNeel Natu if (seg->object != NULL) 460318224bbSNeel Natu vmm_mem_free(vm->vmspace, seg->gpa, seg->len); 461f7d51510SNeel Natu 462318224bbSNeel Natu bzero(seg, sizeof(*seg)); 463f7d51510SNeel Natu } 464f7d51510SNeel Natu 4655fcf252fSNeel Natu static void 4665fcf252fSNeel Natu vm_cleanup(struct vm *vm, bool destroy) 467366f6083SPeter Grehan { 468366f6083SPeter Grehan int i; 469366f6083SPeter Grehan 470366f6083SPeter Grehan ppt_unassign_all(vm); 471366f6083SPeter Grehan 472318224bbSNeel Natu if (vm->iommu != NULL) 473318224bbSNeel Natu iommu_destroy_domain(vm->iommu); 474318224bbSNeel Natu 4750dafa5cdSNeel Natu if (destroy) 4760dafa5cdSNeel Natu vrtc_cleanup(vm->vrtc); 4770dafa5cdSNeel Natu else 4780dafa5cdSNeel Natu vrtc_reset(vm->vrtc); 479160ef77aSNeel Natu vpmtmr_cleanup(vm->vpmtmr); 480e883c9bbSTycho Nightingale vatpit_cleanup(vm->vatpit); 48108e3ff32SNeel Natu vhpet_cleanup(vm->vhpet); 482762fd208STycho Nightingale vatpic_cleanup(vm->vatpic); 48308e3ff32SNeel Natu vioapic_cleanup(vm->vioapic); 48408e3ff32SNeel Natu 4855fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4865fcf252fSNeel Natu vcpu_cleanup(vm, i, destroy); 4875fcf252fSNeel Natu 4885fcf252fSNeel Natu VMCLEANUP(vm->cookie); 4895fcf252fSNeel Natu 4905fcf252fSNeel Natu if (destroy) { 491366f6083SPeter Grehan for (i = 0; i < vm->num_mem_segs; i++) 492f7d51510SNeel Natu vm_free_mem_seg(vm, &vm->mem_segs[i]); 493f7d51510SNeel Natu 494f7d51510SNeel Natu vm->num_mem_segs = 0; 495366f6083SPeter Grehan 496318224bbSNeel Natu VMSPACE_FREE(vm->vmspace); 4975fcf252fSNeel Natu vm->vmspace = NULL; 4985fcf252fSNeel Natu } 4995fcf252fSNeel Natu } 500366f6083SPeter Grehan 5015fcf252fSNeel Natu void 5025fcf252fSNeel Natu vm_destroy(struct vm *vm) 5035fcf252fSNeel Natu { 5045fcf252fSNeel Natu vm_cleanup(vm, true); 505366f6083SPeter Grehan free(vm, M_VM); 506366f6083SPeter Grehan } 507366f6083SPeter Grehan 5085fcf252fSNeel Natu int 5095fcf252fSNeel Natu vm_reinit(struct vm *vm) 5105fcf252fSNeel Natu { 5115fcf252fSNeel Natu int error; 5125fcf252fSNeel Natu 5135fcf252fSNeel Natu /* 5145fcf252fSNeel Natu * A virtual machine can be reset only if all vcpus are suspended. 5155fcf252fSNeel Natu */ 5165fcf252fSNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 5175fcf252fSNeel Natu vm_cleanup(vm, false); 5185fcf252fSNeel Natu vm_init(vm, false); 5195fcf252fSNeel Natu error = 0; 5205fcf252fSNeel Natu } else { 5215fcf252fSNeel Natu error = EBUSY; 5225fcf252fSNeel Natu } 5235fcf252fSNeel Natu 5245fcf252fSNeel Natu return (error); 5255fcf252fSNeel Natu } 5265fcf252fSNeel Natu 527366f6083SPeter Grehan const char * 528366f6083SPeter Grehan vm_name(struct vm *vm) 529366f6083SPeter Grehan { 530366f6083SPeter Grehan return (vm->name); 531366f6083SPeter Grehan } 532366f6083SPeter Grehan 533366f6083SPeter Grehan int 534366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 535366f6083SPeter Grehan { 536318224bbSNeel Natu vm_object_t obj; 537366f6083SPeter Grehan 538318224bbSNeel Natu if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 539318224bbSNeel Natu return (ENOMEM); 540318224bbSNeel Natu else 541318224bbSNeel Natu return (0); 542366f6083SPeter Grehan } 543366f6083SPeter Grehan 544366f6083SPeter Grehan int 545366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 546366f6083SPeter Grehan { 547366f6083SPeter Grehan 548318224bbSNeel Natu vmm_mmio_free(vm->vmspace, gpa, len); 549318224bbSNeel Natu return (0); 550366f6083SPeter Grehan } 551366f6083SPeter Grehan 552318224bbSNeel Natu boolean_t 553318224bbSNeel Natu vm_mem_allocated(struct vm *vm, vm_paddr_t gpa) 554366f6083SPeter Grehan { 555341f19c9SNeel Natu int i; 556341f19c9SNeel Natu vm_paddr_t gpabase, gpalimit; 557341f19c9SNeel Natu 558341f19c9SNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 559341f19c9SNeel Natu gpabase = vm->mem_segs[i].gpa; 560341f19c9SNeel Natu gpalimit = gpabase + vm->mem_segs[i].len; 561341f19c9SNeel Natu if (gpa >= gpabase && gpa < gpalimit) 562318224bbSNeel Natu return (TRUE); /* 'gpa' is regular memory */ 563341f19c9SNeel Natu } 564341f19c9SNeel Natu 565318224bbSNeel Natu if (ppt_is_mmio(vm, gpa)) 566318224bbSNeel Natu return (TRUE); /* 'gpa' is pci passthru mmio */ 567318224bbSNeel Natu 568318224bbSNeel Natu return (FALSE); 569341f19c9SNeel Natu } 570341f19c9SNeel Natu 571341f19c9SNeel Natu int 572341f19c9SNeel Natu vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 573341f19c9SNeel Natu { 574318224bbSNeel Natu int available, allocated; 575318224bbSNeel Natu struct mem_seg *seg; 576318224bbSNeel Natu vm_object_t object; 577318224bbSNeel Natu vm_paddr_t g; 578366f6083SPeter Grehan 579341f19c9SNeel Natu if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 580341f19c9SNeel Natu return (EINVAL); 581341f19c9SNeel Natu 582341f19c9SNeel Natu available = allocated = 0; 583341f19c9SNeel Natu g = gpa; 584341f19c9SNeel Natu while (g < gpa + len) { 585318224bbSNeel Natu if (vm_mem_allocated(vm, g)) 586341f19c9SNeel Natu allocated++; 587318224bbSNeel Natu else 588318224bbSNeel Natu available++; 589341f19c9SNeel Natu 590341f19c9SNeel Natu g += PAGE_SIZE; 591341f19c9SNeel Natu } 592341f19c9SNeel Natu 593366f6083SPeter Grehan /* 594341f19c9SNeel Natu * If there are some allocated and some available pages in the address 595341f19c9SNeel Natu * range then it is an error. 596366f6083SPeter Grehan */ 597341f19c9SNeel Natu if (allocated && available) 598341f19c9SNeel Natu return (EINVAL); 599341f19c9SNeel Natu 600341f19c9SNeel Natu /* 601341f19c9SNeel Natu * If the entire address range being requested has already been 602341f19c9SNeel Natu * allocated then there isn't anything more to do. 603341f19c9SNeel Natu */ 604341f19c9SNeel Natu if (allocated && available == 0) 605341f19c9SNeel Natu return (0); 606366f6083SPeter Grehan 607366f6083SPeter Grehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 608366f6083SPeter Grehan return (E2BIG); 609366f6083SPeter Grehan 610f7d51510SNeel Natu seg = &vm->mem_segs[vm->num_mem_segs]; 611366f6083SPeter Grehan 612318224bbSNeel Natu if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) 613318224bbSNeel Natu return (ENOMEM); 614318224bbSNeel Natu 615f7d51510SNeel Natu seg->gpa = gpa; 616318224bbSNeel Natu seg->len = len; 617318224bbSNeel Natu seg->object = object; 618318224bbSNeel Natu seg->wired = FALSE; 6197ce04d0aSNeel Natu 620366f6083SPeter Grehan vm->num_mem_segs++; 621341f19c9SNeel Natu 622366f6083SPeter Grehan return (0); 623366f6083SPeter Grehan } 624366f6083SPeter Grehan 625477867a0SNeel Natu static vm_paddr_t 626477867a0SNeel Natu vm_maxmem(struct vm *vm) 627477867a0SNeel Natu { 628477867a0SNeel Natu int i; 629477867a0SNeel Natu vm_paddr_t gpa, maxmem; 630477867a0SNeel Natu 631477867a0SNeel Natu maxmem = 0; 632477867a0SNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 633477867a0SNeel Natu gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len; 634477867a0SNeel Natu if (gpa > maxmem) 635477867a0SNeel Natu maxmem = gpa; 636477867a0SNeel Natu } 637477867a0SNeel Natu return (maxmem); 638477867a0SNeel Natu } 639477867a0SNeel Natu 640318224bbSNeel Natu static void 641318224bbSNeel Natu vm_gpa_unwire(struct vm *vm) 642366f6083SPeter Grehan { 643318224bbSNeel Natu int i, rv; 644318224bbSNeel Natu struct mem_seg *seg; 6454db4fb2cSNeel Natu 646318224bbSNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 647318224bbSNeel Natu seg = &vm->mem_segs[i]; 648318224bbSNeel Natu if (!seg->wired) 649318224bbSNeel Natu continue; 650366f6083SPeter Grehan 651318224bbSNeel Natu rv = vm_map_unwire(&vm->vmspace->vm_map, 652318224bbSNeel Natu seg->gpa, seg->gpa + seg->len, 653318224bbSNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 654318224bbSNeel Natu KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " 655318224bbSNeel Natu "%#lx/%ld could not be unwired: %d", 656318224bbSNeel Natu vm_name(vm), seg->gpa, seg->len, rv)); 657318224bbSNeel Natu 658318224bbSNeel Natu seg->wired = FALSE; 659318224bbSNeel Natu } 660318224bbSNeel Natu } 661318224bbSNeel Natu 662318224bbSNeel Natu static int 663318224bbSNeel Natu vm_gpa_wire(struct vm *vm) 664318224bbSNeel Natu { 665318224bbSNeel Natu int i, rv; 666318224bbSNeel Natu struct mem_seg *seg; 667318224bbSNeel Natu 668318224bbSNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 669318224bbSNeel Natu seg = &vm->mem_segs[i]; 670318224bbSNeel Natu if (seg->wired) 671318224bbSNeel Natu continue; 672318224bbSNeel Natu 673318224bbSNeel Natu /* XXX rlimits? */ 674318224bbSNeel Natu rv = vm_map_wire(&vm->vmspace->vm_map, 675318224bbSNeel Natu seg->gpa, seg->gpa + seg->len, 676318224bbSNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 677318224bbSNeel Natu if (rv != KERN_SUCCESS) 678318224bbSNeel Natu break; 679318224bbSNeel Natu 680318224bbSNeel Natu seg->wired = TRUE; 681318224bbSNeel Natu } 682318224bbSNeel Natu 683318224bbSNeel Natu if (i < vm->num_mem_segs) { 684318224bbSNeel Natu /* 685318224bbSNeel Natu * Undo the wiring before returning an error. 686318224bbSNeel Natu */ 687318224bbSNeel Natu vm_gpa_unwire(vm); 688318224bbSNeel Natu return (EAGAIN); 689318224bbSNeel Natu } 690318224bbSNeel Natu 691318224bbSNeel Natu return (0); 692318224bbSNeel Natu } 693318224bbSNeel Natu 694318224bbSNeel Natu static void 695318224bbSNeel Natu vm_iommu_modify(struct vm *vm, boolean_t map) 696318224bbSNeel Natu { 697318224bbSNeel Natu int i, sz; 698318224bbSNeel Natu vm_paddr_t gpa, hpa; 699318224bbSNeel Natu struct mem_seg *seg; 700318224bbSNeel Natu void *vp, *cookie, *host_domain; 701318224bbSNeel Natu 702318224bbSNeel Natu sz = PAGE_SIZE; 703318224bbSNeel Natu host_domain = iommu_host_domain(); 704318224bbSNeel Natu 705318224bbSNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 706318224bbSNeel Natu seg = &vm->mem_segs[i]; 707318224bbSNeel Natu KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", 708318224bbSNeel Natu vm_name(vm), seg->gpa, seg->len)); 709318224bbSNeel Natu 710318224bbSNeel Natu gpa = seg->gpa; 711318224bbSNeel Natu while (gpa < seg->gpa + seg->len) { 712318224bbSNeel Natu vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, 713318224bbSNeel Natu &cookie); 714318224bbSNeel Natu KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 715318224bbSNeel Natu vm_name(vm), gpa)); 716318224bbSNeel Natu 717318224bbSNeel Natu vm_gpa_release(cookie); 718318224bbSNeel Natu 719318224bbSNeel Natu hpa = DMAP_TO_PHYS((uintptr_t)vp); 720318224bbSNeel Natu if (map) { 721318224bbSNeel Natu iommu_create_mapping(vm->iommu, gpa, hpa, sz); 722318224bbSNeel Natu iommu_remove_mapping(host_domain, hpa, sz); 723318224bbSNeel Natu } else { 724318224bbSNeel Natu iommu_remove_mapping(vm->iommu, gpa, sz); 725318224bbSNeel Natu iommu_create_mapping(host_domain, hpa, hpa, sz); 726318224bbSNeel Natu } 727318224bbSNeel Natu 728318224bbSNeel Natu gpa += PAGE_SIZE; 729318224bbSNeel Natu } 730318224bbSNeel Natu } 731318224bbSNeel Natu 732318224bbSNeel Natu /* 733318224bbSNeel Natu * Invalidate the cached translations associated with the domain 734318224bbSNeel Natu * from which pages were removed. 735318224bbSNeel Natu */ 736318224bbSNeel Natu if (map) 737318224bbSNeel Natu iommu_invalidate_tlb(host_domain); 738318224bbSNeel Natu else 739318224bbSNeel Natu iommu_invalidate_tlb(vm->iommu); 740318224bbSNeel Natu } 741318224bbSNeel Natu 742318224bbSNeel Natu #define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 743318224bbSNeel Natu #define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 744318224bbSNeel Natu 745318224bbSNeel Natu int 746318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 747318224bbSNeel Natu { 748318224bbSNeel Natu int error; 749318224bbSNeel Natu 750318224bbSNeel Natu error = ppt_unassign_device(vm, bus, slot, func); 751318224bbSNeel Natu if (error) 752318224bbSNeel Natu return (error); 753318224bbSNeel Natu 75451f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 755318224bbSNeel Natu vm_iommu_unmap(vm); 756318224bbSNeel Natu vm_gpa_unwire(vm); 757318224bbSNeel Natu } 758318224bbSNeel Natu return (0); 759318224bbSNeel Natu } 760318224bbSNeel Natu 761318224bbSNeel Natu int 762318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 763318224bbSNeel Natu { 764318224bbSNeel Natu int error; 765318224bbSNeel Natu vm_paddr_t maxaddr; 766318224bbSNeel Natu 767318224bbSNeel Natu /* 768318224bbSNeel Natu * Virtual machines with pci passthru devices get special treatment: 769318224bbSNeel Natu * - the guest physical memory is wired 770318224bbSNeel Natu * - the iommu is programmed to do the 'gpa' to 'hpa' translation 771318224bbSNeel Natu * 772318224bbSNeel Natu * We need to do this before the first pci passthru device is attached. 773318224bbSNeel Natu */ 77451f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 775318224bbSNeel Natu KASSERT(vm->iommu == NULL, 776318224bbSNeel Natu ("vm_assign_pptdev: iommu must be NULL")); 777477867a0SNeel Natu maxaddr = vm_maxmem(vm); 778318224bbSNeel Natu vm->iommu = iommu_create_domain(maxaddr); 779318224bbSNeel Natu 780318224bbSNeel Natu error = vm_gpa_wire(vm); 781318224bbSNeel Natu if (error) 782318224bbSNeel Natu return (error); 783318224bbSNeel Natu 784318224bbSNeel Natu vm_iommu_map(vm); 785318224bbSNeel Natu } 786318224bbSNeel Natu 787318224bbSNeel Natu error = ppt_assign_device(vm, bus, slot, func); 788318224bbSNeel Natu return (error); 789318224bbSNeel Natu } 790318224bbSNeel Natu 791318224bbSNeel Natu void * 792318224bbSNeel Natu vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 793318224bbSNeel Natu void **cookie) 794318224bbSNeel Natu { 795318224bbSNeel Natu int count, pageoff; 796318224bbSNeel Natu vm_page_t m; 797318224bbSNeel Natu 798318224bbSNeel Natu pageoff = gpa & PAGE_MASK; 799318224bbSNeel Natu if (len > PAGE_SIZE - pageoff) 800318224bbSNeel Natu panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 801318224bbSNeel Natu 802318224bbSNeel Natu count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 803318224bbSNeel Natu trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 804318224bbSNeel Natu 805318224bbSNeel Natu if (count == 1) { 806318224bbSNeel Natu *cookie = m; 807318224bbSNeel Natu return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 808318224bbSNeel Natu } else { 809318224bbSNeel Natu *cookie = NULL; 810318224bbSNeel Natu return (NULL); 811318224bbSNeel Natu } 812318224bbSNeel Natu } 813318224bbSNeel Natu 814318224bbSNeel Natu void 815318224bbSNeel Natu vm_gpa_release(void *cookie) 816318224bbSNeel Natu { 817318224bbSNeel Natu vm_page_t m = cookie; 818318224bbSNeel Natu 819318224bbSNeel Natu vm_page_lock(m); 820318224bbSNeel Natu vm_page_unhold(m); 821318224bbSNeel Natu vm_page_unlock(m); 822366f6083SPeter Grehan } 823366f6083SPeter Grehan 824366f6083SPeter Grehan int 825366f6083SPeter Grehan vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 826366f6083SPeter Grehan struct vm_memory_segment *seg) 827366f6083SPeter Grehan { 828366f6083SPeter Grehan int i; 829366f6083SPeter Grehan 830366f6083SPeter Grehan for (i = 0; i < vm->num_mem_segs; i++) { 831366f6083SPeter Grehan if (gpabase == vm->mem_segs[i].gpa) { 832318224bbSNeel Natu seg->gpa = vm->mem_segs[i].gpa; 833318224bbSNeel Natu seg->len = vm->mem_segs[i].len; 834318224bbSNeel Natu seg->wired = vm->mem_segs[i].wired; 835366f6083SPeter Grehan return (0); 836366f6083SPeter Grehan } 837366f6083SPeter Grehan } 838366f6083SPeter Grehan return (-1); 839366f6083SPeter Grehan } 840366f6083SPeter Grehan 841366f6083SPeter Grehan int 842318224bbSNeel Natu vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, 843318224bbSNeel Natu vm_offset_t *offset, struct vm_object **object) 844318224bbSNeel Natu { 845318224bbSNeel Natu int i; 846318224bbSNeel Natu size_t seg_len; 847318224bbSNeel Natu vm_paddr_t seg_gpa; 848318224bbSNeel Natu vm_object_t seg_obj; 849318224bbSNeel Natu 850318224bbSNeel Natu for (i = 0; i < vm->num_mem_segs; i++) { 851318224bbSNeel Natu if ((seg_obj = vm->mem_segs[i].object) == NULL) 852318224bbSNeel Natu continue; 853318224bbSNeel Natu 854318224bbSNeel Natu seg_gpa = vm->mem_segs[i].gpa; 855318224bbSNeel Natu seg_len = vm->mem_segs[i].len; 856318224bbSNeel Natu 857318224bbSNeel Natu if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { 858318224bbSNeel Natu *offset = gpa - seg_gpa; 859318224bbSNeel Natu *object = seg_obj; 860318224bbSNeel Natu vm_object_reference(seg_obj); 861318224bbSNeel Natu return (0); 862318224bbSNeel Natu } 863318224bbSNeel Natu } 864318224bbSNeel Natu 865318224bbSNeel Natu return (EINVAL); 866318224bbSNeel Natu } 867318224bbSNeel Natu 868318224bbSNeel Natu int 869366f6083SPeter Grehan vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 870366f6083SPeter Grehan { 871366f6083SPeter Grehan 872366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 873366f6083SPeter Grehan return (EINVAL); 874366f6083SPeter Grehan 875366f6083SPeter Grehan if (reg >= VM_REG_LAST) 876366f6083SPeter Grehan return (EINVAL); 877366f6083SPeter Grehan 878366f6083SPeter Grehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 879366f6083SPeter Grehan } 880366f6083SPeter Grehan 881366f6083SPeter Grehan int 882d087a399SNeel Natu vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) 883366f6083SPeter Grehan { 884d087a399SNeel Natu struct vcpu *vcpu; 885d087a399SNeel Natu int error; 886366f6083SPeter Grehan 887d087a399SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 888366f6083SPeter Grehan return (EINVAL); 889366f6083SPeter Grehan 890366f6083SPeter Grehan if (reg >= VM_REG_LAST) 891366f6083SPeter Grehan return (EINVAL); 892366f6083SPeter Grehan 893d087a399SNeel Natu error = VMSETREG(vm->cookie, vcpuid, reg, val); 894d087a399SNeel Natu if (error || reg != VM_REG_GUEST_RIP) 895d087a399SNeel Natu return (error); 896d087a399SNeel Natu 897d087a399SNeel Natu /* Set 'nextrip' to match the value of %rip */ 898d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); 899d087a399SNeel Natu vcpu = &vm->vcpu[vcpuid]; 900d087a399SNeel Natu vcpu->nextrip = val; 901d087a399SNeel Natu return (0); 902366f6083SPeter Grehan } 903366f6083SPeter Grehan 904366f6083SPeter Grehan static boolean_t 905366f6083SPeter Grehan is_descriptor_table(int reg) 906366f6083SPeter Grehan { 907366f6083SPeter Grehan 908366f6083SPeter Grehan switch (reg) { 909366f6083SPeter Grehan case VM_REG_GUEST_IDTR: 910366f6083SPeter Grehan case VM_REG_GUEST_GDTR: 911366f6083SPeter Grehan return (TRUE); 912366f6083SPeter Grehan default: 913366f6083SPeter Grehan return (FALSE); 914366f6083SPeter Grehan } 915366f6083SPeter Grehan } 916366f6083SPeter Grehan 917366f6083SPeter Grehan static boolean_t 918366f6083SPeter Grehan is_segment_register(int reg) 919366f6083SPeter Grehan { 920366f6083SPeter Grehan 921366f6083SPeter Grehan switch (reg) { 922366f6083SPeter Grehan case VM_REG_GUEST_ES: 923366f6083SPeter Grehan case VM_REG_GUEST_CS: 924366f6083SPeter Grehan case VM_REG_GUEST_SS: 925366f6083SPeter Grehan case VM_REG_GUEST_DS: 926366f6083SPeter Grehan case VM_REG_GUEST_FS: 927366f6083SPeter Grehan case VM_REG_GUEST_GS: 928366f6083SPeter Grehan case VM_REG_GUEST_TR: 929366f6083SPeter Grehan case VM_REG_GUEST_LDTR: 930366f6083SPeter Grehan return (TRUE); 931366f6083SPeter Grehan default: 932366f6083SPeter Grehan return (FALSE); 933366f6083SPeter Grehan } 934366f6083SPeter Grehan } 935366f6083SPeter Grehan 936366f6083SPeter Grehan int 937366f6083SPeter Grehan vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 938366f6083SPeter Grehan struct seg_desc *desc) 939366f6083SPeter Grehan { 940366f6083SPeter Grehan 941366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 942366f6083SPeter Grehan return (EINVAL); 943366f6083SPeter Grehan 944366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 945366f6083SPeter Grehan return (EINVAL); 946366f6083SPeter Grehan 947366f6083SPeter Grehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 948366f6083SPeter Grehan } 949366f6083SPeter Grehan 950366f6083SPeter Grehan int 951366f6083SPeter Grehan vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 952366f6083SPeter Grehan struct seg_desc *desc) 953366f6083SPeter Grehan { 954366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 955366f6083SPeter Grehan return (EINVAL); 956366f6083SPeter Grehan 957366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 958366f6083SPeter Grehan return (EINVAL); 959366f6083SPeter Grehan 960366f6083SPeter Grehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 961366f6083SPeter Grehan } 962366f6083SPeter Grehan 963366f6083SPeter Grehan static void 964366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu) 965366f6083SPeter Grehan { 966366f6083SPeter Grehan 96738f1b189SPeter Grehan /* flush host state to the pcb */ 96838f1b189SPeter Grehan fpuexit(curthread); 969bd8572e0SNeel Natu 970bd8572e0SNeel Natu /* restore guest FPU state */ 971366f6083SPeter Grehan fpu_stop_emulating(); 97238f1b189SPeter Grehan fpurestore(vcpu->guestfpu); 973bd8572e0SNeel Natu 974abb023fbSJohn Baldwin /* restore guest XCR0 if XSAVE is enabled in the host */ 975abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) 976abb023fbSJohn Baldwin load_xcr(0, vcpu->guest_xcr0); 977abb023fbSJohn Baldwin 978bd8572e0SNeel Natu /* 979bd8572e0SNeel Natu * The FPU is now "dirty" with the guest's state so turn on emulation 980bd8572e0SNeel Natu * to trap any access to the FPU by the host. 981bd8572e0SNeel Natu */ 982bd8572e0SNeel Natu fpu_start_emulating(); 983366f6083SPeter Grehan } 984366f6083SPeter Grehan 985366f6083SPeter Grehan static void 986366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu) 987366f6083SPeter Grehan { 988366f6083SPeter Grehan 989bd8572e0SNeel Natu if ((rcr0() & CR0_TS) == 0) 990bd8572e0SNeel Natu panic("fpu emulation not enabled in host!"); 991bd8572e0SNeel Natu 992abb023fbSJohn Baldwin /* save guest XCR0 and restore host XCR0 */ 993abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) { 994abb023fbSJohn Baldwin vcpu->guest_xcr0 = rxcr(0); 995abb023fbSJohn Baldwin load_xcr(0, vmm_get_host_xcr0()); 996abb023fbSJohn Baldwin } 997abb023fbSJohn Baldwin 998bd8572e0SNeel Natu /* save guest FPU state */ 999bd8572e0SNeel Natu fpu_stop_emulating(); 100038f1b189SPeter Grehan fpusave(vcpu->guestfpu); 1001366f6083SPeter Grehan fpu_start_emulating(); 1002366f6083SPeter Grehan } 1003366f6083SPeter Grehan 100461592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 1005f76fc5d4SNeel Natu 1006318224bbSNeel Natu static int 1007*248e6799SNeel Natu vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, 1008f80330a8SNeel Natu bool from_idle) 1009366f6083SPeter Grehan { 1010*248e6799SNeel Natu struct vcpu *vcpu; 1011318224bbSNeel Natu int error; 1012366f6083SPeter Grehan 1013*248e6799SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1014318224bbSNeel Natu vcpu_assert_locked(vcpu); 1015366f6083SPeter Grehan 1016f76fc5d4SNeel Natu /* 1017f80330a8SNeel Natu * State transitions from the vmmdev_ioctl() must always begin from 1018f80330a8SNeel Natu * the VCPU_IDLE state. This guarantees that there is only a single 1019f80330a8SNeel Natu * ioctl() operating on a vcpu at any point. 1020f80330a8SNeel Natu */ 1021f80330a8SNeel Natu if (from_idle) { 1022*248e6799SNeel Natu while (vcpu->state != VCPU_IDLE) { 1023*248e6799SNeel Natu vcpu->reqidle = 1; 1024*248e6799SNeel Natu vcpu_notify_event_locked(vcpu, false); 1025*248e6799SNeel Natu VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to " 1026*248e6799SNeel Natu "idle requested", vcpu_state2str(vcpu->state)); 1027f80330a8SNeel Natu msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1028*248e6799SNeel Natu } 1029f80330a8SNeel Natu } else { 1030f80330a8SNeel Natu KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1031f80330a8SNeel Natu "vcpu idle state")); 1032f80330a8SNeel Natu } 1033f80330a8SNeel Natu 1034ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 1035ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1036ef39d7e9SNeel Natu "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1037ef39d7e9SNeel Natu } else { 1038ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1039ef39d7e9SNeel Natu "vcpu that is not running", vcpu->hostcpu)); 1040ef39d7e9SNeel Natu } 1041ef39d7e9SNeel Natu 1042f80330a8SNeel Natu /* 1043318224bbSNeel Natu * The following state transitions are allowed: 1044318224bbSNeel Natu * IDLE -> FROZEN -> IDLE 1045318224bbSNeel Natu * FROZEN -> RUNNING -> FROZEN 1046318224bbSNeel Natu * FROZEN -> SLEEPING -> FROZEN 1047f76fc5d4SNeel Natu */ 1048318224bbSNeel Natu switch (vcpu->state) { 1049318224bbSNeel Natu case VCPU_IDLE: 1050318224bbSNeel Natu case VCPU_RUNNING: 1051318224bbSNeel Natu case VCPU_SLEEPING: 1052318224bbSNeel Natu error = (newstate != VCPU_FROZEN); 1053318224bbSNeel Natu break; 1054318224bbSNeel Natu case VCPU_FROZEN: 1055318224bbSNeel Natu error = (newstate == VCPU_FROZEN); 1056318224bbSNeel Natu break; 1057318224bbSNeel Natu default: 1058318224bbSNeel Natu error = 1; 1059318224bbSNeel Natu break; 1060318224bbSNeel Natu } 1061318224bbSNeel Natu 1062f80330a8SNeel Natu if (error) 1063f80330a8SNeel Natu return (EBUSY); 1064318224bbSNeel Natu 1065*248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s", 1066*248e6799SNeel Natu vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); 1067*248e6799SNeel Natu 1068f80330a8SNeel Natu vcpu->state = newstate; 1069ef39d7e9SNeel Natu if (newstate == VCPU_RUNNING) 1070ef39d7e9SNeel Natu vcpu->hostcpu = curcpu; 1071ef39d7e9SNeel Natu else 1072ef39d7e9SNeel Natu vcpu->hostcpu = NOCPU; 1073ef39d7e9SNeel Natu 1074f80330a8SNeel Natu if (newstate == VCPU_IDLE) 1075f80330a8SNeel Natu wakeup(&vcpu->state); 1076f80330a8SNeel Natu 1077f80330a8SNeel Natu return (0); 1078318224bbSNeel Natu } 1079318224bbSNeel Natu 1080318224bbSNeel Natu static void 1081318224bbSNeel Natu vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1082318224bbSNeel Natu { 1083318224bbSNeel Natu int error; 1084318224bbSNeel Natu 1085f80330a8SNeel Natu if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) 1086318224bbSNeel Natu panic("Error %d setting state to %d\n", error, newstate); 1087318224bbSNeel Natu } 1088318224bbSNeel Natu 1089318224bbSNeel Natu static void 1090*248e6799SNeel Natu vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1091318224bbSNeel Natu { 1092318224bbSNeel Natu int error; 1093318224bbSNeel Natu 1094*248e6799SNeel Natu if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0) 1095318224bbSNeel Natu panic("Error %d setting state to %d", error, newstate); 1096318224bbSNeel Natu } 1097318224bbSNeel Natu 10985b8a8cd1SNeel Natu static void 10995b8a8cd1SNeel Natu vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) 11005b8a8cd1SNeel Natu { 11015b8a8cd1SNeel Natu 11025b8a8cd1SNeel Natu KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); 11035b8a8cd1SNeel Natu 11045b8a8cd1SNeel Natu /* 11055b8a8cd1SNeel Natu * Update 'rendezvous_func' and execute a write memory barrier to 11065b8a8cd1SNeel Natu * ensure that it is visible across all host cpus. This is not needed 11075b8a8cd1SNeel Natu * for correctness but it does ensure that all the vcpus will notice 11085b8a8cd1SNeel Natu * that the rendezvous is requested immediately. 11095b8a8cd1SNeel Natu */ 11105b8a8cd1SNeel Natu vm->rendezvous_func = func; 11115b8a8cd1SNeel Natu wmb(); 11125b8a8cd1SNeel Natu } 11135b8a8cd1SNeel Natu 11145b8a8cd1SNeel Natu #define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ 11155b8a8cd1SNeel Natu do { \ 11165b8a8cd1SNeel Natu if (vcpuid >= 0) \ 11175b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, fmt); \ 11185b8a8cd1SNeel Natu else \ 11195b8a8cd1SNeel Natu VM_CTR0(vm, fmt); \ 11205b8a8cd1SNeel Natu } while (0) 11215b8a8cd1SNeel Natu 11225b8a8cd1SNeel Natu static void 11235b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid) 11245b8a8cd1SNeel Natu { 11255b8a8cd1SNeel Natu 11265b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 11275b8a8cd1SNeel Natu ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); 11285b8a8cd1SNeel Natu 11295b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 11305b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 113122d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 113222d822c6SNeel Natu CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); 113322d822c6SNeel Natu 11345b8a8cd1SNeel Natu if (vcpuid != -1 && 113522d822c6SNeel Natu CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 113622d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 11375b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); 11385b8a8cd1SNeel Natu (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); 11395b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 11405b8a8cd1SNeel Natu } 11415b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 11425b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 11435b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); 11445b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, NULL); 11455b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 11465b8a8cd1SNeel Natu break; 11475b8a8cd1SNeel Natu } 11485b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); 11495b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 11505b8a8cd1SNeel Natu "vmrndv", 0); 11515b8a8cd1SNeel Natu } 11525b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 11535b8a8cd1SNeel Natu } 11545b8a8cd1SNeel Natu 1155318224bbSNeel Natu /* 1156318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 1157318224bbSNeel Natu */ 1158318224bbSNeel Natu static int 1159becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) 1160318224bbSNeel Natu { 1161318224bbSNeel Natu struct vcpu *vcpu; 1162c6a0cc2eSNeel Natu const char *wmesg; 11632ce12423SNeel Natu int t, vcpu_halted, vm_halted; 1164e50ce2aaSNeel Natu 1165e50ce2aaSNeel Natu KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 1166318224bbSNeel Natu 1167318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1168e50ce2aaSNeel Natu vcpu_halted = 0; 1169e50ce2aaSNeel Natu vm_halted = 0; 1170318224bbSNeel Natu 1171f76fc5d4SNeel Natu vcpu_lock(vcpu); 1172c6a0cc2eSNeel Natu while (1) { 1173f76fc5d4SNeel Natu /* 1174f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1175c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1176c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1177f76fc5d4SNeel Natu * 1178c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 1179c6a0cc2eSNeel Natu * vcpu returned from VMRUN() and before it acquired the 1180c6a0cc2eSNeel Natu * vcpu lock above. 1181f76fc5d4SNeel Natu */ 1182*248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1183c6a0cc2eSNeel Natu break; 1184c6a0cc2eSNeel Natu if (vm_nmi_pending(vm, vcpuid)) 1185c6a0cc2eSNeel Natu break; 1186c6a0cc2eSNeel Natu if (!intr_disabled) { 1187c6a0cc2eSNeel Natu if (vm_extint_pending(vm, vcpuid) || 1188c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1189c6a0cc2eSNeel Natu break; 1190c6a0cc2eSNeel Natu } 1191c6a0cc2eSNeel Natu } 1192c6a0cc2eSNeel Natu 1193f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 1194f008d157SNeel Natu if (vcpu_should_yield(vm, vcpuid)) 1195f008d157SNeel Natu break; 1196f008d157SNeel Natu 1197e50ce2aaSNeel Natu /* 1198e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1199e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1200e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1201e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 1202e50ce2aaSNeel Natu */ 1203e50ce2aaSNeel Natu if (intr_disabled) { 1204c6a0cc2eSNeel Natu wmesg = "vmhalt"; 1205e50ce2aaSNeel Natu VCPU_CTR0(vm, vcpuid, "Halted"); 1206055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1207e50ce2aaSNeel Natu vcpu_halted = 1; 1208e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1209e50ce2aaSNeel Natu } 1210e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1211e50ce2aaSNeel Natu vm_halted = 1; 1212e50ce2aaSNeel Natu break; 1213e50ce2aaSNeel Natu } 1214e50ce2aaSNeel Natu } else { 1215e50ce2aaSNeel Natu wmesg = "vmidle"; 1216e50ce2aaSNeel Natu } 1217c6a0cc2eSNeel Natu 1218f76fc5d4SNeel Natu t = ticks; 1219*248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1220f008d157SNeel Natu /* 1221f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1222f008d157SNeel Natu * wake up periodically to check pending signals. 1223f008d157SNeel Natu */ 1224f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 1225*248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1226f76fc5d4SNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 1227f76fc5d4SNeel Natu } 1228e50ce2aaSNeel Natu 1229e50ce2aaSNeel Natu if (vcpu_halted) 1230e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1231e50ce2aaSNeel Natu 1232f76fc5d4SNeel Natu vcpu_unlock(vcpu); 1233f76fc5d4SNeel Natu 1234e50ce2aaSNeel Natu if (vm_halted) 1235e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1236e50ce2aaSNeel Natu 1237318224bbSNeel Natu return (0); 1238318224bbSNeel Natu } 1239318224bbSNeel Natu 1240318224bbSNeel Natu static int 1241becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) 1242318224bbSNeel Natu { 1243318224bbSNeel Natu int rv, ftype; 1244318224bbSNeel Natu struct vm_map *map; 1245318224bbSNeel Natu struct vcpu *vcpu; 1246318224bbSNeel Natu struct vm_exit *vme; 1247318224bbSNeel Natu 1248318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1249318224bbSNeel Natu vme = &vcpu->exitinfo; 1250318224bbSNeel Natu 1251d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1252d087a399SNeel Natu __func__, vme->inst_length)); 1253d087a399SNeel Natu 1254318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1255318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1256318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1257318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1258318224bbSNeel Natu 1259318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1260318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1261318224bbSNeel Natu vme->u.paging.gpa, ftype); 12629d8d8e3eSNeel Natu if (rv == 0) { 12639d8d8e3eSNeel Natu VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", 12649d8d8e3eSNeel Natu ftype == VM_PROT_READ ? "accessed" : "dirty", 12659d8d8e3eSNeel Natu vme->u.paging.gpa); 1266318224bbSNeel Natu goto done; 1267318224bbSNeel Natu } 12689d8d8e3eSNeel Natu } 1269318224bbSNeel Natu 1270318224bbSNeel Natu map = &vm->vmspace->vm_map; 1271318224bbSNeel Natu rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 1272318224bbSNeel Natu 1273513c8d33SNeel Natu VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 1274513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1275318224bbSNeel Natu 1276318224bbSNeel Natu if (rv != KERN_SUCCESS) 1277318224bbSNeel Natu return (EFAULT); 1278318224bbSNeel Natu done: 1279318224bbSNeel Natu return (0); 1280318224bbSNeel Natu } 1281318224bbSNeel Natu 1282318224bbSNeel Natu static int 1283becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) 1284318224bbSNeel Natu { 1285318224bbSNeel Natu struct vie *vie; 1286318224bbSNeel Natu struct vcpu *vcpu; 1287318224bbSNeel Natu struct vm_exit *vme; 1288e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1289e813a873SNeel Natu struct vm_guest_paging *paging; 1290565bbb86SNeel Natu mem_region_read_t mread; 1291565bbb86SNeel Natu mem_region_write_t mwrite; 1292f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 12931c73ea3eSNeel Natu int cs_d, error, fault; 1294318224bbSNeel Natu 1295318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1296318224bbSNeel Natu vme = &vcpu->exitinfo; 1297318224bbSNeel Natu 12981c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 12991c73ea3eSNeel Natu __func__, vme->inst_length)); 13001c73ea3eSNeel Natu 1301318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1302318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1303e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1304f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1305318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1306e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1307f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1308318224bbSNeel Natu 13099d8d8e3eSNeel Natu VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); 13109d8d8e3eSNeel Natu 1311318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1312c2a875f9SNeel Natu if (vie->num_valid == 0) { 1313e4f605eeSTycho Nightingale error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + 13141c73ea3eSNeel Natu cs_base, VIE_INST_SIZE, vie, &fault); 1315c2a875f9SNeel Natu } else { 1316c2a875f9SNeel Natu /* 1317c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1318c2a875f9SNeel Natu */ 13199c4d5478SNeel Natu error = fault = 0; 1320c2a875f9SNeel Natu } 13219c4d5478SNeel Natu if (error || fault) 13229c4d5478SNeel Natu return (error); 1323318224bbSNeel Natu 1324c07a0648SNeel Natu if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { 1325c07a0648SNeel Natu VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", 1326c07a0648SNeel Natu vme->rip + cs_base); 1327c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1328c07a0648SNeel Natu return (0); 1329c07a0648SNeel Natu } 1330318224bbSNeel Natu 1331a0b78f09SPeter Grehan /* 13321c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 1333a0b78f09SPeter Grehan */ 1334a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1335d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 13361c73ea3eSNeel Natu VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " 13371c73ea3eSNeel Natu "decoding", vcpu->nextrip); 1338a0b78f09SPeter Grehan 133908e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1340565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1341565bbb86SNeel Natu mread = lapic_mmio_read; 1342565bbb86SNeel Natu mwrite = lapic_mmio_write; 1343565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1344565bbb86SNeel Natu mread = vioapic_mmio_read; 1345565bbb86SNeel Natu mwrite = vioapic_mmio_write; 134608e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 134708e3ff32SNeel Natu mread = vhpet_mmio_read; 134808e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1349565bbb86SNeel Natu } else { 1350becd9849SNeel Natu *retu = true; 1351318224bbSNeel Natu return (0); 1352318224bbSNeel Natu } 1353318224bbSNeel Natu 1354d665d229SNeel Natu error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1355d665d229SNeel Natu mread, mwrite, retu); 1356318224bbSNeel Natu 1357318224bbSNeel Natu return (error); 1358318224bbSNeel Natu } 1359318224bbSNeel Natu 1360b15a09c0SNeel Natu static int 1361b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1362b15a09c0SNeel Natu { 1363b15a09c0SNeel Natu int i, done; 1364b15a09c0SNeel Natu struct vcpu *vcpu; 1365b15a09c0SNeel Natu 1366b15a09c0SNeel Natu done = 0; 1367b15a09c0SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1368b15a09c0SNeel Natu 1369b15a09c0SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1370b15a09c0SNeel Natu 1371b15a09c0SNeel Natu /* 1372b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1373b15a09c0SNeel Natu * 1374b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1375b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1376b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1377b15a09c0SNeel Natu */ 1378b15a09c0SNeel Natu vcpu_lock(vcpu); 1379b15a09c0SNeel Natu while (1) { 1380b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1381b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1382b15a09c0SNeel Natu break; 1383b15a09c0SNeel Natu } 1384b15a09c0SNeel Natu 1385b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 1386b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1387*248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1388b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1389*248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1390b15a09c0SNeel Natu } else { 1391b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1392b15a09c0SNeel Natu vcpu_unlock(vcpu); 1393b15a09c0SNeel Natu vm_handle_rendezvous(vm, vcpuid); 1394b15a09c0SNeel Natu vcpu_lock(vcpu); 1395b15a09c0SNeel Natu } 1396b15a09c0SNeel Natu } 1397b15a09c0SNeel Natu vcpu_unlock(vcpu); 1398b15a09c0SNeel Natu 1399b15a09c0SNeel Natu /* 1400b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1401b15a09c0SNeel Natu */ 1402b15a09c0SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1403b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 1404b15a09c0SNeel Natu vcpu_notify_event(vm, i, false); 1405b15a09c0SNeel Natu } 1406b15a09c0SNeel Natu } 1407b15a09c0SNeel Natu 1408b15a09c0SNeel Natu *retu = true; 1409b15a09c0SNeel Natu return (0); 1410b15a09c0SNeel Natu } 1411b15a09c0SNeel Natu 1412*248e6799SNeel Natu static int 1413*248e6799SNeel Natu vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) 1414*248e6799SNeel Natu { 1415*248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 1416*248e6799SNeel Natu 1417*248e6799SNeel Natu vcpu_lock(vcpu); 1418*248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1419*248e6799SNeel Natu vcpu->reqidle = 0; 1420*248e6799SNeel Natu vcpu_unlock(vcpu); 1421*248e6799SNeel Natu *retu = true; 1422*248e6799SNeel Natu return (0); 1423*248e6799SNeel Natu } 1424*248e6799SNeel Natu 1425b15a09c0SNeel Natu int 1426f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1427b15a09c0SNeel Natu { 1428f0fdcfe2SNeel Natu int i; 1429b15a09c0SNeel Natu 1430f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1431f0fdcfe2SNeel Natu return (EINVAL); 1432f0fdcfe2SNeel Natu 1433f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1434f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1435f0fdcfe2SNeel Natu vm->suspend, how); 1436b15a09c0SNeel Natu return (EALREADY); 1437b15a09c0SNeel Natu } 1438f0fdcfe2SNeel Natu 1439f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1440f0fdcfe2SNeel Natu 1441f0fdcfe2SNeel Natu /* 1442f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1443f0fdcfe2SNeel Natu */ 1444f0fdcfe2SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1445f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 1446f0fdcfe2SNeel Natu vcpu_notify_event(vm, i, false); 1447f0fdcfe2SNeel Natu } 1448f0fdcfe2SNeel Natu 1449f0fdcfe2SNeel Natu return (0); 1450f0fdcfe2SNeel Natu } 1451f0fdcfe2SNeel Natu 1452f0fdcfe2SNeel Natu void 1453f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1454f0fdcfe2SNeel Natu { 1455f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1456f0fdcfe2SNeel Natu 1457f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1458f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1459f0fdcfe2SNeel Natu 1460f0fdcfe2SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1461f0fdcfe2SNeel Natu vmexit->rip = rip; 1462f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1463f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1464f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1465b15a09c0SNeel Natu } 1466b15a09c0SNeel Natu 146740487465SNeel Natu void 146840487465SNeel Natu vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 146940487465SNeel Natu { 147040487465SNeel Natu struct vm_exit *vmexit; 147140487465SNeel Natu 147240487465SNeel Natu KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 147340487465SNeel Natu 147440487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 147540487465SNeel Natu vmexit->rip = rip; 147640487465SNeel Natu vmexit->inst_length = 0; 147740487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 147840487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 147940487465SNeel Natu } 148040487465SNeel Natu 148140487465SNeel Natu void 1482*248e6799SNeel Natu vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) 1483*248e6799SNeel Natu { 1484*248e6799SNeel Natu struct vm_exit *vmexit; 1485*248e6799SNeel Natu 1486*248e6799SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1487*248e6799SNeel Natu vmexit->rip = rip; 1488*248e6799SNeel Natu vmexit->inst_length = 0; 1489*248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 1490*248e6799SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); 1491*248e6799SNeel Natu } 1492*248e6799SNeel Natu 1493*248e6799SNeel Natu void 149440487465SNeel Natu vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 149540487465SNeel Natu { 149640487465SNeel Natu struct vm_exit *vmexit; 149740487465SNeel Natu 149840487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 149940487465SNeel Natu vmexit->rip = rip; 150040487465SNeel Natu vmexit->inst_length = 0; 150140487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 150240487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 150340487465SNeel Natu } 150440487465SNeel Natu 1505318224bbSNeel Natu int 1506318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun) 1507318224bbSNeel Natu { 1508*248e6799SNeel Natu struct vm_eventinfo evinfo; 1509318224bbSNeel Natu int error, vcpuid; 1510318224bbSNeel Natu struct vcpu *vcpu; 1511318224bbSNeel Natu struct pcb *pcb; 1512d087a399SNeel Natu uint64_t tscval; 1513318224bbSNeel Natu struct vm_exit *vme; 1514becd9849SNeel Natu bool retu, intr_disabled; 1515318224bbSNeel Natu pmap_t pmap; 1516318224bbSNeel Natu 1517318224bbSNeel Natu vcpuid = vmrun->cpuid; 1518318224bbSNeel Natu 1519318224bbSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1520318224bbSNeel Natu return (EINVAL); 1521318224bbSNeel Natu 152295ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 152395ebc360SNeel Natu return (EINVAL); 152495ebc360SNeel Natu 152595ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 152695ebc360SNeel Natu return (EINVAL); 152795ebc360SNeel Natu 1528318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1529318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1530318224bbSNeel Natu vme = &vcpu->exitinfo; 1531*248e6799SNeel Natu evinfo.rptr = &vm->rendezvous_func; 1532*248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1533*248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1534318224bbSNeel Natu restart: 1535318224bbSNeel Natu critical_enter(); 1536318224bbSNeel Natu 1537318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1538318224bbSNeel Natu ("vm_run: absurd pm_active")); 1539318224bbSNeel Natu 1540318224bbSNeel Natu tscval = rdtsc(); 1541318224bbSNeel Natu 1542318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1543318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1544318224bbSNeel Natu 1545318224bbSNeel Natu restore_guest_fpustate(vcpu); 1546318224bbSNeel Natu 1547318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1548*248e6799SNeel Natu error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); 1549318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1550318224bbSNeel Natu 1551318224bbSNeel Natu save_guest_fpustate(vcpu); 1552318224bbSNeel Natu 1553318224bbSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1554318224bbSNeel Natu 1555318224bbSNeel Natu critical_exit(); 1556318224bbSNeel Natu 1557318224bbSNeel Natu if (error == 0) { 1558becd9849SNeel Natu retu = false; 1559d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1560318224bbSNeel Natu switch (vme->exitcode) { 1561*248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 1562*248e6799SNeel Natu error = vm_handle_reqidle(vm, vcpuid, &retu); 1563*248e6799SNeel Natu break; 1564b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 1565b15a09c0SNeel Natu error = vm_handle_suspend(vm, vcpuid, &retu); 1566b15a09c0SNeel Natu break; 156730b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 156830b94db8SNeel Natu vioapic_process_eoi(vm, vcpuid, 156930b94db8SNeel Natu vme->u.ioapic_eoi.vector); 157030b94db8SNeel Natu break; 15715b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 15725b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 15735b8a8cd1SNeel Natu error = 0; 15745b8a8cd1SNeel Natu break; 1575318224bbSNeel Natu case VM_EXITCODE_HLT: 1576becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 15771c052192SNeel Natu error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1578318224bbSNeel Natu break; 1579318224bbSNeel Natu case VM_EXITCODE_PAGING: 1580318224bbSNeel Natu error = vm_handle_paging(vm, vcpuid, &retu); 1581318224bbSNeel Natu break; 1582318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 1583318224bbSNeel Natu error = vm_handle_inst_emul(vm, vcpuid, &retu); 1584318224bbSNeel Natu break; 1585d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1586d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 1587d17b5104SNeel Natu error = vm_handle_inout(vm, vcpuid, vme, &retu); 1588d17b5104SNeel Natu break; 158965145c7fSNeel Natu case VM_EXITCODE_MONITOR: 159065145c7fSNeel Natu case VM_EXITCODE_MWAIT: 159165145c7fSNeel Natu vm_inject_ud(vm, vcpuid); 159265145c7fSNeel Natu break; 1593318224bbSNeel Natu default: 1594becd9849SNeel Natu retu = true; /* handled in userland */ 1595318224bbSNeel Natu break; 1596318224bbSNeel Natu } 1597318224bbSNeel Natu } 1598318224bbSNeel Natu 1599d087a399SNeel Natu if (error == 0 && retu == false) 1600f76fc5d4SNeel Natu goto restart; 1601f76fc5d4SNeel Natu 1602*248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); 1603*248e6799SNeel Natu 1604318224bbSNeel Natu /* copy the exit information */ 1605318224bbSNeel Natu bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1606366f6083SPeter Grehan return (error); 1607366f6083SPeter Grehan } 1608366f6083SPeter Grehan 1609366f6083SPeter Grehan int 1610c9c75df4SNeel Natu vm_restart_instruction(void *arg, int vcpuid) 1611c9c75df4SNeel Natu { 1612d087a399SNeel Natu struct vm *vm; 1613c9c75df4SNeel Natu struct vcpu *vcpu; 1614d087a399SNeel Natu enum vcpu_state state; 1615d087a399SNeel Natu uint64_t rip; 1616d087a399SNeel Natu int error; 1617c9c75df4SNeel Natu 1618d087a399SNeel Natu vm = arg; 1619c9c75df4SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1620c9c75df4SNeel Natu return (EINVAL); 1621c9c75df4SNeel Natu 1622c9c75df4SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1623d087a399SNeel Natu state = vcpu_get_state(vm, vcpuid, NULL); 1624d087a399SNeel Natu if (state == VCPU_RUNNING) { 1625d087a399SNeel Natu /* 1626d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1627d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1628d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1629d087a399SNeel Natu * instruction to be restarted. 1630d087a399SNeel Natu */ 1631c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1632d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " 1633d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1634d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1635d087a399SNeel Natu /* 1636d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 1637d087a399SNeel Natu * around VMRUN() and 'nextrip' points to the next instruction. 1638d087a399SNeel Natu * Thus instruction restart is achieved by setting 'nextrip' 1639d087a399SNeel Natu * to the vcpu's %rip. 1640d087a399SNeel Natu */ 1641d087a399SNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); 1642d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1643d087a399SNeel Natu VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " 1644d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1645d087a399SNeel Natu vcpu->nextrip = rip; 1646d087a399SNeel Natu } else { 1647d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1648d087a399SNeel Natu } 1649c9c75df4SNeel Natu return (0); 1650c9c75df4SNeel Natu } 1651c9c75df4SNeel Natu 1652c9c75df4SNeel Natu int 1653091d4532SNeel Natu vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1654091d4532SNeel Natu { 1655091d4532SNeel Natu struct vcpu *vcpu; 1656091d4532SNeel Natu int type, vector; 1657091d4532SNeel Natu 1658091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1659091d4532SNeel Natu return (EINVAL); 1660091d4532SNeel Natu 1661091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1662091d4532SNeel Natu 1663091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1664091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1665091d4532SNeel Natu vector = info & 0xff; 1666091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1667091d4532SNeel Natu return (EINVAL); 1668091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1669091d4532SNeel Natu return (EINVAL); 1670091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1671091d4532SNeel Natu return (EINVAL); 1672091d4532SNeel Natu } else { 1673091d4532SNeel Natu info = 0; 1674091d4532SNeel Natu } 1675091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1676091d4532SNeel Natu vcpu->exitintinfo = info; 1677091d4532SNeel Natu return (0); 1678091d4532SNeel Natu } 1679091d4532SNeel Natu 1680091d4532SNeel Natu enum exc_class { 1681091d4532SNeel Natu EXC_BENIGN, 1682091d4532SNeel Natu EXC_CONTRIBUTORY, 1683091d4532SNeel Natu EXC_PAGEFAULT 1684091d4532SNeel Natu }; 1685091d4532SNeel Natu 1686091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1687091d4532SNeel Natu 1688091d4532SNeel Natu static enum exc_class 1689091d4532SNeel Natu exception_class(uint64_t info) 1690091d4532SNeel Natu { 1691091d4532SNeel Natu int type, vector; 1692091d4532SNeel Natu 1693091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1694091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1695091d4532SNeel Natu vector = info & 0xff; 1696091d4532SNeel Natu 1697091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1698091d4532SNeel Natu switch (type) { 1699091d4532SNeel Natu case VM_INTINFO_HWINTR: 1700091d4532SNeel Natu case VM_INTINFO_SWINTR: 1701091d4532SNeel Natu case VM_INTINFO_NMI: 1702091d4532SNeel Natu return (EXC_BENIGN); 1703091d4532SNeel Natu default: 1704091d4532SNeel Natu /* 1705091d4532SNeel Natu * Hardware exception. 1706091d4532SNeel Natu * 1707091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1708091d4532SNeel Natu * hardware interrupt and software interrupt. 1709091d4532SNeel Natu * 1710091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1711091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1712091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1713091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1714091d4532SNeel Natu * exception. 1715091d4532SNeel Natu */ 1716091d4532SNeel Natu break; 1717091d4532SNeel Natu } 1718091d4532SNeel Natu 1719091d4532SNeel Natu switch (vector) { 1720091d4532SNeel Natu case IDT_PF: 1721091d4532SNeel Natu case IDT_VE: 1722091d4532SNeel Natu return (EXC_PAGEFAULT); 1723091d4532SNeel Natu case IDT_DE: 1724091d4532SNeel Natu case IDT_TS: 1725091d4532SNeel Natu case IDT_NP: 1726091d4532SNeel Natu case IDT_SS: 1727091d4532SNeel Natu case IDT_GP: 1728091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1729091d4532SNeel Natu default: 1730091d4532SNeel Natu return (EXC_BENIGN); 1731091d4532SNeel Natu } 1732091d4532SNeel Natu } 1733091d4532SNeel Natu 1734091d4532SNeel Natu static int 1735091d4532SNeel Natu nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1736091d4532SNeel Natu uint64_t *retinfo) 1737091d4532SNeel Natu { 1738091d4532SNeel Natu enum exc_class exc1, exc2; 1739091d4532SNeel Natu int type1, vector1; 1740091d4532SNeel Natu 1741091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1742091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1743091d4532SNeel Natu 1744091d4532SNeel Natu /* 1745091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1746091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 1747091d4532SNeel Natu */ 1748091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1749091d4532SNeel Natu vector1 = info1 & 0xff; 1750091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1751091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1752091d4532SNeel Natu info1, info2); 1753091d4532SNeel Natu vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1754091d4532SNeel Natu *retinfo = 0; 1755091d4532SNeel Natu return (0); 1756091d4532SNeel Natu } 1757091d4532SNeel Natu 1758091d4532SNeel Natu /* 1759091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1760091d4532SNeel Natu */ 1761091d4532SNeel Natu exc1 = exception_class(info1); 1762091d4532SNeel Natu exc2 = exception_class(info2); 1763091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1764091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1765091d4532SNeel Natu /* Convert nested fault into a double fault. */ 1766091d4532SNeel Natu *retinfo = IDT_DF; 1767091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1768091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 1769091d4532SNeel Natu } else { 1770091d4532SNeel Natu /* Handle exceptions serially */ 1771091d4532SNeel Natu *retinfo = info2; 1772091d4532SNeel Natu } 1773091d4532SNeel Natu return (1); 1774091d4532SNeel Natu } 1775091d4532SNeel Natu 1776091d4532SNeel Natu static uint64_t 1777091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 1778091d4532SNeel Natu { 1779091d4532SNeel Natu uint64_t info = 0; 1780091d4532SNeel Natu 1781091d4532SNeel Natu if (vcpu->exception_pending) { 1782c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 1783091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1784c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 1785091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 1786c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 1787091d4532SNeel Natu } 1788091d4532SNeel Natu } 1789091d4532SNeel Natu return (info); 1790091d4532SNeel Natu } 1791091d4532SNeel Natu 1792091d4532SNeel Natu int 1793091d4532SNeel Natu vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1794091d4532SNeel Natu { 1795091d4532SNeel Natu struct vcpu *vcpu; 1796091d4532SNeel Natu uint64_t info1, info2; 1797091d4532SNeel Natu int valid; 1798091d4532SNeel Natu 1799091d4532SNeel Natu KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); 1800091d4532SNeel Natu 1801091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1802091d4532SNeel Natu 1803091d4532SNeel Natu info1 = vcpu->exitintinfo; 1804091d4532SNeel Natu vcpu->exitintinfo = 0; 1805091d4532SNeel Natu 1806091d4532SNeel Natu info2 = 0; 1807091d4532SNeel Natu if (vcpu->exception_pending) { 1808091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 1809091d4532SNeel Natu vcpu->exception_pending = 0; 1810091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 1811c9c75df4SNeel Natu vcpu->exc_vector, info2); 1812091d4532SNeel Natu } 1813091d4532SNeel Natu 1814091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 1815091d4532SNeel Natu valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 1816091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 1817091d4532SNeel Natu *retinfo = info1; 1818091d4532SNeel Natu valid = 1; 1819091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 1820091d4532SNeel Natu *retinfo = info2; 1821091d4532SNeel Natu valid = 1; 1822091d4532SNeel Natu } else { 1823091d4532SNeel Natu valid = 0; 1824091d4532SNeel Natu } 1825091d4532SNeel Natu 1826091d4532SNeel Natu if (valid) { 1827091d4532SNeel Natu VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 1828091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 1829091d4532SNeel Natu } 1830091d4532SNeel Natu 1831091d4532SNeel Natu return (valid); 1832091d4532SNeel Natu } 1833091d4532SNeel Natu 1834091d4532SNeel Natu int 1835091d4532SNeel Natu vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 1836091d4532SNeel Natu { 1837091d4532SNeel Natu struct vcpu *vcpu; 1838091d4532SNeel Natu 1839091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1840091d4532SNeel Natu return (EINVAL); 1841091d4532SNeel Natu 1842091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1843091d4532SNeel Natu *info1 = vcpu->exitintinfo; 1844091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 1845091d4532SNeel Natu return (0); 1846091d4532SNeel Natu } 1847091d4532SNeel Natu 1848091d4532SNeel Natu int 1849c9c75df4SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, 1850c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 1851366f6083SPeter Grehan { 1852dc506506SNeel Natu struct vcpu *vcpu; 185347b9935dSNeel Natu uint64_t regval; 18542ce12423SNeel Natu int error; 1855dc506506SNeel Natu 1856366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1857366f6083SPeter Grehan return (EINVAL); 1858366f6083SPeter Grehan 1859c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 1860366f6083SPeter Grehan return (EINVAL); 1861366f6083SPeter Grehan 1862091d4532SNeel Natu /* 1863091d4532SNeel Natu * A double fault exception should never be injected directly into 1864091d4532SNeel Natu * the guest. It is a derived exception that results from specific 1865091d4532SNeel Natu * combinations of nested faults. 1866091d4532SNeel Natu */ 1867c9c75df4SNeel Natu if (vector == IDT_DF) 1868091d4532SNeel Natu return (EINVAL); 1869091d4532SNeel Natu 1870dc506506SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1871366f6083SPeter Grehan 1872dc506506SNeel Natu if (vcpu->exception_pending) { 1873dc506506SNeel Natu VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 1874c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 1875dc506506SNeel Natu return (EBUSY); 1876dc506506SNeel Natu } 1877dc506506SNeel Natu 187847b9935dSNeel Natu if (errcode_valid) { 187947b9935dSNeel Natu /* 188047b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 188147b9935dSNeel Natu */ 188247b9935dSNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, ®val); 188347b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 188447b9935dSNeel Natu if (!(regval & CR0_PE)) 188547b9935dSNeel Natu errcode_valid = 0; 188647b9935dSNeel Natu } 188747b9935dSNeel Natu 18882ce12423SNeel Natu /* 18892ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 18902ce12423SNeel Natu * 18912ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 18922ce12423SNeel Natu * one instruction or incurs an exception. 18932ce12423SNeel Natu */ 18942ce12423SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); 18952ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 18962ce12423SNeel Natu __func__, error)); 18972ce12423SNeel Natu 1898c9c75df4SNeel Natu if (restart_instruction) 1899c9c75df4SNeel Natu vm_restart_instruction(vm, vcpuid); 1900c9c75df4SNeel Natu 1901dc506506SNeel Natu vcpu->exception_pending = 1; 1902c9c75df4SNeel Natu vcpu->exc_vector = vector; 1903c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 1904c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 1905c9c75df4SNeel Natu VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); 1906dc506506SNeel Natu return (0); 1907dc506506SNeel Natu } 1908dc506506SNeel Natu 1909d37f2adbSNeel Natu void 1910d37f2adbSNeel Natu vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1911d37f2adbSNeel Natu int errcode) 1912dc506506SNeel Natu { 1913d37f2adbSNeel Natu struct vm *vm; 1914c9c75df4SNeel Natu int error, restart_instruction; 1915dc506506SNeel Natu 1916d37f2adbSNeel Natu vm = vmarg; 1917c9c75df4SNeel Natu restart_instruction = 1; 1918d37f2adbSNeel Natu 1919c9c75df4SNeel Natu error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, 1920c9c75df4SNeel Natu errcode, restart_instruction); 1921dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 1922dc506506SNeel Natu } 1923dc506506SNeel Natu 1924dc506506SNeel Natu void 1925d37f2adbSNeel Natu vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 1926fd949af6SNeel Natu { 1927d37f2adbSNeel Natu struct vm *vm; 192837a723a5SNeel Natu int error; 192937a723a5SNeel Natu 1930d37f2adbSNeel Natu vm = vmarg; 193137a723a5SNeel Natu VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 193237a723a5SNeel Natu error_code, cr2); 193337a723a5SNeel Natu 193437a723a5SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 193537a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 1936fd949af6SNeel Natu 1937d37f2adbSNeel Natu vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 1938366f6083SPeter Grehan } 1939366f6083SPeter Grehan 194061592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1941366f6083SPeter Grehan 1942f352ff0cSNeel Natu int 1943f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid) 1944f352ff0cSNeel Natu { 1945f352ff0cSNeel Natu struct vcpu *vcpu; 1946f352ff0cSNeel Natu 1947f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1948366f6083SPeter Grehan return (EINVAL); 1949366f6083SPeter Grehan 1950f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1951f352ff0cSNeel Natu 1952f352ff0cSNeel Natu vcpu->nmi_pending = 1; 1953de5ea6b6SNeel Natu vcpu_notify_event(vm, vcpuid, false); 1954f352ff0cSNeel Natu return (0); 1955f352ff0cSNeel Natu } 1956f352ff0cSNeel Natu 1957f352ff0cSNeel Natu int 1958f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid) 1959f352ff0cSNeel Natu { 1960f352ff0cSNeel Natu struct vcpu *vcpu; 1961f352ff0cSNeel Natu 1962f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1963f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1964f352ff0cSNeel Natu 1965f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1966f352ff0cSNeel Natu 1967f352ff0cSNeel Natu return (vcpu->nmi_pending); 1968f352ff0cSNeel Natu } 1969f352ff0cSNeel Natu 1970f352ff0cSNeel Natu void 1971f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid) 1972f352ff0cSNeel Natu { 1973f352ff0cSNeel Natu struct vcpu *vcpu; 1974f352ff0cSNeel Natu 1975f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1976f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1977f352ff0cSNeel Natu 1978f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1979f352ff0cSNeel Natu 1980f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 1981f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 1982f352ff0cSNeel Natu 1983f352ff0cSNeel Natu vcpu->nmi_pending = 0; 1984f352ff0cSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1985366f6083SPeter Grehan } 1986366f6083SPeter Grehan 19870775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 19880775fbb4STycho Nightingale 19890775fbb4STycho Nightingale int 19900775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid) 19910775fbb4STycho Nightingale { 19920775fbb4STycho Nightingale struct vcpu *vcpu; 19930775fbb4STycho Nightingale 19940775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 19950775fbb4STycho Nightingale return (EINVAL); 19960775fbb4STycho Nightingale 19970775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 19980775fbb4STycho Nightingale 19990775fbb4STycho Nightingale vcpu->extint_pending = 1; 20000775fbb4STycho Nightingale vcpu_notify_event(vm, vcpuid, false); 20010775fbb4STycho Nightingale return (0); 20020775fbb4STycho Nightingale } 20030775fbb4STycho Nightingale 20040775fbb4STycho Nightingale int 20050775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid) 20060775fbb4STycho Nightingale { 20070775fbb4STycho Nightingale struct vcpu *vcpu; 20080775fbb4STycho Nightingale 20090775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20100775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 20110775fbb4STycho Nightingale 20120775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 20130775fbb4STycho Nightingale 20140775fbb4STycho Nightingale return (vcpu->extint_pending); 20150775fbb4STycho Nightingale } 20160775fbb4STycho Nightingale 20170775fbb4STycho Nightingale void 20180775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid) 20190775fbb4STycho Nightingale { 20200775fbb4STycho Nightingale struct vcpu *vcpu; 20210775fbb4STycho Nightingale 20220775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20230775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 20240775fbb4STycho Nightingale 20250775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 20260775fbb4STycho Nightingale 20270775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 20280775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 20290775fbb4STycho Nightingale 20300775fbb4STycho Nightingale vcpu->extint_pending = 0; 20310775fbb4STycho Nightingale vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 20320775fbb4STycho Nightingale } 20330775fbb4STycho Nightingale 2034366f6083SPeter Grehan int 2035366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 2036366f6083SPeter Grehan { 2037366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2038366f6083SPeter Grehan return (EINVAL); 2039366f6083SPeter Grehan 2040366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2041366f6083SPeter Grehan return (EINVAL); 2042366f6083SPeter Grehan 2043366f6083SPeter Grehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 2044366f6083SPeter Grehan } 2045366f6083SPeter Grehan 2046366f6083SPeter Grehan int 2047366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val) 2048366f6083SPeter Grehan { 2049366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2050366f6083SPeter Grehan return (EINVAL); 2051366f6083SPeter Grehan 2052366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2053366f6083SPeter Grehan return (EINVAL); 2054366f6083SPeter Grehan 2055366f6083SPeter Grehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 2056366f6083SPeter Grehan } 2057366f6083SPeter Grehan 2058366f6083SPeter Grehan struct vlapic * 2059366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu) 2060366f6083SPeter Grehan { 2061366f6083SPeter Grehan return (vm->vcpu[cpu].vlapic); 2062366f6083SPeter Grehan } 2063366f6083SPeter Grehan 2064565bbb86SNeel Natu struct vioapic * 2065565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2066565bbb86SNeel Natu { 2067565bbb86SNeel Natu 2068565bbb86SNeel Natu return (vm->vioapic); 2069565bbb86SNeel Natu } 2070565bbb86SNeel Natu 207108e3ff32SNeel Natu struct vhpet * 207208e3ff32SNeel Natu vm_hpet(struct vm *vm) 207308e3ff32SNeel Natu { 207408e3ff32SNeel Natu 207508e3ff32SNeel Natu return (vm->vhpet); 207608e3ff32SNeel Natu } 207708e3ff32SNeel Natu 2078366f6083SPeter Grehan boolean_t 2079366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2080366f6083SPeter Grehan { 208107044a96SNeel Natu int found, i, n; 208207044a96SNeel Natu int b, s, f; 2083366f6083SPeter Grehan char *val, *cp, *cp2; 2084366f6083SPeter Grehan 2085366f6083SPeter Grehan /* 208607044a96SNeel Natu * XXX 208707044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 208807044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 208907044a96SNeel Natu * specified using a single environment variable. 209007044a96SNeel Natu * 209107044a96SNeel Natu * Work around this by scanning multiple environment variable 209207044a96SNeel Natu * names instead of a single one - yuck! 2093366f6083SPeter Grehan */ 209407044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 209507044a96SNeel Natu 209607044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2097366f6083SPeter Grehan found = 0; 209807044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 20992be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2100366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2101366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2102366f6083SPeter Grehan *cp2 = '\0'; 2103366f6083SPeter Grehan 2104366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2105366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2106366f6083SPeter Grehan found = 1; 2107366f6083SPeter Grehan break; 2108366f6083SPeter Grehan } 2109366f6083SPeter Grehan 2110366f6083SPeter Grehan if (cp2 != NULL) 2111366f6083SPeter Grehan *cp2++ = ' '; 2112366f6083SPeter Grehan 2113366f6083SPeter Grehan cp = cp2; 2114366f6083SPeter Grehan } 2115366f6083SPeter Grehan freeenv(val); 211607044a96SNeel Natu } 2117366f6083SPeter Grehan return (found); 2118366f6083SPeter Grehan } 2119366f6083SPeter Grehan 2120366f6083SPeter Grehan void * 2121366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2122366f6083SPeter Grehan { 2123366f6083SPeter Grehan 2124366f6083SPeter Grehan return (vm->iommu); 2125366f6083SPeter Grehan } 2126366f6083SPeter Grehan 212775dd3366SNeel Natu int 2128f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 2129f80330a8SNeel Natu bool from_idle) 2130366f6083SPeter Grehan { 213175dd3366SNeel Natu int error; 2132366f6083SPeter Grehan struct vcpu *vcpu; 2133366f6083SPeter Grehan 2134366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2135366f6083SPeter Grehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 2136366f6083SPeter Grehan 2137366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2138366f6083SPeter Grehan 213975dd3366SNeel Natu vcpu_lock(vcpu); 2140*248e6799SNeel Natu error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); 214175dd3366SNeel Natu vcpu_unlock(vcpu); 214275dd3366SNeel Natu 214375dd3366SNeel Natu return (error); 214475dd3366SNeel Natu } 214575dd3366SNeel Natu 214675dd3366SNeel Natu enum vcpu_state 2147d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 2148366f6083SPeter Grehan { 2149366f6083SPeter Grehan struct vcpu *vcpu; 215075dd3366SNeel Natu enum vcpu_state state; 2151366f6083SPeter Grehan 2152366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2153366f6083SPeter Grehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 2154366f6083SPeter Grehan 2155366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2156366f6083SPeter Grehan 215775dd3366SNeel Natu vcpu_lock(vcpu); 215875dd3366SNeel Natu state = vcpu->state; 2159d3c11f40SPeter Grehan if (hostcpu != NULL) 2160d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 216175dd3366SNeel Natu vcpu_unlock(vcpu); 2162366f6083SPeter Grehan 216375dd3366SNeel Natu return (state); 2164366f6083SPeter Grehan } 2165366f6083SPeter Grehan 216695ebc360SNeel Natu int 2167366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid) 2168366f6083SPeter Grehan { 2169366f6083SPeter Grehan 217095ebc360SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 217195ebc360SNeel Natu return (EINVAL); 217295ebc360SNeel Natu 217395ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->active_cpus)) 217495ebc360SNeel Natu return (EBUSY); 217522d822c6SNeel Natu 217622d822c6SNeel Natu VCPU_CTR0(vm, vcpuid, "activated"); 217722d822c6SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 217895ebc360SNeel Natu return (0); 2179366f6083SPeter Grehan } 2180366f6083SPeter Grehan 2181a5615c90SPeter Grehan cpuset_t 2182366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2183366f6083SPeter Grehan { 2184366f6083SPeter Grehan 2185366f6083SPeter Grehan return (vm->active_cpus); 2186366f6083SPeter Grehan } 2187366f6083SPeter Grehan 218895ebc360SNeel Natu cpuset_t 218995ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 219095ebc360SNeel Natu { 219195ebc360SNeel Natu 219295ebc360SNeel Natu return (vm->suspended_cpus); 219395ebc360SNeel Natu } 219495ebc360SNeel Natu 2195366f6083SPeter Grehan void * 2196366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid) 2197366f6083SPeter Grehan { 2198366f6083SPeter Grehan 2199366f6083SPeter Grehan return (vm->vcpu[vcpuid].stats); 2200366f6083SPeter Grehan } 2201e9027382SNeel Natu 2202e9027382SNeel Natu int 2203e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2204e9027382SNeel Natu { 2205e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2206e9027382SNeel Natu return (EINVAL); 2207e9027382SNeel Natu 2208e9027382SNeel Natu *state = vm->vcpu[vcpuid].x2apic_state; 2209e9027382SNeel Natu 2210e9027382SNeel Natu return (0); 2211e9027382SNeel Natu } 2212e9027382SNeel Natu 2213e9027382SNeel Natu int 2214e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2215e9027382SNeel Natu { 2216e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2217e9027382SNeel Natu return (EINVAL); 2218e9027382SNeel Natu 22193f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2220e9027382SNeel Natu return (EINVAL); 2221e9027382SNeel Natu 2222e9027382SNeel Natu vm->vcpu[vcpuid].x2apic_state = state; 2223e9027382SNeel Natu 222473820fb0SNeel Natu vlapic_set_x2apic_state(vm, vcpuid, state); 222573820fb0SNeel Natu 2226e9027382SNeel Natu return (0); 2227e9027382SNeel Natu } 222875dd3366SNeel Natu 222922821874SNeel Natu /* 223022821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 223122821874SNeel Natu * as soon as possible: 223222821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 223322821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 223422821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 223522821874SNeel Natu */ 2236*248e6799SNeel Natu static void 2237*248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 223875dd3366SNeel Natu { 223975dd3366SNeel Natu int hostcpu; 224075dd3366SNeel Natu 224175dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2242ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2243ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2244de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2245ef39d7e9SNeel Natu if (lapic_intr) { 2246add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2247add611fdSNeel Natu vmm_ipinum); 2248ef39d7e9SNeel Natu } else { 224975dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 225075dd3366SNeel Natu } 2251ef39d7e9SNeel Natu } else { 2252ef39d7e9SNeel Natu /* 2253ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2254ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2255ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2256ef39d7e9SNeel Natu * transitions back to guest context. 2257ef39d7e9SNeel Natu */ 2258ef39d7e9SNeel Natu } 2259ef39d7e9SNeel Natu } else { 2260ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2261ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2262366f6083SPeter Grehan if (vcpu->state == VCPU_SLEEPING) 2263366f6083SPeter Grehan wakeup_one(vcpu); 2264366f6083SPeter Grehan } 2265*248e6799SNeel Natu } 2266*248e6799SNeel Natu 2267*248e6799SNeel Natu void 2268*248e6799SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 2269*248e6799SNeel Natu { 2270*248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 2271*248e6799SNeel Natu 2272*248e6799SNeel Natu vcpu_lock(vcpu); 2273*248e6799SNeel Natu vcpu_notify_event_locked(vcpu, lapic_intr); 2274f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2275f76fc5d4SNeel Natu } 2276318224bbSNeel Natu 2277318224bbSNeel Natu struct vmspace * 2278318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2279318224bbSNeel Natu { 2280318224bbSNeel Natu 2281318224bbSNeel Natu return (vm->vmspace); 2282318224bbSNeel Natu } 2283565bbb86SNeel Natu 2284565bbb86SNeel Natu int 2285565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2286565bbb86SNeel Natu { 2287565bbb86SNeel Natu /* 2288565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2289565bbb86SNeel Natu */ 2290565bbb86SNeel Natu return (apicid); 2291565bbb86SNeel Natu } 22925b8a8cd1SNeel Natu 22935b8a8cd1SNeel Natu void 22945b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 22955b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 22965b8a8cd1SNeel Natu { 2297970955e4SNeel Natu int i; 2298970955e4SNeel Natu 22995b8a8cd1SNeel Natu /* 23005b8a8cd1SNeel Natu * Enforce that this function is called without any locks 23015b8a8cd1SNeel Natu */ 23025b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 23035b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 23045b8a8cd1SNeel Natu ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 23055b8a8cd1SNeel Natu 23065b8a8cd1SNeel Natu restart: 23075b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 23085b8a8cd1SNeel Natu if (vm->rendezvous_func != NULL) { 23095b8a8cd1SNeel Natu /* 23105b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 23115b8a8cd1SNeel Natu * call the rendezvous handler in case this 'vcpuid' is one 23125b8a8cd1SNeel Natu * of the targets of the rendezvous. 23135b8a8cd1SNeel Natu */ 23145b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 23155b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 23165b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 23175b8a8cd1SNeel Natu goto restart; 23185b8a8cd1SNeel Natu } 23195b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 23205b8a8cd1SNeel Natu "rendezvous is still in progress")); 23215b8a8cd1SNeel Natu 23225b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 23235b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 23245b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 23255b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 23265b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, func); 23275b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 23285b8a8cd1SNeel Natu 2329970955e4SNeel Natu /* 2330970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2331970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2332970955e4SNeel Natu */ 2333970955e4SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 2334970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 2335970955e4SNeel Natu vcpu_notify_event(vm, i, false); 2336970955e4SNeel Natu } 2337970955e4SNeel Natu 23385b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 23395b8a8cd1SNeel Natu } 2340762fd208STycho Nightingale 2341762fd208STycho Nightingale struct vatpic * 2342762fd208STycho Nightingale vm_atpic(struct vm *vm) 2343762fd208STycho Nightingale { 2344762fd208STycho Nightingale return (vm->vatpic); 2345762fd208STycho Nightingale } 2346e883c9bbSTycho Nightingale 2347e883c9bbSTycho Nightingale struct vatpit * 2348e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2349e883c9bbSTycho Nightingale { 2350e883c9bbSTycho Nightingale return (vm->vatpit); 2351e883c9bbSTycho Nightingale } 2352d17b5104SNeel Natu 2353160ef77aSNeel Natu struct vpmtmr * 2354160ef77aSNeel Natu vm_pmtmr(struct vm *vm) 2355160ef77aSNeel Natu { 2356160ef77aSNeel Natu 2357160ef77aSNeel Natu return (vm->vpmtmr); 2358160ef77aSNeel Natu } 2359160ef77aSNeel Natu 23600dafa5cdSNeel Natu struct vrtc * 23610dafa5cdSNeel Natu vm_rtc(struct vm *vm) 23620dafa5cdSNeel Natu { 23630dafa5cdSNeel Natu 23640dafa5cdSNeel Natu return (vm->vrtc); 23650dafa5cdSNeel Natu } 23660dafa5cdSNeel Natu 2367d17b5104SNeel Natu enum vm_reg_name 2368d17b5104SNeel Natu vm_segment_name(int seg) 2369d17b5104SNeel Natu { 2370d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2371d17b5104SNeel Natu VM_REG_GUEST_ES, 2372d17b5104SNeel Natu VM_REG_GUEST_CS, 2373d17b5104SNeel Natu VM_REG_GUEST_SS, 2374d17b5104SNeel Natu VM_REG_GUEST_DS, 2375d17b5104SNeel Natu VM_REG_GUEST_FS, 2376d17b5104SNeel Natu VM_REG_GUEST_GS 2377d17b5104SNeel Natu }; 2378d17b5104SNeel Natu 2379d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2380d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2381d17b5104SNeel Natu return (seg_names[seg]); 2382d17b5104SNeel Natu } 2383cf1d80d8SPeter Grehan 2384d665d229SNeel Natu void 2385d665d229SNeel Natu vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2386d665d229SNeel Natu int num_copyinfo) 2387d665d229SNeel Natu { 2388d665d229SNeel Natu int idx; 2389d665d229SNeel Natu 2390d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2391d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2392d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2393d665d229SNeel Natu } 2394d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2395d665d229SNeel Natu } 2396d665d229SNeel Natu 2397d665d229SNeel Natu int 2398d665d229SNeel Natu vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2399d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 24009c4d5478SNeel Natu int num_copyinfo, int *fault) 2401d665d229SNeel Natu { 2402d665d229SNeel Natu int error, idx, nused; 2403d665d229SNeel Natu size_t n, off, remaining; 2404d665d229SNeel Natu void *hva, *cookie; 2405d665d229SNeel Natu uint64_t gpa; 2406d665d229SNeel Natu 2407d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2408d665d229SNeel Natu 2409d665d229SNeel Natu nused = 0; 2410d665d229SNeel Natu remaining = len; 2411d665d229SNeel Natu while (remaining > 0) { 2412d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 24139c4d5478SNeel Natu error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); 24149c4d5478SNeel Natu if (error || *fault) 2415d665d229SNeel Natu return (error); 2416d665d229SNeel Natu off = gpa & PAGE_MASK; 2417d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2418d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2419d665d229SNeel Natu copyinfo[nused].len = n; 2420d665d229SNeel Natu remaining -= n; 2421d665d229SNeel Natu gla += n; 2422d665d229SNeel Natu nused++; 2423d665d229SNeel Natu } 2424d665d229SNeel Natu 2425d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 2426d665d229SNeel Natu hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, 2427d665d229SNeel Natu prot, &cookie); 2428d665d229SNeel Natu if (hva == NULL) 2429d665d229SNeel Natu break; 2430d665d229SNeel Natu copyinfo[idx].hva = hva; 2431d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2432d665d229SNeel Natu } 2433d665d229SNeel Natu 2434d665d229SNeel Natu if (idx != nused) { 2435d665d229SNeel Natu vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 24369c4d5478SNeel Natu return (EFAULT); 2437d665d229SNeel Natu } else { 24389c4d5478SNeel Natu *fault = 0; 2439d665d229SNeel Natu return (0); 2440d665d229SNeel Natu } 2441d665d229SNeel Natu } 2442d665d229SNeel Natu 2443d665d229SNeel Natu void 2444d665d229SNeel Natu vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2445d665d229SNeel Natu size_t len) 2446d665d229SNeel Natu { 2447d665d229SNeel Natu char *dst; 2448d665d229SNeel Natu int idx; 2449d665d229SNeel Natu 2450d665d229SNeel Natu dst = kaddr; 2451d665d229SNeel Natu idx = 0; 2452d665d229SNeel Natu while (len > 0) { 2453d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2454d665d229SNeel Natu len -= copyinfo[idx].len; 2455d665d229SNeel Natu dst += copyinfo[idx].len; 2456d665d229SNeel Natu idx++; 2457d665d229SNeel Natu } 2458d665d229SNeel Natu } 2459d665d229SNeel Natu 2460d665d229SNeel Natu void 2461d665d229SNeel Natu vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2462d665d229SNeel Natu struct vm_copyinfo *copyinfo, size_t len) 2463d665d229SNeel Natu { 2464d665d229SNeel Natu const char *src; 2465d665d229SNeel Natu int idx; 2466d665d229SNeel Natu 2467d665d229SNeel Natu src = kaddr; 2468d665d229SNeel Natu idx = 0; 2469d665d229SNeel Natu while (len > 0) { 2470d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2471d665d229SNeel Natu len -= copyinfo[idx].len; 2472d665d229SNeel Natu src += copyinfo[idx].len; 2473d665d229SNeel Natu idx++; 2474d665d229SNeel Natu } 2475d665d229SNeel Natu } 2476cf1d80d8SPeter Grehan 2477cf1d80d8SPeter Grehan /* 2478cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. Since 2479cf1d80d8SPeter Grehan * these are global stats, only return the values with for vCPU 0 2480cf1d80d8SPeter Grehan */ 2481cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2482cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2483cf1d80d8SPeter Grehan 2484cf1d80d8SPeter Grehan static void 2485cf1d80d8SPeter Grehan vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2486cf1d80d8SPeter Grehan { 2487cf1d80d8SPeter Grehan 2488cf1d80d8SPeter Grehan if (vcpu == 0) { 2489cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2490cf1d80d8SPeter Grehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2491cf1d80d8SPeter Grehan } 2492cf1d80d8SPeter Grehan } 2493cf1d80d8SPeter Grehan 2494cf1d80d8SPeter Grehan static void 2495cf1d80d8SPeter Grehan vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2496cf1d80d8SPeter Grehan { 2497cf1d80d8SPeter Grehan 2498cf1d80d8SPeter Grehan if (vcpu == 0) { 2499cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2500cf1d80d8SPeter Grehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2501cf1d80d8SPeter Grehan } 2502cf1d80d8SPeter Grehan } 2503cf1d80d8SPeter Grehan 2504cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2505cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2506