1366f6083SPeter Grehan /*- 2*c49761ddSPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*c49761ddSPedro F. Giffuni * 4366f6083SPeter Grehan * Copyright (c) 2011 NetApp, Inc. 5366f6083SPeter Grehan * All rights reserved. 6366f6083SPeter Grehan * 7366f6083SPeter Grehan * Redistribution and use in source and binary forms, with or without 8366f6083SPeter Grehan * modification, are permitted provided that the following conditions 9366f6083SPeter Grehan * are met: 10366f6083SPeter Grehan * 1. Redistributions of source code must retain the above copyright 11366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer. 12366f6083SPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 13366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer in the 14366f6083SPeter Grehan * documentation and/or other materials provided with the distribution. 15366f6083SPeter Grehan * 16366f6083SPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17366f6083SPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18366f6083SPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19366f6083SPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20366f6083SPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21366f6083SPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22366f6083SPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23366f6083SPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24366f6083SPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25366f6083SPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26366f6083SPeter Grehan * SUCH DAMAGE. 27366f6083SPeter Grehan * 28366f6083SPeter Grehan * $FreeBSD$ 29366f6083SPeter Grehan */ 30366f6083SPeter Grehan 31366f6083SPeter Grehan #include <sys/cdefs.h> 32366f6083SPeter Grehan __FBSDID("$FreeBSD$"); 33366f6083SPeter Grehan 34366f6083SPeter Grehan #include <sys/param.h> 3538f1b189SPeter Grehan #include <sys/systm.h> 36366f6083SPeter Grehan #include <sys/kernel.h> 37366f6083SPeter Grehan #include <sys/module.h> 38366f6083SPeter Grehan #include <sys/sysctl.h> 39366f6083SPeter Grehan #include <sys/malloc.h> 40366f6083SPeter Grehan #include <sys/pcpu.h> 41366f6083SPeter Grehan #include <sys/lock.h> 42366f6083SPeter Grehan #include <sys/mutex.h> 43366f6083SPeter Grehan #include <sys/proc.h> 44318224bbSNeel Natu #include <sys/rwlock.h> 45366f6083SPeter Grehan #include <sys/sched.h> 46366f6083SPeter Grehan #include <sys/smp.h> 47366f6083SPeter Grehan #include <sys/systm.h> 48366f6083SPeter Grehan 49366f6083SPeter Grehan #include <vm/vm.h> 50318224bbSNeel Natu #include <vm/vm_object.h> 51318224bbSNeel Natu #include <vm/vm_page.h> 52318224bbSNeel Natu #include <vm/pmap.h> 53318224bbSNeel Natu #include <vm/vm_map.h> 54318224bbSNeel Natu #include <vm/vm_extern.h> 55318224bbSNeel Natu #include <vm/vm_param.h> 56366f6083SPeter Grehan 5763e62d39SJohn Baldwin #include <machine/cpu.h> 58366f6083SPeter Grehan #include <machine/pcb.h> 5975dd3366SNeel Natu #include <machine/smp.h> 601c052192SNeel Natu #include <x86/psl.h> 6134a6b2d6SJohn Baldwin #include <x86/apicreg.h> 62366f6083SPeter Grehan 63366f6083SPeter Grehan #include <machine/vmm.h> 64565bbb86SNeel Natu #include <machine/vmm_dev.h> 65e813a873SNeel Natu #include <machine/vmm_instruction_emul.h> 66565bbb86SNeel Natu 67d17b5104SNeel Natu #include "vmm_ioport.h" 68318224bbSNeel Natu #include "vmm_ktr.h" 69b01c2033SNeel Natu #include "vmm_host.h" 70366f6083SPeter Grehan #include "vmm_mem.h" 71366f6083SPeter Grehan #include "vmm_util.h" 72762fd208STycho Nightingale #include "vatpic.h" 73e883c9bbSTycho Nightingale #include "vatpit.h" 7408e3ff32SNeel Natu #include "vhpet.h" 75565bbb86SNeel Natu #include "vioapic.h" 76366f6083SPeter Grehan #include "vlapic.h" 77160ef77aSNeel Natu #include "vpmtmr.h" 780dafa5cdSNeel Natu #include "vrtc.h" 79366f6083SPeter Grehan #include "vmm_stat.h" 80f76fc5d4SNeel Natu #include "vmm_lapic.h" 81366f6083SPeter Grehan 82366f6083SPeter Grehan #include "io/ppt.h" 83366f6083SPeter Grehan #include "io/iommu.h" 84366f6083SPeter Grehan 85366f6083SPeter Grehan struct vlapic; 86366f6083SPeter Grehan 875fcf252fSNeel Natu /* 885fcf252fSNeel Natu * Initialization: 895fcf252fSNeel Natu * (a) allocated when vcpu is created 905fcf252fSNeel Natu * (i) initialized when vcpu is created and when it is reinitialized 915fcf252fSNeel Natu * (o) initialized the first time the vcpu is created 925fcf252fSNeel Natu * (x) initialized before use 935fcf252fSNeel Natu */ 94366f6083SPeter Grehan struct vcpu { 955fcf252fSNeel Natu struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 965fcf252fSNeel Natu enum vcpu_state state; /* (o) vcpu state */ 975fcf252fSNeel Natu int hostcpu; /* (o) vcpu's host cpu */ 98248e6799SNeel Natu int reqidle; /* (i) request vcpu to idle */ 995fcf252fSNeel Natu struct vlapic *vlapic; /* (i) APIC device model */ 1005fcf252fSNeel Natu enum x2apic_state x2apic_state; /* (i) APIC mode */ 101091d4532SNeel Natu uint64_t exitintinfo; /* (i) events pending at VM exit */ 1025fcf252fSNeel Natu int nmi_pending; /* (i) NMI pending */ 1035fcf252fSNeel Natu int extint_pending; /* (i) INTR pending */ 1045fcf252fSNeel Natu int exception_pending; /* (i) exception pending */ 105c9c75df4SNeel Natu int exc_vector; /* (x) exception collateral */ 106c9c75df4SNeel Natu int exc_errcode_valid; 107c9c75df4SNeel Natu uint32_t exc_errcode; 1085fcf252fSNeel Natu struct savefpu *guestfpu; /* (a,i) guest fpu state */ 1095fcf252fSNeel Natu uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 1105fcf252fSNeel Natu void *stats; /* (a,i) statistics */ 1115fcf252fSNeel Natu struct vm_exit exitinfo; /* (x) exit reason and collateral */ 112d087a399SNeel Natu uint64_t nextrip; /* (x) next instruction to execute */ 113366f6083SPeter Grehan }; 114366f6083SPeter Grehan 1155fcf252fSNeel Natu #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 116f76fc5d4SNeel Natu #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 117f76fc5d4SNeel Natu #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 118f76fc5d4SNeel Natu #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 119318224bbSNeel Natu #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 12075dd3366SNeel Natu 121318224bbSNeel Natu struct mem_seg { 1229b1aa8d6SNeel Natu size_t len; 1239b1aa8d6SNeel Natu bool sysmem; 1249b1aa8d6SNeel Natu struct vm_object *object; 1259b1aa8d6SNeel Natu }; 1266bcf245eSMarcel Moolenaar #define VM_MAX_MEMSEGS 3 1279b1aa8d6SNeel Natu 1289b1aa8d6SNeel Natu struct mem_map { 129318224bbSNeel Natu vm_paddr_t gpa; 130318224bbSNeel Natu size_t len; 1319b1aa8d6SNeel Natu vm_ooffset_t segoff; 1329b1aa8d6SNeel Natu int segid; 1339b1aa8d6SNeel Natu int prot; 1349b1aa8d6SNeel Natu int flags; 135318224bbSNeel Natu }; 1369b1aa8d6SNeel Natu #define VM_MAX_MEMMAPS 4 137366f6083SPeter Grehan 138366f6083SPeter Grehan /* 1395fcf252fSNeel Natu * Initialization: 1405fcf252fSNeel Natu * (o) initialized the first time the VM is created 1415fcf252fSNeel Natu * (i) initialized when VM is created and when it is reinitialized 1425fcf252fSNeel Natu * (x) initialized before use 143366f6083SPeter Grehan */ 1445fcf252fSNeel Natu struct vm { 1455fcf252fSNeel Natu void *cookie; /* (i) cpu-specific data */ 1465fcf252fSNeel Natu void *iommu; /* (x) iommu-specific data */ 1475fcf252fSNeel Natu struct vhpet *vhpet; /* (i) virtual HPET */ 1485fcf252fSNeel Natu struct vioapic *vioapic; /* (i) virtual ioapic */ 1495fcf252fSNeel Natu struct vatpic *vatpic; /* (i) virtual atpic */ 1505fcf252fSNeel Natu struct vatpit *vatpit; /* (i) virtual atpit */ 151160ef77aSNeel Natu struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 1520dafa5cdSNeel Natu struct vrtc *vrtc; /* (o) virtual RTC */ 1535fcf252fSNeel Natu volatile cpuset_t active_cpus; /* (i) active vcpus */ 1545fcf252fSNeel Natu int suspend; /* (i) stop VM execution */ 1555fcf252fSNeel Natu volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 1565fcf252fSNeel Natu volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 1575fcf252fSNeel Natu cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ 1585fcf252fSNeel Natu cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ 1595fcf252fSNeel Natu void *rendezvous_arg; /* (x) rendezvous func/arg */ 1605b8a8cd1SNeel Natu vm_rendezvous_func_t rendezvous_func; 1615fcf252fSNeel Natu struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 1629b1aa8d6SNeel Natu struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ 1639b1aa8d6SNeel Natu struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ 1645fcf252fSNeel Natu struct vmspace *vmspace; /* (o) guest's address space */ 1655fcf252fSNeel Natu char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 1665fcf252fSNeel Natu struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 167366f6083SPeter Grehan }; 168366f6083SPeter Grehan 169d5408b1dSNeel Natu static int vmm_initialized; 170d5408b1dSNeel Natu 171366f6083SPeter Grehan static struct vmm_ops *ops; 172add611fdSNeel Natu #define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) 173366f6083SPeter Grehan #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 17463e62d39SJohn Baldwin #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 175366f6083SPeter Grehan 176318224bbSNeel Natu #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 177248e6799SNeel Natu #define VMRUN(vmi, vcpu, rip, pmap, evinfo) \ 178248e6799SNeel Natu (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO) 179366f6083SPeter Grehan #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 180318224bbSNeel Natu #define VMSPACE_ALLOC(min, max) \ 181318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 182318224bbSNeel Natu #define VMSPACE_FREE(vmspace) \ 183318224bbSNeel Natu (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 184366f6083SPeter Grehan #define VMGETREG(vmi, vcpu, num, retval) \ 185366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 186366f6083SPeter Grehan #define VMSETREG(vmi, vcpu, num, val) \ 187366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 188366f6083SPeter Grehan #define VMGETDESC(vmi, vcpu, num, desc) \ 189366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 190366f6083SPeter Grehan #define VMSETDESC(vmi, vcpu, num, desc) \ 191366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 192366f6083SPeter Grehan #define VMGETCAP(vmi, vcpu, num, retval) \ 193366f6083SPeter Grehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 194366f6083SPeter Grehan #define VMSETCAP(vmi, vcpu, num, val) \ 195366f6083SPeter Grehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 196de5ea6b6SNeel Natu #define VLAPIC_INIT(vmi, vcpu) \ 197de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) 198de5ea6b6SNeel Natu #define VLAPIC_CLEANUP(vmi, vlapic) \ 199de5ea6b6SNeel Natu (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) 200366f6083SPeter Grehan 201014a52f3SNeel Natu #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 202014a52f3SNeel Natu #define fpu_stop_emulating() clts() 203366f6083SPeter Grehan 204366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm"); 205366f6083SPeter Grehan 206366f6083SPeter Grehan /* statistics */ 20761592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 208366f6083SPeter Grehan 209add611fdSNeel Natu SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 210add611fdSNeel Natu 211055fc2cbSNeel Natu /* 212055fc2cbSNeel Natu * Halt the guest if all vcpus are executing a HLT instruction with 213055fc2cbSNeel Natu * interrupts disabled. 214055fc2cbSNeel Natu */ 215055fc2cbSNeel Natu static int halt_detection_enabled = 1; 216055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 217055fc2cbSNeel Natu &halt_detection_enabled, 0, 218055fc2cbSNeel Natu "Halt VM if all vcpus execute HLT with interrupts disabled"); 219055fc2cbSNeel Natu 220978f3da1SAndriy Gapon static int vmm_ipinum; 221add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 222add611fdSNeel Natu "IPI vector used for vcpu notifications"); 223add611fdSNeel Natu 224b0538143SNeel Natu static int trace_guest_exceptions; 225b0538143SNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 226b0538143SNeel Natu &trace_guest_exceptions, 0, 227b0538143SNeel Natu "Trap into hypervisor on all guest exceptions and reflect them back"); 228b0538143SNeel Natu 2299b1aa8d6SNeel Natu static void vm_free_memmap(struct vm *vm, int ident); 2309b1aa8d6SNeel Natu static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 231248e6799SNeel Natu static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); 232248e6799SNeel Natu 233248e6799SNeel Natu #ifdef KTR 234248e6799SNeel Natu static const char * 235248e6799SNeel Natu vcpu_state2str(enum vcpu_state state) 236248e6799SNeel Natu { 237248e6799SNeel Natu 238248e6799SNeel Natu switch (state) { 239248e6799SNeel Natu case VCPU_IDLE: 240248e6799SNeel Natu return ("idle"); 241248e6799SNeel Natu case VCPU_FROZEN: 242248e6799SNeel Natu return ("frozen"); 243248e6799SNeel Natu case VCPU_RUNNING: 244248e6799SNeel Natu return ("running"); 245248e6799SNeel Natu case VCPU_SLEEPING: 246248e6799SNeel Natu return ("sleeping"); 247248e6799SNeel Natu default: 248248e6799SNeel Natu return ("unknown"); 249248e6799SNeel Natu } 250248e6799SNeel Natu } 251248e6799SNeel Natu #endif 252248e6799SNeel Natu 253366f6083SPeter Grehan static void 2545fcf252fSNeel Natu vcpu_cleanup(struct vm *vm, int i, bool destroy) 255366f6083SPeter Grehan { 256de5ea6b6SNeel Natu struct vcpu *vcpu = &vm->vcpu[i]; 257de5ea6b6SNeel Natu 258de5ea6b6SNeel Natu VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); 2595fcf252fSNeel Natu if (destroy) { 260366f6083SPeter Grehan vmm_stat_free(vcpu->stats); 26138f1b189SPeter Grehan fpu_save_area_free(vcpu->guestfpu); 262366f6083SPeter Grehan } 2635fcf252fSNeel Natu } 264366f6083SPeter Grehan 265366f6083SPeter Grehan static void 2665fcf252fSNeel Natu vcpu_init(struct vm *vm, int vcpu_id, bool create) 267366f6083SPeter Grehan { 268366f6083SPeter Grehan struct vcpu *vcpu; 269366f6083SPeter Grehan 2705fcf252fSNeel Natu KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, 2715fcf252fSNeel Natu ("vcpu_init: invalid vcpu %d", vcpu_id)); 2725fcf252fSNeel Natu 273366f6083SPeter Grehan vcpu = &vm->vcpu[vcpu_id]; 274366f6083SPeter Grehan 2755fcf252fSNeel Natu if (create) { 2765fcf252fSNeel Natu KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 2775fcf252fSNeel Natu "initialized", vcpu_id)); 27875dd3366SNeel Natu vcpu_lock_init(vcpu); 2795fcf252fSNeel Natu vcpu->state = VCPU_IDLE; 28075dd3366SNeel Natu vcpu->hostcpu = NOCPU; 2815fcf252fSNeel Natu vcpu->guestfpu = fpu_save_area_alloc(); 2825fcf252fSNeel Natu vcpu->stats = vmm_stat_alloc(); 2835fcf252fSNeel Natu } 2845fcf252fSNeel Natu 285de5ea6b6SNeel Natu vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); 28652e5c8a2SNeel Natu vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); 287248e6799SNeel Natu vcpu->reqidle = 0; 288091d4532SNeel Natu vcpu->exitintinfo = 0; 2895fcf252fSNeel Natu vcpu->nmi_pending = 0; 2905fcf252fSNeel Natu vcpu->extint_pending = 0; 2915fcf252fSNeel Natu vcpu->exception_pending = 0; 292abb023fbSJohn Baldwin vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 29338f1b189SPeter Grehan fpu_save_area_reset(vcpu->guestfpu); 2945fcf252fSNeel Natu vmm_stat_init(vcpu->stats); 295366f6083SPeter Grehan } 296366f6083SPeter Grehan 297b0538143SNeel Natu int 298b0538143SNeel Natu vcpu_trace_exceptions(struct vm *vm, int vcpuid) 299b0538143SNeel Natu { 300b0538143SNeel Natu 301b0538143SNeel Natu return (trace_guest_exceptions); 302b0538143SNeel Natu } 303b0538143SNeel Natu 30498ed632cSNeel Natu struct vm_exit * 30598ed632cSNeel Natu vm_exitinfo(struct vm *vm, int cpuid) 30698ed632cSNeel Natu { 30798ed632cSNeel Natu struct vcpu *vcpu; 30898ed632cSNeel Natu 30998ed632cSNeel Natu if (cpuid < 0 || cpuid >= VM_MAXCPU) 31098ed632cSNeel Natu panic("vm_exitinfo: invalid cpuid %d", cpuid); 31198ed632cSNeel Natu 31298ed632cSNeel Natu vcpu = &vm->vcpu[cpuid]; 31398ed632cSNeel Natu 31498ed632cSNeel Natu return (&vcpu->exitinfo); 31598ed632cSNeel Natu } 31698ed632cSNeel Natu 31763e62d39SJohn Baldwin static void 31863e62d39SJohn Baldwin vmm_resume(void) 31963e62d39SJohn Baldwin { 32063e62d39SJohn Baldwin VMM_RESUME(); 32163e62d39SJohn Baldwin } 32263e62d39SJohn Baldwin 323366f6083SPeter Grehan static int 324366f6083SPeter Grehan vmm_init(void) 325366f6083SPeter Grehan { 326366f6083SPeter Grehan int error; 327366f6083SPeter Grehan 328b01c2033SNeel Natu vmm_host_state_init(); 329add611fdSNeel Natu 33018a2b08eSNeel Natu vmm_ipinum = lapic_ipi_alloc(&IDTVEC(justreturn)); 33118a2b08eSNeel Natu if (vmm_ipinum < 0) 332add611fdSNeel Natu vmm_ipinum = IPI_AST; 333366f6083SPeter Grehan 334366f6083SPeter Grehan error = vmm_mem_init(); 335366f6083SPeter Grehan if (error) 336366f6083SPeter Grehan return (error); 337366f6083SPeter Grehan 338366f6083SPeter Grehan if (vmm_is_intel()) 339366f6083SPeter Grehan ops = &vmm_ops_intel; 340366f6083SPeter Grehan else if (vmm_is_amd()) 341366f6083SPeter Grehan ops = &vmm_ops_amd; 342366f6083SPeter Grehan else 343366f6083SPeter Grehan return (ENXIO); 344366f6083SPeter Grehan 34563e62d39SJohn Baldwin vmm_resume_p = vmm_resume; 346366f6083SPeter Grehan 347add611fdSNeel Natu return (VMM_INIT(vmm_ipinum)); 348366f6083SPeter Grehan } 349366f6083SPeter Grehan 350366f6083SPeter Grehan static int 351366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg) 352366f6083SPeter Grehan { 353366f6083SPeter Grehan int error; 354366f6083SPeter Grehan 355366f6083SPeter Grehan switch (what) { 356366f6083SPeter Grehan case MOD_LOAD: 357366f6083SPeter Grehan vmmdev_init(); 358366f6083SPeter Grehan error = vmm_init(); 359d5408b1dSNeel Natu if (error == 0) 360d5408b1dSNeel Natu vmm_initialized = 1; 361366f6083SPeter Grehan break; 362366f6083SPeter Grehan case MOD_UNLOAD: 363cdc5b9e7SNeel Natu error = vmmdev_cleanup(); 364cdc5b9e7SNeel Natu if (error == 0) { 36563e62d39SJohn Baldwin vmm_resume_p = NULL; 366366f6083SPeter Grehan iommu_cleanup(); 367add611fdSNeel Natu if (vmm_ipinum != IPI_AST) 36818a2b08eSNeel Natu lapic_ipi_free(vmm_ipinum); 369366f6083SPeter Grehan error = VMM_CLEANUP(); 37081ef6611SPeter Grehan /* 37181ef6611SPeter Grehan * Something bad happened - prevent new 37281ef6611SPeter Grehan * VMs from being created 37381ef6611SPeter Grehan */ 37481ef6611SPeter Grehan if (error) 375d5408b1dSNeel Natu vmm_initialized = 0; 37681ef6611SPeter Grehan } 377366f6083SPeter Grehan break; 378366f6083SPeter Grehan default: 379366f6083SPeter Grehan error = 0; 380366f6083SPeter Grehan break; 381366f6083SPeter Grehan } 382366f6083SPeter Grehan return (error); 383366f6083SPeter Grehan } 384366f6083SPeter Grehan 385366f6083SPeter Grehan static moduledata_t vmm_kmod = { 386366f6083SPeter Grehan "vmm", 387366f6083SPeter Grehan vmm_handler, 388366f6083SPeter Grehan NULL 389366f6083SPeter Grehan }; 390366f6083SPeter Grehan 391366f6083SPeter Grehan /* 392e3f0800bSNeel Natu * vmm initialization has the following dependencies: 393e3f0800bSNeel Natu * 394e3f0800bSNeel Natu * - VT-x initialization requires smp_rendezvous() and therefore must happen 395e3f0800bSNeel Natu * after SMP is fully functional (after SI_SUB_SMP). 396366f6083SPeter Grehan */ 397e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 398366f6083SPeter Grehan MODULE_VERSION(vmm, 1); 399366f6083SPeter Grehan 4005fcf252fSNeel Natu static void 4015fcf252fSNeel Natu vm_init(struct vm *vm, bool create) 4025fcf252fSNeel Natu { 4035fcf252fSNeel Natu int i; 4045fcf252fSNeel Natu 4055fcf252fSNeel Natu vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); 4065fcf252fSNeel Natu vm->iommu = NULL; 4075fcf252fSNeel Natu vm->vioapic = vioapic_init(vm); 4085fcf252fSNeel Natu vm->vhpet = vhpet_init(vm); 4095fcf252fSNeel Natu vm->vatpic = vatpic_init(vm); 4105fcf252fSNeel Natu vm->vatpit = vatpit_init(vm); 411160ef77aSNeel Natu vm->vpmtmr = vpmtmr_init(vm); 4120dafa5cdSNeel Natu if (create) 4130dafa5cdSNeel Natu vm->vrtc = vrtc_init(vm); 4145fcf252fSNeel Natu 4155fcf252fSNeel Natu CPU_ZERO(&vm->active_cpus); 4165fcf252fSNeel Natu 4175fcf252fSNeel Natu vm->suspend = 0; 4185fcf252fSNeel Natu CPU_ZERO(&vm->suspended_cpus); 4195fcf252fSNeel Natu 4205fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4215fcf252fSNeel Natu vcpu_init(vm, i, create); 4225fcf252fSNeel Natu } 4235fcf252fSNeel Natu 424d5408b1dSNeel Natu int 425d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm) 426366f6083SPeter Grehan { 427366f6083SPeter Grehan struct vm *vm; 428318224bbSNeel Natu struct vmspace *vmspace; 429366f6083SPeter Grehan 430d5408b1dSNeel Natu /* 431d5408b1dSNeel Natu * If vmm.ko could not be successfully initialized then don't attempt 432d5408b1dSNeel Natu * to create the virtual machine. 433d5408b1dSNeel Natu */ 434d5408b1dSNeel Natu if (!vmm_initialized) 435d5408b1dSNeel Natu return (ENXIO); 436d5408b1dSNeel Natu 437366f6083SPeter Grehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 438d5408b1dSNeel Natu return (EINVAL); 439366f6083SPeter Grehan 440526c8885SPeter Grehan vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); 441318224bbSNeel Natu if (vmspace == NULL) 442318224bbSNeel Natu return (ENOMEM); 443318224bbSNeel Natu 444366f6083SPeter Grehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 445366f6083SPeter Grehan strcpy(vm->name, name); 44688c4b8d1SNeel Natu vm->vmspace = vmspace; 4475b8a8cd1SNeel Natu mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 448366f6083SPeter Grehan 4495fcf252fSNeel Natu vm_init(vm, true); 450366f6083SPeter Grehan 451d5408b1dSNeel Natu *retvm = vm; 452d5408b1dSNeel Natu return (0); 453366f6083SPeter Grehan } 454366f6083SPeter Grehan 455f7d51510SNeel Natu static void 4565fcf252fSNeel Natu vm_cleanup(struct vm *vm, bool destroy) 457366f6083SPeter Grehan { 4589b1aa8d6SNeel Natu struct mem_map *mm; 459366f6083SPeter Grehan int i; 460366f6083SPeter Grehan 461366f6083SPeter Grehan ppt_unassign_all(vm); 462366f6083SPeter Grehan 463318224bbSNeel Natu if (vm->iommu != NULL) 464318224bbSNeel Natu iommu_destroy_domain(vm->iommu); 465318224bbSNeel Natu 4660dafa5cdSNeel Natu if (destroy) 4670dafa5cdSNeel Natu vrtc_cleanup(vm->vrtc); 4680dafa5cdSNeel Natu else 4690dafa5cdSNeel Natu vrtc_reset(vm->vrtc); 470160ef77aSNeel Natu vpmtmr_cleanup(vm->vpmtmr); 471e883c9bbSTycho Nightingale vatpit_cleanup(vm->vatpit); 47208e3ff32SNeel Natu vhpet_cleanup(vm->vhpet); 473762fd208STycho Nightingale vatpic_cleanup(vm->vatpic); 47408e3ff32SNeel Natu vioapic_cleanup(vm->vioapic); 47508e3ff32SNeel Natu 4765fcf252fSNeel Natu for (i = 0; i < VM_MAXCPU; i++) 4775fcf252fSNeel Natu vcpu_cleanup(vm, i, destroy); 4785fcf252fSNeel Natu 4795fcf252fSNeel Natu VMCLEANUP(vm->cookie); 4805fcf252fSNeel Natu 4819b1aa8d6SNeel Natu /* 4829b1aa8d6SNeel Natu * System memory is removed from the guest address space only when 4839b1aa8d6SNeel Natu * the VM is destroyed. This is because the mapping remains the same 4849b1aa8d6SNeel Natu * across VM reset. 4859b1aa8d6SNeel Natu * 4869b1aa8d6SNeel Natu * Device memory can be relocated by the guest (e.g. using PCI BARs) 4879b1aa8d6SNeel Natu * so those mappings are removed on a VM reset. 4889b1aa8d6SNeel Natu */ 4899b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 4909b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 4919b1aa8d6SNeel Natu if (destroy || !sysmem_mapping(vm, mm)) 4929b1aa8d6SNeel Natu vm_free_memmap(vm, i); 4939b1aa8d6SNeel Natu } 494f7d51510SNeel Natu 4959b1aa8d6SNeel Natu if (destroy) { 4969b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMSEGS; i++) 4979b1aa8d6SNeel Natu vm_free_memseg(vm, i); 498366f6083SPeter Grehan 499318224bbSNeel Natu VMSPACE_FREE(vm->vmspace); 5005fcf252fSNeel Natu vm->vmspace = NULL; 5015fcf252fSNeel Natu } 5025fcf252fSNeel Natu } 503366f6083SPeter Grehan 5045fcf252fSNeel Natu void 5055fcf252fSNeel Natu vm_destroy(struct vm *vm) 5065fcf252fSNeel Natu { 5075fcf252fSNeel Natu vm_cleanup(vm, true); 508366f6083SPeter Grehan free(vm, M_VM); 509366f6083SPeter Grehan } 510366f6083SPeter Grehan 5115fcf252fSNeel Natu int 5125fcf252fSNeel Natu vm_reinit(struct vm *vm) 5135fcf252fSNeel Natu { 5145fcf252fSNeel Natu int error; 5155fcf252fSNeel Natu 5165fcf252fSNeel Natu /* 5175fcf252fSNeel Natu * A virtual machine can be reset only if all vcpus are suspended. 5185fcf252fSNeel Natu */ 5195fcf252fSNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 5205fcf252fSNeel Natu vm_cleanup(vm, false); 5215fcf252fSNeel Natu vm_init(vm, false); 5225fcf252fSNeel Natu error = 0; 5235fcf252fSNeel Natu } else { 5245fcf252fSNeel Natu error = EBUSY; 5255fcf252fSNeel Natu } 5265fcf252fSNeel Natu 5275fcf252fSNeel Natu return (error); 5285fcf252fSNeel Natu } 5295fcf252fSNeel Natu 530366f6083SPeter Grehan const char * 531366f6083SPeter Grehan vm_name(struct vm *vm) 532366f6083SPeter Grehan { 533366f6083SPeter Grehan return (vm->name); 534366f6083SPeter Grehan } 535366f6083SPeter Grehan 536366f6083SPeter Grehan int 537366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 538366f6083SPeter Grehan { 539318224bbSNeel Natu vm_object_t obj; 540366f6083SPeter Grehan 541318224bbSNeel Natu if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 542318224bbSNeel Natu return (ENOMEM); 543318224bbSNeel Natu else 544318224bbSNeel Natu return (0); 545366f6083SPeter Grehan } 546366f6083SPeter Grehan 547366f6083SPeter Grehan int 548366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 549366f6083SPeter Grehan { 550366f6083SPeter Grehan 551318224bbSNeel Natu vmm_mmio_free(vm->vmspace, gpa, len); 552318224bbSNeel Natu return (0); 553366f6083SPeter Grehan } 554366f6083SPeter Grehan 5559b1aa8d6SNeel Natu /* 5569b1aa8d6SNeel Natu * Return 'true' if 'gpa' is allocated in the guest address space. 5579b1aa8d6SNeel Natu * 5589b1aa8d6SNeel Natu * This function is called in the context of a running vcpu which acts as 5599b1aa8d6SNeel Natu * an implicit lock on 'vm->mem_maps[]'. 5609b1aa8d6SNeel Natu */ 5619b1aa8d6SNeel Natu bool 5629b1aa8d6SNeel Natu vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa) 563366f6083SPeter Grehan { 5649b1aa8d6SNeel Natu struct mem_map *mm; 565341f19c9SNeel Natu int i; 566341f19c9SNeel Natu 5679b1aa8d6SNeel Natu #ifdef INVARIANTS 5689b1aa8d6SNeel Natu int hostcpu, state; 5699b1aa8d6SNeel Natu state = vcpu_get_state(vm, vcpuid, &hostcpu); 5709b1aa8d6SNeel Natu KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 5719b1aa8d6SNeel Natu ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 5729b1aa8d6SNeel Natu #endif 5739b1aa8d6SNeel Natu 5749b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 5759b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 5769b1aa8d6SNeel Natu if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 5779b1aa8d6SNeel Natu return (true); /* 'gpa' is sysmem or devmem */ 578341f19c9SNeel Natu } 579341f19c9SNeel Natu 580318224bbSNeel Natu if (ppt_is_mmio(vm, gpa)) 5819b1aa8d6SNeel Natu return (true); /* 'gpa' is pci passthru mmio */ 582318224bbSNeel Natu 5839b1aa8d6SNeel Natu return (false); 584341f19c9SNeel Natu } 585341f19c9SNeel Natu 586341f19c9SNeel Natu int 5879b1aa8d6SNeel Natu vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 588341f19c9SNeel Natu { 589318224bbSNeel Natu struct mem_seg *seg; 5909b1aa8d6SNeel Natu vm_object_t obj; 591366f6083SPeter Grehan 5929b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 593341f19c9SNeel Natu return (EINVAL); 594341f19c9SNeel Natu 5959b1aa8d6SNeel Natu if (len == 0 || (len & PAGE_MASK)) 5969b1aa8d6SNeel Natu return (EINVAL); 597341f19c9SNeel Natu 5989b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 5999b1aa8d6SNeel Natu if (seg->object != NULL) { 6009b1aa8d6SNeel Natu if (seg->len == len && seg->sysmem == sysmem) 6019b1aa8d6SNeel Natu return (EEXIST); 6029b1aa8d6SNeel Natu else 6039b1aa8d6SNeel Natu return (EINVAL); 604341f19c9SNeel Natu } 605341f19c9SNeel Natu 6069b1aa8d6SNeel Natu obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); 6079b1aa8d6SNeel Natu if (obj == NULL) 608318224bbSNeel Natu return (ENOMEM); 609318224bbSNeel Natu 610318224bbSNeel Natu seg->len = len; 6119b1aa8d6SNeel Natu seg->object = obj; 6129b1aa8d6SNeel Natu seg->sysmem = sysmem; 613366f6083SPeter Grehan return (0); 614366f6083SPeter Grehan } 615366f6083SPeter Grehan 6169b1aa8d6SNeel Natu int 6179b1aa8d6SNeel Natu vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 6189b1aa8d6SNeel Natu vm_object_t *objptr) 619477867a0SNeel Natu { 6209b1aa8d6SNeel Natu struct mem_seg *seg; 621477867a0SNeel Natu 6229b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 6239b1aa8d6SNeel Natu return (EINVAL); 6249b1aa8d6SNeel Natu 6259b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 6269b1aa8d6SNeel Natu if (len) 6279b1aa8d6SNeel Natu *len = seg->len; 6289b1aa8d6SNeel Natu if (sysmem) 6299b1aa8d6SNeel Natu *sysmem = seg->sysmem; 6309b1aa8d6SNeel Natu if (objptr) 6319b1aa8d6SNeel Natu *objptr = seg->object; 6329b1aa8d6SNeel Natu return (0); 633477867a0SNeel Natu } 6349b1aa8d6SNeel Natu 6359b1aa8d6SNeel Natu void 6369b1aa8d6SNeel Natu vm_free_memseg(struct vm *vm, int ident) 6379b1aa8d6SNeel Natu { 6389b1aa8d6SNeel Natu struct mem_seg *seg; 6399b1aa8d6SNeel Natu 6409b1aa8d6SNeel Natu KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 6419b1aa8d6SNeel Natu ("%s: invalid memseg ident %d", __func__, ident)); 6429b1aa8d6SNeel Natu 6439b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 6449b1aa8d6SNeel Natu if (seg->object != NULL) { 6459b1aa8d6SNeel Natu vm_object_deallocate(seg->object); 6469b1aa8d6SNeel Natu bzero(seg, sizeof(struct mem_seg)); 6479b1aa8d6SNeel Natu } 6489b1aa8d6SNeel Natu } 6499b1aa8d6SNeel Natu 6509b1aa8d6SNeel Natu int 6519b1aa8d6SNeel Natu vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 6529b1aa8d6SNeel Natu size_t len, int prot, int flags) 6539b1aa8d6SNeel Natu { 6549b1aa8d6SNeel Natu struct mem_seg *seg; 6559b1aa8d6SNeel Natu struct mem_map *m, *map; 6569b1aa8d6SNeel Natu vm_ooffset_t last; 6579b1aa8d6SNeel Natu int i, error; 6589b1aa8d6SNeel Natu 6599b1aa8d6SNeel Natu if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 6609b1aa8d6SNeel Natu return (EINVAL); 6619b1aa8d6SNeel Natu 6629b1aa8d6SNeel Natu if (flags & ~VM_MEMMAP_F_WIRED) 6639b1aa8d6SNeel Natu return (EINVAL); 6649b1aa8d6SNeel Natu 6659b1aa8d6SNeel Natu if (segid < 0 || segid >= VM_MAX_MEMSEGS) 6669b1aa8d6SNeel Natu return (EINVAL); 6679b1aa8d6SNeel Natu 6689b1aa8d6SNeel Natu seg = &vm->mem_segs[segid]; 6699b1aa8d6SNeel Natu if (seg->object == NULL) 6709b1aa8d6SNeel Natu return (EINVAL); 6719b1aa8d6SNeel Natu 6729b1aa8d6SNeel Natu last = first + len; 6739b1aa8d6SNeel Natu if (first < 0 || first >= last || last > seg->len) 6749b1aa8d6SNeel Natu return (EINVAL); 6759b1aa8d6SNeel Natu 6769b1aa8d6SNeel Natu if ((gpa | first | last) & PAGE_MASK) 6779b1aa8d6SNeel Natu return (EINVAL); 6789b1aa8d6SNeel Natu 6799b1aa8d6SNeel Natu map = NULL; 6809b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 6819b1aa8d6SNeel Natu m = &vm->mem_maps[i]; 6829b1aa8d6SNeel Natu if (m->len == 0) { 6839b1aa8d6SNeel Natu map = m; 6849b1aa8d6SNeel Natu break; 6859b1aa8d6SNeel Natu } 6869b1aa8d6SNeel Natu } 6879b1aa8d6SNeel Natu 6889b1aa8d6SNeel Natu if (map == NULL) 6899b1aa8d6SNeel Natu return (ENOSPC); 6909b1aa8d6SNeel Natu 6919b1aa8d6SNeel Natu error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 6929b1aa8d6SNeel Natu len, 0, VMFS_NO_SPACE, prot, prot, 0); 6939b1aa8d6SNeel Natu if (error != KERN_SUCCESS) 6949b1aa8d6SNeel Natu return (EFAULT); 6959b1aa8d6SNeel Natu 6969b1aa8d6SNeel Natu vm_object_reference(seg->object); 6979b1aa8d6SNeel Natu 6989b1aa8d6SNeel Natu if (flags & VM_MEMMAP_F_WIRED) { 6999b1aa8d6SNeel Natu error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 7009b1aa8d6SNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 7019b1aa8d6SNeel Natu if (error != KERN_SUCCESS) { 7029b1aa8d6SNeel Natu vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 7039b1aa8d6SNeel Natu return (EFAULT); 7049b1aa8d6SNeel Natu } 7059b1aa8d6SNeel Natu } 7069b1aa8d6SNeel Natu 7079b1aa8d6SNeel Natu map->gpa = gpa; 7089b1aa8d6SNeel Natu map->len = len; 7099b1aa8d6SNeel Natu map->segoff = first; 7109b1aa8d6SNeel Natu map->segid = segid; 7119b1aa8d6SNeel Natu map->prot = prot; 7129b1aa8d6SNeel Natu map->flags = flags; 7139b1aa8d6SNeel Natu return (0); 7149b1aa8d6SNeel Natu } 7159b1aa8d6SNeel Natu 7169b1aa8d6SNeel Natu int 7179b1aa8d6SNeel Natu vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 7189b1aa8d6SNeel Natu vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 7199b1aa8d6SNeel Natu { 7209b1aa8d6SNeel Natu struct mem_map *mm, *mmnext; 7219b1aa8d6SNeel Natu int i; 7229b1aa8d6SNeel Natu 7239b1aa8d6SNeel Natu mmnext = NULL; 7249b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7259b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7269b1aa8d6SNeel Natu if (mm->len == 0 || mm->gpa < *gpa) 7279b1aa8d6SNeel Natu continue; 7289b1aa8d6SNeel Natu if (mmnext == NULL || mm->gpa < mmnext->gpa) 7299b1aa8d6SNeel Natu mmnext = mm; 7309b1aa8d6SNeel Natu } 7319b1aa8d6SNeel Natu 7329b1aa8d6SNeel Natu if (mmnext != NULL) { 7339b1aa8d6SNeel Natu *gpa = mmnext->gpa; 7349b1aa8d6SNeel Natu if (segid) 7359b1aa8d6SNeel Natu *segid = mmnext->segid; 7369b1aa8d6SNeel Natu if (segoff) 7379b1aa8d6SNeel Natu *segoff = mmnext->segoff; 7389b1aa8d6SNeel Natu if (len) 7399b1aa8d6SNeel Natu *len = mmnext->len; 7409b1aa8d6SNeel Natu if (prot) 7419b1aa8d6SNeel Natu *prot = mmnext->prot; 7429b1aa8d6SNeel Natu if (flags) 7439b1aa8d6SNeel Natu *flags = mmnext->flags; 7449b1aa8d6SNeel Natu return (0); 7459b1aa8d6SNeel Natu } else { 7469b1aa8d6SNeel Natu return (ENOENT); 7479b1aa8d6SNeel Natu } 748477867a0SNeel Natu } 749477867a0SNeel Natu 750318224bbSNeel Natu static void 7519b1aa8d6SNeel Natu vm_free_memmap(struct vm *vm, int ident) 752366f6083SPeter Grehan { 7539b1aa8d6SNeel Natu struct mem_map *mm; 7549b1aa8d6SNeel Natu int error; 7554db4fb2cSNeel Natu 7569b1aa8d6SNeel Natu mm = &vm->mem_maps[ident]; 7579b1aa8d6SNeel Natu if (mm->len) { 7589b1aa8d6SNeel Natu error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 7599b1aa8d6SNeel Natu mm->gpa + mm->len); 7609b1aa8d6SNeel Natu KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 7619b1aa8d6SNeel Natu __func__, error)); 7629b1aa8d6SNeel Natu bzero(mm, sizeof(struct mem_map)); 763318224bbSNeel Natu } 764318224bbSNeel Natu } 765318224bbSNeel Natu 7669b1aa8d6SNeel Natu static __inline bool 7679b1aa8d6SNeel Natu sysmem_mapping(struct vm *vm, struct mem_map *mm) 768318224bbSNeel Natu { 769318224bbSNeel Natu 7709b1aa8d6SNeel Natu if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 7719b1aa8d6SNeel Natu return (true); 7729b1aa8d6SNeel Natu else 7739b1aa8d6SNeel Natu return (false); 774318224bbSNeel Natu } 775318224bbSNeel Natu 7769b1aa8d6SNeel Natu static vm_paddr_t 7779b1aa8d6SNeel Natu sysmem_maxaddr(struct vm *vm) 7789b1aa8d6SNeel Natu { 7799b1aa8d6SNeel Natu struct mem_map *mm; 7809b1aa8d6SNeel Natu vm_paddr_t maxaddr; 7819b1aa8d6SNeel Natu int i; 782318224bbSNeel Natu 7839b1aa8d6SNeel Natu maxaddr = 0; 7849b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7859b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7869b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm)) { 7879b1aa8d6SNeel Natu if (maxaddr < mm->gpa + mm->len) 7889b1aa8d6SNeel Natu maxaddr = mm->gpa + mm->len; 7899b1aa8d6SNeel Natu } 7909b1aa8d6SNeel Natu } 7919b1aa8d6SNeel Natu return (maxaddr); 792318224bbSNeel Natu } 793318224bbSNeel Natu 794318224bbSNeel Natu static void 795318224bbSNeel Natu vm_iommu_modify(struct vm *vm, boolean_t map) 796318224bbSNeel Natu { 797318224bbSNeel Natu int i, sz; 798318224bbSNeel Natu vm_paddr_t gpa, hpa; 7999b1aa8d6SNeel Natu struct mem_map *mm; 800318224bbSNeel Natu void *vp, *cookie, *host_domain; 801318224bbSNeel Natu 802318224bbSNeel Natu sz = PAGE_SIZE; 803318224bbSNeel Natu host_domain = iommu_host_domain(); 804318224bbSNeel Natu 8059b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 8069b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 8079b1aa8d6SNeel Natu if (!sysmem_mapping(vm, mm)) 8089b1aa8d6SNeel Natu continue; 809318224bbSNeel Natu 8109b1aa8d6SNeel Natu if (map) { 8119b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, 8129b1aa8d6SNeel Natu ("iommu map found invalid memmap %#lx/%#lx/%#x", 8139b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 8149b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) 8159b1aa8d6SNeel Natu continue; 8169b1aa8d6SNeel Natu mm->flags |= VM_MEMMAP_F_IOMMU; 8179b1aa8d6SNeel Natu } else { 8189b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) 8199b1aa8d6SNeel Natu continue; 8209b1aa8d6SNeel Natu mm->flags &= ~VM_MEMMAP_F_IOMMU; 8219b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, 8229b1aa8d6SNeel Natu ("iommu unmap found invalid memmap %#lx/%#lx/%#x", 8239b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 8249b1aa8d6SNeel Natu } 8259b1aa8d6SNeel Natu 8269b1aa8d6SNeel Natu gpa = mm->gpa; 8279b1aa8d6SNeel Natu while (gpa < mm->gpa + mm->len) { 8289b1aa8d6SNeel Natu vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE, 829318224bbSNeel Natu &cookie); 830318224bbSNeel Natu KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 831318224bbSNeel Natu vm_name(vm), gpa)); 832318224bbSNeel Natu 833318224bbSNeel Natu vm_gpa_release(cookie); 834318224bbSNeel Natu 835318224bbSNeel Natu hpa = DMAP_TO_PHYS((uintptr_t)vp); 836318224bbSNeel Natu if (map) { 837318224bbSNeel Natu iommu_create_mapping(vm->iommu, gpa, hpa, sz); 838318224bbSNeel Natu iommu_remove_mapping(host_domain, hpa, sz); 839318224bbSNeel Natu } else { 840318224bbSNeel Natu iommu_remove_mapping(vm->iommu, gpa, sz); 841318224bbSNeel Natu iommu_create_mapping(host_domain, hpa, hpa, sz); 842318224bbSNeel Natu } 843318224bbSNeel Natu 844318224bbSNeel Natu gpa += PAGE_SIZE; 845318224bbSNeel Natu } 846318224bbSNeel Natu } 847318224bbSNeel Natu 848318224bbSNeel Natu /* 849318224bbSNeel Natu * Invalidate the cached translations associated with the domain 850318224bbSNeel Natu * from which pages were removed. 851318224bbSNeel Natu */ 852318224bbSNeel Natu if (map) 853318224bbSNeel Natu iommu_invalidate_tlb(host_domain); 854318224bbSNeel Natu else 855318224bbSNeel Natu iommu_invalidate_tlb(vm->iommu); 856318224bbSNeel Natu } 857318224bbSNeel Natu 858318224bbSNeel Natu #define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 859318224bbSNeel Natu #define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 860318224bbSNeel Natu 861318224bbSNeel Natu int 862318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 863318224bbSNeel Natu { 864318224bbSNeel Natu int error; 865318224bbSNeel Natu 866318224bbSNeel Natu error = ppt_unassign_device(vm, bus, slot, func); 867318224bbSNeel Natu if (error) 868318224bbSNeel Natu return (error); 869318224bbSNeel Natu 8709b1aa8d6SNeel Natu if (ppt_assigned_devices(vm) == 0) 871318224bbSNeel Natu vm_iommu_unmap(vm); 8729b1aa8d6SNeel Natu 873318224bbSNeel Natu return (0); 874318224bbSNeel Natu } 875318224bbSNeel Natu 876318224bbSNeel Natu int 877318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 878318224bbSNeel Natu { 879318224bbSNeel Natu int error; 880318224bbSNeel Natu vm_paddr_t maxaddr; 881318224bbSNeel Natu 8829b1aa8d6SNeel Natu /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ 88351f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 884318224bbSNeel Natu KASSERT(vm->iommu == NULL, 885318224bbSNeel Natu ("vm_assign_pptdev: iommu must be NULL")); 8869b1aa8d6SNeel Natu maxaddr = sysmem_maxaddr(vm); 887318224bbSNeel Natu vm->iommu = iommu_create_domain(maxaddr); 888ffe1b10dSJohn Baldwin if (vm->iommu == NULL) 889ffe1b10dSJohn Baldwin return (ENXIO); 890318224bbSNeel Natu vm_iommu_map(vm); 891318224bbSNeel Natu } 892318224bbSNeel Natu 893318224bbSNeel Natu error = ppt_assign_device(vm, bus, slot, func); 894318224bbSNeel Natu return (error); 895318224bbSNeel Natu } 896318224bbSNeel Natu 897318224bbSNeel Natu void * 8989b1aa8d6SNeel Natu vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, 899318224bbSNeel Natu void **cookie) 900318224bbSNeel Natu { 9019b1aa8d6SNeel Natu int i, count, pageoff; 9029b1aa8d6SNeel Natu struct mem_map *mm; 903318224bbSNeel Natu vm_page_t m; 9049b1aa8d6SNeel Natu #ifdef INVARIANTS 9059b1aa8d6SNeel Natu /* 9069b1aa8d6SNeel Natu * All vcpus are frozen by ioctls that modify the memory map 9079b1aa8d6SNeel Natu * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is 9089b1aa8d6SNeel Natu * guaranteed if at least one vcpu is in the VCPU_FROZEN state. 9099b1aa8d6SNeel Natu */ 9109b1aa8d6SNeel Natu int state; 9112de70600SJohn Baldwin KASSERT(vcpuid >= -1 && vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d", 9129b1aa8d6SNeel Natu __func__, vcpuid)); 9139b1aa8d6SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 9149b1aa8d6SNeel Natu if (vcpuid != -1 && vcpuid != i) 9159b1aa8d6SNeel Natu continue; 9169b1aa8d6SNeel Natu state = vcpu_get_state(vm, i, NULL); 9179b1aa8d6SNeel Natu KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 9189b1aa8d6SNeel Natu __func__, state)); 9199b1aa8d6SNeel Natu } 9209b1aa8d6SNeel Natu #endif 921318224bbSNeel Natu pageoff = gpa & PAGE_MASK; 922318224bbSNeel Natu if (len > PAGE_SIZE - pageoff) 923318224bbSNeel Natu panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 924318224bbSNeel Natu 9259b1aa8d6SNeel Natu count = 0; 9269b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9279b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 9289b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 9299b1aa8d6SNeel Natu gpa < mm->gpa + mm->len) { 930318224bbSNeel Natu count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 931318224bbSNeel Natu trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 9329b1aa8d6SNeel Natu break; 9339b1aa8d6SNeel Natu } 9349b1aa8d6SNeel Natu } 935318224bbSNeel Natu 936318224bbSNeel Natu if (count == 1) { 937318224bbSNeel Natu *cookie = m; 938318224bbSNeel Natu return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 939318224bbSNeel Natu } else { 940318224bbSNeel Natu *cookie = NULL; 941318224bbSNeel Natu return (NULL); 942318224bbSNeel Natu } 943318224bbSNeel Natu } 944318224bbSNeel Natu 945318224bbSNeel Natu void 946318224bbSNeel Natu vm_gpa_release(void *cookie) 947318224bbSNeel Natu { 948318224bbSNeel Natu vm_page_t m = cookie; 949318224bbSNeel Natu 950318224bbSNeel Natu vm_page_lock(m); 951318224bbSNeel Natu vm_page_unhold(m); 952318224bbSNeel Natu vm_page_unlock(m); 953366f6083SPeter Grehan } 954366f6083SPeter Grehan 955366f6083SPeter Grehan int 956366f6083SPeter Grehan vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 957366f6083SPeter Grehan { 958366f6083SPeter Grehan 959366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 960366f6083SPeter Grehan return (EINVAL); 961366f6083SPeter Grehan 962366f6083SPeter Grehan if (reg >= VM_REG_LAST) 963366f6083SPeter Grehan return (EINVAL); 964366f6083SPeter Grehan 965366f6083SPeter Grehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 966366f6083SPeter Grehan } 967366f6083SPeter Grehan 968366f6083SPeter Grehan int 969d087a399SNeel Natu vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) 970366f6083SPeter Grehan { 971d087a399SNeel Natu struct vcpu *vcpu; 972d087a399SNeel Natu int error; 973366f6083SPeter Grehan 974d087a399SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 975366f6083SPeter Grehan return (EINVAL); 976366f6083SPeter Grehan 977366f6083SPeter Grehan if (reg >= VM_REG_LAST) 978366f6083SPeter Grehan return (EINVAL); 979366f6083SPeter Grehan 980d087a399SNeel Natu error = VMSETREG(vm->cookie, vcpuid, reg, val); 981d087a399SNeel Natu if (error || reg != VM_REG_GUEST_RIP) 982d087a399SNeel Natu return (error); 983d087a399SNeel Natu 984d087a399SNeel Natu /* Set 'nextrip' to match the value of %rip */ 985d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); 986d087a399SNeel Natu vcpu = &vm->vcpu[vcpuid]; 987d087a399SNeel Natu vcpu->nextrip = val; 988d087a399SNeel Natu return (0); 989366f6083SPeter Grehan } 990366f6083SPeter Grehan 991366f6083SPeter Grehan static boolean_t 992366f6083SPeter Grehan is_descriptor_table(int reg) 993366f6083SPeter Grehan { 994366f6083SPeter Grehan 995366f6083SPeter Grehan switch (reg) { 996366f6083SPeter Grehan case VM_REG_GUEST_IDTR: 997366f6083SPeter Grehan case VM_REG_GUEST_GDTR: 998366f6083SPeter Grehan return (TRUE); 999366f6083SPeter Grehan default: 1000366f6083SPeter Grehan return (FALSE); 1001366f6083SPeter Grehan } 1002366f6083SPeter Grehan } 1003366f6083SPeter Grehan 1004366f6083SPeter Grehan static boolean_t 1005366f6083SPeter Grehan is_segment_register(int reg) 1006366f6083SPeter Grehan { 1007366f6083SPeter Grehan 1008366f6083SPeter Grehan switch (reg) { 1009366f6083SPeter Grehan case VM_REG_GUEST_ES: 1010366f6083SPeter Grehan case VM_REG_GUEST_CS: 1011366f6083SPeter Grehan case VM_REG_GUEST_SS: 1012366f6083SPeter Grehan case VM_REG_GUEST_DS: 1013366f6083SPeter Grehan case VM_REG_GUEST_FS: 1014366f6083SPeter Grehan case VM_REG_GUEST_GS: 1015366f6083SPeter Grehan case VM_REG_GUEST_TR: 1016366f6083SPeter Grehan case VM_REG_GUEST_LDTR: 1017366f6083SPeter Grehan return (TRUE); 1018366f6083SPeter Grehan default: 1019366f6083SPeter Grehan return (FALSE); 1020366f6083SPeter Grehan } 1021366f6083SPeter Grehan } 1022366f6083SPeter Grehan 1023366f6083SPeter Grehan int 1024366f6083SPeter Grehan vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 1025366f6083SPeter Grehan struct seg_desc *desc) 1026366f6083SPeter Grehan { 1027366f6083SPeter Grehan 1028366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1029366f6083SPeter Grehan return (EINVAL); 1030366f6083SPeter Grehan 1031366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1032366f6083SPeter Grehan return (EINVAL); 1033366f6083SPeter Grehan 1034366f6083SPeter Grehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 1035366f6083SPeter Grehan } 1036366f6083SPeter Grehan 1037366f6083SPeter Grehan int 1038366f6083SPeter Grehan vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 1039366f6083SPeter Grehan struct seg_desc *desc) 1040366f6083SPeter Grehan { 1041366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1042366f6083SPeter Grehan return (EINVAL); 1043366f6083SPeter Grehan 1044366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1045366f6083SPeter Grehan return (EINVAL); 1046366f6083SPeter Grehan 1047366f6083SPeter Grehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 1048366f6083SPeter Grehan } 1049366f6083SPeter Grehan 1050366f6083SPeter Grehan static void 1051366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu) 1052366f6083SPeter Grehan { 1053366f6083SPeter Grehan 105438f1b189SPeter Grehan /* flush host state to the pcb */ 105538f1b189SPeter Grehan fpuexit(curthread); 1056bd8572e0SNeel Natu 1057bd8572e0SNeel Natu /* restore guest FPU state */ 1058366f6083SPeter Grehan fpu_stop_emulating(); 105938f1b189SPeter Grehan fpurestore(vcpu->guestfpu); 1060bd8572e0SNeel Natu 1061abb023fbSJohn Baldwin /* restore guest XCR0 if XSAVE is enabled in the host */ 1062abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) 1063abb023fbSJohn Baldwin load_xcr(0, vcpu->guest_xcr0); 1064abb023fbSJohn Baldwin 1065bd8572e0SNeel Natu /* 1066bd8572e0SNeel Natu * The FPU is now "dirty" with the guest's state so turn on emulation 1067bd8572e0SNeel Natu * to trap any access to the FPU by the host. 1068bd8572e0SNeel Natu */ 1069bd8572e0SNeel Natu fpu_start_emulating(); 1070366f6083SPeter Grehan } 1071366f6083SPeter Grehan 1072366f6083SPeter Grehan static void 1073366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu) 1074366f6083SPeter Grehan { 1075366f6083SPeter Grehan 1076bd8572e0SNeel Natu if ((rcr0() & CR0_TS) == 0) 1077bd8572e0SNeel Natu panic("fpu emulation not enabled in host!"); 1078bd8572e0SNeel Natu 1079abb023fbSJohn Baldwin /* save guest XCR0 and restore host XCR0 */ 1080abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) { 1081abb023fbSJohn Baldwin vcpu->guest_xcr0 = rxcr(0); 1082abb023fbSJohn Baldwin load_xcr(0, vmm_get_host_xcr0()); 1083abb023fbSJohn Baldwin } 1084abb023fbSJohn Baldwin 1085bd8572e0SNeel Natu /* save guest FPU state */ 1086bd8572e0SNeel Natu fpu_stop_emulating(); 108738f1b189SPeter Grehan fpusave(vcpu->guestfpu); 1088366f6083SPeter Grehan fpu_start_emulating(); 1089366f6083SPeter Grehan } 1090366f6083SPeter Grehan 109161592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 1092f76fc5d4SNeel Natu 1093318224bbSNeel Natu static int 1094248e6799SNeel Natu vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, 1095f80330a8SNeel Natu bool from_idle) 1096366f6083SPeter Grehan { 1097248e6799SNeel Natu struct vcpu *vcpu; 1098318224bbSNeel Natu int error; 1099366f6083SPeter Grehan 1100248e6799SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1101318224bbSNeel Natu vcpu_assert_locked(vcpu); 1102366f6083SPeter Grehan 1103f76fc5d4SNeel Natu /* 1104f80330a8SNeel Natu * State transitions from the vmmdev_ioctl() must always begin from 1105f80330a8SNeel Natu * the VCPU_IDLE state. This guarantees that there is only a single 1106f80330a8SNeel Natu * ioctl() operating on a vcpu at any point. 1107f80330a8SNeel Natu */ 1108f80330a8SNeel Natu if (from_idle) { 1109248e6799SNeel Natu while (vcpu->state != VCPU_IDLE) { 1110248e6799SNeel Natu vcpu->reqidle = 1; 1111248e6799SNeel Natu vcpu_notify_event_locked(vcpu, false); 1112248e6799SNeel Natu VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to " 1113248e6799SNeel Natu "idle requested", vcpu_state2str(vcpu->state)); 1114f80330a8SNeel Natu msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1115248e6799SNeel Natu } 1116f80330a8SNeel Natu } else { 1117f80330a8SNeel Natu KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1118f80330a8SNeel Natu "vcpu idle state")); 1119f80330a8SNeel Natu } 1120f80330a8SNeel Natu 1121ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 1122ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1123ef39d7e9SNeel Natu "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1124ef39d7e9SNeel Natu } else { 1125ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1126ef39d7e9SNeel Natu "vcpu that is not running", vcpu->hostcpu)); 1127ef39d7e9SNeel Natu } 1128ef39d7e9SNeel Natu 1129f80330a8SNeel Natu /* 1130318224bbSNeel Natu * The following state transitions are allowed: 1131318224bbSNeel Natu * IDLE -> FROZEN -> IDLE 1132318224bbSNeel Natu * FROZEN -> RUNNING -> FROZEN 1133318224bbSNeel Natu * FROZEN -> SLEEPING -> FROZEN 1134f76fc5d4SNeel Natu */ 1135318224bbSNeel Natu switch (vcpu->state) { 1136318224bbSNeel Natu case VCPU_IDLE: 1137318224bbSNeel Natu case VCPU_RUNNING: 1138318224bbSNeel Natu case VCPU_SLEEPING: 1139318224bbSNeel Natu error = (newstate != VCPU_FROZEN); 1140318224bbSNeel Natu break; 1141318224bbSNeel Natu case VCPU_FROZEN: 1142318224bbSNeel Natu error = (newstate == VCPU_FROZEN); 1143318224bbSNeel Natu break; 1144318224bbSNeel Natu default: 1145318224bbSNeel Natu error = 1; 1146318224bbSNeel Natu break; 1147318224bbSNeel Natu } 1148318224bbSNeel Natu 1149f80330a8SNeel Natu if (error) 1150f80330a8SNeel Natu return (EBUSY); 1151318224bbSNeel Natu 1152248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s", 1153248e6799SNeel Natu vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); 1154248e6799SNeel Natu 1155f80330a8SNeel Natu vcpu->state = newstate; 1156ef39d7e9SNeel Natu if (newstate == VCPU_RUNNING) 1157ef39d7e9SNeel Natu vcpu->hostcpu = curcpu; 1158ef39d7e9SNeel Natu else 1159ef39d7e9SNeel Natu vcpu->hostcpu = NOCPU; 1160ef39d7e9SNeel Natu 1161f80330a8SNeel Natu if (newstate == VCPU_IDLE) 1162f80330a8SNeel Natu wakeup(&vcpu->state); 1163f80330a8SNeel Natu 1164f80330a8SNeel Natu return (0); 1165318224bbSNeel Natu } 1166318224bbSNeel Natu 1167318224bbSNeel Natu static void 1168318224bbSNeel Natu vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1169318224bbSNeel Natu { 1170318224bbSNeel Natu int error; 1171318224bbSNeel Natu 1172f80330a8SNeel Natu if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) 1173318224bbSNeel Natu panic("Error %d setting state to %d\n", error, newstate); 1174318224bbSNeel Natu } 1175318224bbSNeel Natu 1176318224bbSNeel Natu static void 1177248e6799SNeel Natu vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1178318224bbSNeel Natu { 1179318224bbSNeel Natu int error; 1180318224bbSNeel Natu 1181248e6799SNeel Natu if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0) 1182318224bbSNeel Natu panic("Error %d setting state to %d", error, newstate); 1183318224bbSNeel Natu } 1184318224bbSNeel Natu 11855b8a8cd1SNeel Natu static void 11865b8a8cd1SNeel Natu vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) 11875b8a8cd1SNeel Natu { 11885b8a8cd1SNeel Natu 11895b8a8cd1SNeel Natu KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); 11905b8a8cd1SNeel Natu 11915b8a8cd1SNeel Natu /* 11925b8a8cd1SNeel Natu * Update 'rendezvous_func' and execute a write memory barrier to 11935b8a8cd1SNeel Natu * ensure that it is visible across all host cpus. This is not needed 11945b8a8cd1SNeel Natu * for correctness but it does ensure that all the vcpus will notice 11955b8a8cd1SNeel Natu * that the rendezvous is requested immediately. 11965b8a8cd1SNeel Natu */ 11975b8a8cd1SNeel Natu vm->rendezvous_func = func; 11985b8a8cd1SNeel Natu wmb(); 11995b8a8cd1SNeel Natu } 12005b8a8cd1SNeel Natu 12015b8a8cd1SNeel Natu #define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ 12025b8a8cd1SNeel Natu do { \ 12035b8a8cd1SNeel Natu if (vcpuid >= 0) \ 12045b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, fmt); \ 12055b8a8cd1SNeel Natu else \ 12065b8a8cd1SNeel Natu VM_CTR0(vm, fmt); \ 12075b8a8cd1SNeel Natu } while (0) 12085b8a8cd1SNeel Natu 12095b8a8cd1SNeel Natu static void 12105b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid) 12115b8a8cd1SNeel Natu { 12125b8a8cd1SNeel Natu 12135b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 12145b8a8cd1SNeel Natu ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); 12155b8a8cd1SNeel Natu 12165b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 12175b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 121822d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 121922d822c6SNeel Natu CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); 122022d822c6SNeel Natu 12215b8a8cd1SNeel Natu if (vcpuid != -1 && 122222d822c6SNeel Natu CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 122322d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 12245b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); 12255b8a8cd1SNeel Natu (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); 12265b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 12275b8a8cd1SNeel Natu } 12285b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 12295b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 12305b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); 12315b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, NULL); 12325b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 12335b8a8cd1SNeel Natu break; 12345b8a8cd1SNeel Natu } 12355b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); 12365b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 12375b8a8cd1SNeel Natu "vmrndv", 0); 12385b8a8cd1SNeel Natu } 12395b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 12405b8a8cd1SNeel Natu } 12415b8a8cd1SNeel Natu 1242318224bbSNeel Natu /* 1243318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 1244318224bbSNeel Natu */ 1245318224bbSNeel Natu static int 1246becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) 1247318224bbSNeel Natu { 1248318224bbSNeel Natu struct vcpu *vcpu; 1249c6a0cc2eSNeel Natu const char *wmesg; 12502ce12423SNeel Natu int t, vcpu_halted, vm_halted; 1251e50ce2aaSNeel Natu 1252e50ce2aaSNeel Natu KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 1253318224bbSNeel Natu 1254318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1255e50ce2aaSNeel Natu vcpu_halted = 0; 1256e50ce2aaSNeel Natu vm_halted = 0; 1257318224bbSNeel Natu 1258f76fc5d4SNeel Natu vcpu_lock(vcpu); 1259c6a0cc2eSNeel Natu while (1) { 1260f76fc5d4SNeel Natu /* 1261f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1262c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1263c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1264f76fc5d4SNeel Natu * 1265c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 1266c6a0cc2eSNeel Natu * vcpu returned from VMRUN() and before it acquired the 1267c6a0cc2eSNeel Natu * vcpu lock above. 1268f76fc5d4SNeel Natu */ 1269248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1270c6a0cc2eSNeel Natu break; 1271c6a0cc2eSNeel Natu if (vm_nmi_pending(vm, vcpuid)) 1272c6a0cc2eSNeel Natu break; 1273c6a0cc2eSNeel Natu if (!intr_disabled) { 1274c6a0cc2eSNeel Natu if (vm_extint_pending(vm, vcpuid) || 1275c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1276c6a0cc2eSNeel Natu break; 1277c6a0cc2eSNeel Natu } 1278c6a0cc2eSNeel Natu } 1279c6a0cc2eSNeel Natu 1280f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 1281f008d157SNeel Natu if (vcpu_should_yield(vm, vcpuid)) 1282f008d157SNeel Natu break; 1283f008d157SNeel Natu 1284e50ce2aaSNeel Natu /* 1285e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1286e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1287e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1288e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 1289e50ce2aaSNeel Natu */ 1290e50ce2aaSNeel Natu if (intr_disabled) { 1291c6a0cc2eSNeel Natu wmesg = "vmhalt"; 1292e50ce2aaSNeel Natu VCPU_CTR0(vm, vcpuid, "Halted"); 1293055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1294e50ce2aaSNeel Natu vcpu_halted = 1; 1295e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1296e50ce2aaSNeel Natu } 1297e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1298e50ce2aaSNeel Natu vm_halted = 1; 1299e50ce2aaSNeel Natu break; 1300e50ce2aaSNeel Natu } 1301e50ce2aaSNeel Natu } else { 1302e50ce2aaSNeel Natu wmesg = "vmidle"; 1303e50ce2aaSNeel Natu } 1304c6a0cc2eSNeel Natu 1305f76fc5d4SNeel Natu t = ticks; 1306248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1307f008d157SNeel Natu /* 1308f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1309f008d157SNeel Natu * wake up periodically to check pending signals. 1310f008d157SNeel Natu */ 1311f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 1312248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1313f76fc5d4SNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 1314f76fc5d4SNeel Natu } 1315e50ce2aaSNeel Natu 1316e50ce2aaSNeel Natu if (vcpu_halted) 1317e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1318e50ce2aaSNeel Natu 1319f76fc5d4SNeel Natu vcpu_unlock(vcpu); 1320f76fc5d4SNeel Natu 1321e50ce2aaSNeel Natu if (vm_halted) 1322e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1323e50ce2aaSNeel Natu 1324318224bbSNeel Natu return (0); 1325318224bbSNeel Natu } 1326318224bbSNeel Natu 1327318224bbSNeel Natu static int 1328becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) 1329318224bbSNeel Natu { 1330318224bbSNeel Natu int rv, ftype; 1331318224bbSNeel Natu struct vm_map *map; 1332318224bbSNeel Natu struct vcpu *vcpu; 1333318224bbSNeel Natu struct vm_exit *vme; 1334318224bbSNeel Natu 1335318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1336318224bbSNeel Natu vme = &vcpu->exitinfo; 1337318224bbSNeel Natu 1338d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1339d087a399SNeel Natu __func__, vme->inst_length)); 1340d087a399SNeel Natu 1341318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1342318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1343318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1344318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1345318224bbSNeel Natu 1346318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1347318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1348318224bbSNeel Natu vme->u.paging.gpa, ftype); 13499d8d8e3eSNeel Natu if (rv == 0) { 13509d8d8e3eSNeel Natu VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", 13519d8d8e3eSNeel Natu ftype == VM_PROT_READ ? "accessed" : "dirty", 13529d8d8e3eSNeel Natu vme->u.paging.gpa); 1353318224bbSNeel Natu goto done; 1354318224bbSNeel Natu } 13559d8d8e3eSNeel Natu } 1356318224bbSNeel Natu 1357318224bbSNeel Natu map = &vm->vmspace->vm_map; 1358318224bbSNeel Natu rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 1359318224bbSNeel Natu 1360513c8d33SNeel Natu VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 1361513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1362318224bbSNeel Natu 1363318224bbSNeel Natu if (rv != KERN_SUCCESS) 1364318224bbSNeel Natu return (EFAULT); 1365318224bbSNeel Natu done: 1366318224bbSNeel Natu return (0); 1367318224bbSNeel Natu } 1368318224bbSNeel Natu 1369318224bbSNeel Natu static int 1370becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) 1371318224bbSNeel Natu { 1372318224bbSNeel Natu struct vie *vie; 1373318224bbSNeel Natu struct vcpu *vcpu; 1374318224bbSNeel Natu struct vm_exit *vme; 1375e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1376e813a873SNeel Natu struct vm_guest_paging *paging; 1377565bbb86SNeel Natu mem_region_read_t mread; 1378565bbb86SNeel Natu mem_region_write_t mwrite; 1379f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 13801c73ea3eSNeel Natu int cs_d, error, fault; 1381318224bbSNeel Natu 1382318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1383318224bbSNeel Natu vme = &vcpu->exitinfo; 1384318224bbSNeel Natu 13851c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 13861c73ea3eSNeel Natu __func__, vme->inst_length)); 13871c73ea3eSNeel Natu 1388318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1389318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1390e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1391f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1392318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1393e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1394f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1395318224bbSNeel Natu 13969d8d8e3eSNeel Natu VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); 13979d8d8e3eSNeel Natu 1398318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1399c2a875f9SNeel Natu if (vie->num_valid == 0) { 1400e4f605eeSTycho Nightingale error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + 14011c73ea3eSNeel Natu cs_base, VIE_INST_SIZE, vie, &fault); 1402c2a875f9SNeel Natu } else { 1403c2a875f9SNeel Natu /* 1404c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1405c2a875f9SNeel Natu */ 14069c4d5478SNeel Natu error = fault = 0; 1407c2a875f9SNeel Natu } 14089c4d5478SNeel Natu if (error || fault) 14099c4d5478SNeel Natu return (error); 1410318224bbSNeel Natu 1411c07a0648SNeel Natu if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { 1412c07a0648SNeel Natu VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", 1413c07a0648SNeel Natu vme->rip + cs_base); 1414c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1415c07a0648SNeel Natu return (0); 1416c07a0648SNeel Natu } 1417318224bbSNeel Natu 1418a0b78f09SPeter Grehan /* 14191c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 1420a0b78f09SPeter Grehan */ 1421a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1422d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 14231c73ea3eSNeel Natu VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " 14241c73ea3eSNeel Natu "decoding", vcpu->nextrip); 1425a0b78f09SPeter Grehan 142608e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1427565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1428565bbb86SNeel Natu mread = lapic_mmio_read; 1429565bbb86SNeel Natu mwrite = lapic_mmio_write; 1430565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1431565bbb86SNeel Natu mread = vioapic_mmio_read; 1432565bbb86SNeel Natu mwrite = vioapic_mmio_write; 143308e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 143408e3ff32SNeel Natu mread = vhpet_mmio_read; 143508e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1436565bbb86SNeel Natu } else { 1437becd9849SNeel Natu *retu = true; 1438318224bbSNeel Natu return (0); 1439318224bbSNeel Natu } 1440318224bbSNeel Natu 1441d665d229SNeel Natu error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1442d665d229SNeel Natu mread, mwrite, retu); 1443318224bbSNeel Natu 1444318224bbSNeel Natu return (error); 1445318224bbSNeel Natu } 1446318224bbSNeel Natu 1447b15a09c0SNeel Natu static int 1448b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1449b15a09c0SNeel Natu { 1450b15a09c0SNeel Natu int i, done; 1451b15a09c0SNeel Natu struct vcpu *vcpu; 1452b15a09c0SNeel Natu 1453b15a09c0SNeel Natu done = 0; 1454b15a09c0SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1455b15a09c0SNeel Natu 1456b15a09c0SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1457b15a09c0SNeel Natu 1458b15a09c0SNeel Natu /* 1459b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1460b15a09c0SNeel Natu * 1461b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1462b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1463b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1464b15a09c0SNeel Natu */ 1465b15a09c0SNeel Natu vcpu_lock(vcpu); 1466b15a09c0SNeel Natu while (1) { 1467b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1468b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1469b15a09c0SNeel Natu break; 1470b15a09c0SNeel Natu } 1471b15a09c0SNeel Natu 1472b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 1473b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1474248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1475b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1476248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1477b15a09c0SNeel Natu } else { 1478b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1479b15a09c0SNeel Natu vcpu_unlock(vcpu); 1480b15a09c0SNeel Natu vm_handle_rendezvous(vm, vcpuid); 1481b15a09c0SNeel Natu vcpu_lock(vcpu); 1482b15a09c0SNeel Natu } 1483b15a09c0SNeel Natu } 1484b15a09c0SNeel Natu vcpu_unlock(vcpu); 1485b15a09c0SNeel Natu 1486b15a09c0SNeel Natu /* 1487b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1488b15a09c0SNeel Natu */ 1489b15a09c0SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1490b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 1491b15a09c0SNeel Natu vcpu_notify_event(vm, i, false); 1492b15a09c0SNeel Natu } 1493b15a09c0SNeel Natu } 1494b15a09c0SNeel Natu 1495b15a09c0SNeel Natu *retu = true; 1496b15a09c0SNeel Natu return (0); 1497b15a09c0SNeel Natu } 1498b15a09c0SNeel Natu 1499248e6799SNeel Natu static int 1500248e6799SNeel Natu vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) 1501248e6799SNeel Natu { 1502248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 1503248e6799SNeel Natu 1504248e6799SNeel Natu vcpu_lock(vcpu); 1505248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1506248e6799SNeel Natu vcpu->reqidle = 0; 1507248e6799SNeel Natu vcpu_unlock(vcpu); 1508248e6799SNeel Natu *retu = true; 1509248e6799SNeel Natu return (0); 1510248e6799SNeel Natu } 1511248e6799SNeel Natu 1512b15a09c0SNeel Natu int 1513f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1514b15a09c0SNeel Natu { 1515f0fdcfe2SNeel Natu int i; 1516b15a09c0SNeel Natu 1517f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1518f0fdcfe2SNeel Natu return (EINVAL); 1519f0fdcfe2SNeel Natu 1520f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1521f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1522f0fdcfe2SNeel Natu vm->suspend, how); 1523b15a09c0SNeel Natu return (EALREADY); 1524b15a09c0SNeel Natu } 1525f0fdcfe2SNeel Natu 1526f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1527f0fdcfe2SNeel Natu 1528f0fdcfe2SNeel Natu /* 1529f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1530f0fdcfe2SNeel Natu */ 1531f0fdcfe2SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 1532f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 1533f0fdcfe2SNeel Natu vcpu_notify_event(vm, i, false); 1534f0fdcfe2SNeel Natu } 1535f0fdcfe2SNeel Natu 1536f0fdcfe2SNeel Natu return (0); 1537f0fdcfe2SNeel Natu } 1538f0fdcfe2SNeel Natu 1539f0fdcfe2SNeel Natu void 1540f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1541f0fdcfe2SNeel Natu { 1542f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1543f0fdcfe2SNeel Natu 1544f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1545f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1546f0fdcfe2SNeel Natu 1547f0fdcfe2SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1548f0fdcfe2SNeel Natu vmexit->rip = rip; 1549f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1550f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1551f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1552b15a09c0SNeel Natu } 1553b15a09c0SNeel Natu 155440487465SNeel Natu void 155540487465SNeel Natu vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 155640487465SNeel Natu { 155740487465SNeel Natu struct vm_exit *vmexit; 155840487465SNeel Natu 155940487465SNeel Natu KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 156040487465SNeel Natu 156140487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 156240487465SNeel Natu vmexit->rip = rip; 156340487465SNeel Natu vmexit->inst_length = 0; 156440487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 156540487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 156640487465SNeel Natu } 156740487465SNeel Natu 156840487465SNeel Natu void 1569248e6799SNeel Natu vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) 1570248e6799SNeel Natu { 1571248e6799SNeel Natu struct vm_exit *vmexit; 1572248e6799SNeel Natu 1573248e6799SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1574248e6799SNeel Natu vmexit->rip = rip; 1575248e6799SNeel Natu vmexit->inst_length = 0; 1576248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 1577248e6799SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); 1578248e6799SNeel Natu } 1579248e6799SNeel Natu 1580248e6799SNeel Natu void 158140487465SNeel Natu vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 158240487465SNeel Natu { 158340487465SNeel Natu struct vm_exit *vmexit; 158440487465SNeel Natu 158540487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 158640487465SNeel Natu vmexit->rip = rip; 158740487465SNeel Natu vmexit->inst_length = 0; 158840487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 158940487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 159040487465SNeel Natu } 159140487465SNeel Natu 1592318224bbSNeel Natu int 1593318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun) 1594318224bbSNeel Natu { 1595248e6799SNeel Natu struct vm_eventinfo evinfo; 1596318224bbSNeel Natu int error, vcpuid; 1597318224bbSNeel Natu struct vcpu *vcpu; 1598318224bbSNeel Natu struct pcb *pcb; 1599d087a399SNeel Natu uint64_t tscval; 1600318224bbSNeel Natu struct vm_exit *vme; 1601becd9849SNeel Natu bool retu, intr_disabled; 1602318224bbSNeel Natu pmap_t pmap; 1603318224bbSNeel Natu 1604318224bbSNeel Natu vcpuid = vmrun->cpuid; 1605318224bbSNeel Natu 1606318224bbSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1607318224bbSNeel Natu return (EINVAL); 1608318224bbSNeel Natu 160995ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 161095ebc360SNeel Natu return (EINVAL); 161195ebc360SNeel Natu 161295ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 161395ebc360SNeel Natu return (EINVAL); 161495ebc360SNeel Natu 1615318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1616318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1617318224bbSNeel Natu vme = &vcpu->exitinfo; 1618248e6799SNeel Natu evinfo.rptr = &vm->rendezvous_func; 1619248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1620248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1621318224bbSNeel Natu restart: 1622318224bbSNeel Natu critical_enter(); 1623318224bbSNeel Natu 1624318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1625318224bbSNeel Natu ("vm_run: absurd pm_active")); 1626318224bbSNeel Natu 1627318224bbSNeel Natu tscval = rdtsc(); 1628318224bbSNeel Natu 1629318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1630318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1631318224bbSNeel Natu 1632318224bbSNeel Natu restore_guest_fpustate(vcpu); 1633318224bbSNeel Natu 1634318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1635248e6799SNeel Natu error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); 1636318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1637318224bbSNeel Natu 1638318224bbSNeel Natu save_guest_fpustate(vcpu); 1639318224bbSNeel Natu 1640318224bbSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1641318224bbSNeel Natu 1642318224bbSNeel Natu critical_exit(); 1643318224bbSNeel Natu 1644318224bbSNeel Natu if (error == 0) { 1645becd9849SNeel Natu retu = false; 1646d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1647318224bbSNeel Natu switch (vme->exitcode) { 1648248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 1649248e6799SNeel Natu error = vm_handle_reqidle(vm, vcpuid, &retu); 1650248e6799SNeel Natu break; 1651b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 1652b15a09c0SNeel Natu error = vm_handle_suspend(vm, vcpuid, &retu); 1653b15a09c0SNeel Natu break; 165430b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 165530b94db8SNeel Natu vioapic_process_eoi(vm, vcpuid, 165630b94db8SNeel Natu vme->u.ioapic_eoi.vector); 165730b94db8SNeel Natu break; 16585b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 16595b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 16605b8a8cd1SNeel Natu error = 0; 16615b8a8cd1SNeel Natu break; 1662318224bbSNeel Natu case VM_EXITCODE_HLT: 1663becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 16641c052192SNeel Natu error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1665318224bbSNeel Natu break; 1666318224bbSNeel Natu case VM_EXITCODE_PAGING: 1667318224bbSNeel Natu error = vm_handle_paging(vm, vcpuid, &retu); 1668318224bbSNeel Natu break; 1669318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 1670318224bbSNeel Natu error = vm_handle_inst_emul(vm, vcpuid, &retu); 1671318224bbSNeel Natu break; 1672d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1673d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 1674d17b5104SNeel Natu error = vm_handle_inout(vm, vcpuid, vme, &retu); 1675d17b5104SNeel Natu break; 167665145c7fSNeel Natu case VM_EXITCODE_MONITOR: 167765145c7fSNeel Natu case VM_EXITCODE_MWAIT: 167865145c7fSNeel Natu vm_inject_ud(vm, vcpuid); 167965145c7fSNeel Natu break; 1680318224bbSNeel Natu default: 1681becd9849SNeel Natu retu = true; /* handled in userland */ 1682318224bbSNeel Natu break; 1683318224bbSNeel Natu } 1684318224bbSNeel Natu } 1685318224bbSNeel Natu 1686d087a399SNeel Natu if (error == 0 && retu == false) 1687f76fc5d4SNeel Natu goto restart; 1688f76fc5d4SNeel Natu 1689248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); 1690248e6799SNeel Natu 1691318224bbSNeel Natu /* copy the exit information */ 1692318224bbSNeel Natu bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1693366f6083SPeter Grehan return (error); 1694366f6083SPeter Grehan } 1695366f6083SPeter Grehan 1696366f6083SPeter Grehan int 1697c9c75df4SNeel Natu vm_restart_instruction(void *arg, int vcpuid) 1698c9c75df4SNeel Natu { 1699d087a399SNeel Natu struct vm *vm; 1700c9c75df4SNeel Natu struct vcpu *vcpu; 1701d087a399SNeel Natu enum vcpu_state state; 1702d087a399SNeel Natu uint64_t rip; 1703d087a399SNeel Natu int error; 1704c9c75df4SNeel Natu 1705d087a399SNeel Natu vm = arg; 1706c9c75df4SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1707c9c75df4SNeel Natu return (EINVAL); 1708c9c75df4SNeel Natu 1709c9c75df4SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1710d087a399SNeel Natu state = vcpu_get_state(vm, vcpuid, NULL); 1711d087a399SNeel Natu if (state == VCPU_RUNNING) { 1712d087a399SNeel Natu /* 1713d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1714d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1715d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1716d087a399SNeel Natu * instruction to be restarted. 1717d087a399SNeel Natu */ 1718c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1719d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " 1720d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1721d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1722d087a399SNeel Natu /* 1723d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 1724d087a399SNeel Natu * around VMRUN() and 'nextrip' points to the next instruction. 1725d087a399SNeel Natu * Thus instruction restart is achieved by setting 'nextrip' 1726d087a399SNeel Natu * to the vcpu's %rip. 1727d087a399SNeel Natu */ 1728d087a399SNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); 1729d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1730d087a399SNeel Natu VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " 1731d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1732d087a399SNeel Natu vcpu->nextrip = rip; 1733d087a399SNeel Natu } else { 1734d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1735d087a399SNeel Natu } 1736c9c75df4SNeel Natu return (0); 1737c9c75df4SNeel Natu } 1738c9c75df4SNeel Natu 1739c9c75df4SNeel Natu int 1740091d4532SNeel Natu vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1741091d4532SNeel Natu { 1742091d4532SNeel Natu struct vcpu *vcpu; 1743091d4532SNeel Natu int type, vector; 1744091d4532SNeel Natu 1745091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1746091d4532SNeel Natu return (EINVAL); 1747091d4532SNeel Natu 1748091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1749091d4532SNeel Natu 1750091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1751091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1752091d4532SNeel Natu vector = info & 0xff; 1753091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1754091d4532SNeel Natu return (EINVAL); 1755091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1756091d4532SNeel Natu return (EINVAL); 1757091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1758091d4532SNeel Natu return (EINVAL); 1759091d4532SNeel Natu } else { 1760091d4532SNeel Natu info = 0; 1761091d4532SNeel Natu } 1762091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1763091d4532SNeel Natu vcpu->exitintinfo = info; 1764091d4532SNeel Natu return (0); 1765091d4532SNeel Natu } 1766091d4532SNeel Natu 1767091d4532SNeel Natu enum exc_class { 1768091d4532SNeel Natu EXC_BENIGN, 1769091d4532SNeel Natu EXC_CONTRIBUTORY, 1770091d4532SNeel Natu EXC_PAGEFAULT 1771091d4532SNeel Natu }; 1772091d4532SNeel Natu 1773091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1774091d4532SNeel Natu 1775091d4532SNeel Natu static enum exc_class 1776091d4532SNeel Natu exception_class(uint64_t info) 1777091d4532SNeel Natu { 1778091d4532SNeel Natu int type, vector; 1779091d4532SNeel Natu 1780091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1781091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1782091d4532SNeel Natu vector = info & 0xff; 1783091d4532SNeel Natu 1784091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1785091d4532SNeel Natu switch (type) { 1786091d4532SNeel Natu case VM_INTINFO_HWINTR: 1787091d4532SNeel Natu case VM_INTINFO_SWINTR: 1788091d4532SNeel Natu case VM_INTINFO_NMI: 1789091d4532SNeel Natu return (EXC_BENIGN); 1790091d4532SNeel Natu default: 1791091d4532SNeel Natu /* 1792091d4532SNeel Natu * Hardware exception. 1793091d4532SNeel Natu * 1794091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1795091d4532SNeel Natu * hardware interrupt and software interrupt. 1796091d4532SNeel Natu * 1797091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1798091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1799091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1800091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1801091d4532SNeel Natu * exception. 1802091d4532SNeel Natu */ 1803091d4532SNeel Natu break; 1804091d4532SNeel Natu } 1805091d4532SNeel Natu 1806091d4532SNeel Natu switch (vector) { 1807091d4532SNeel Natu case IDT_PF: 1808091d4532SNeel Natu case IDT_VE: 1809091d4532SNeel Natu return (EXC_PAGEFAULT); 1810091d4532SNeel Natu case IDT_DE: 1811091d4532SNeel Natu case IDT_TS: 1812091d4532SNeel Natu case IDT_NP: 1813091d4532SNeel Natu case IDT_SS: 1814091d4532SNeel Natu case IDT_GP: 1815091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1816091d4532SNeel Natu default: 1817091d4532SNeel Natu return (EXC_BENIGN); 1818091d4532SNeel Natu } 1819091d4532SNeel Natu } 1820091d4532SNeel Natu 1821091d4532SNeel Natu static int 1822091d4532SNeel Natu nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1823091d4532SNeel Natu uint64_t *retinfo) 1824091d4532SNeel Natu { 1825091d4532SNeel Natu enum exc_class exc1, exc2; 1826091d4532SNeel Natu int type1, vector1; 1827091d4532SNeel Natu 1828091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1829091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1830091d4532SNeel Natu 1831091d4532SNeel Natu /* 1832091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1833091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 1834091d4532SNeel Natu */ 1835091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1836091d4532SNeel Natu vector1 = info1 & 0xff; 1837091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1838091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1839091d4532SNeel Natu info1, info2); 1840091d4532SNeel Natu vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1841091d4532SNeel Natu *retinfo = 0; 1842091d4532SNeel Natu return (0); 1843091d4532SNeel Natu } 1844091d4532SNeel Natu 1845091d4532SNeel Natu /* 1846091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1847091d4532SNeel Natu */ 1848091d4532SNeel Natu exc1 = exception_class(info1); 1849091d4532SNeel Natu exc2 = exception_class(info2); 1850091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1851091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1852091d4532SNeel Natu /* Convert nested fault into a double fault. */ 1853091d4532SNeel Natu *retinfo = IDT_DF; 1854091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1855091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 1856091d4532SNeel Natu } else { 1857091d4532SNeel Natu /* Handle exceptions serially */ 1858091d4532SNeel Natu *retinfo = info2; 1859091d4532SNeel Natu } 1860091d4532SNeel Natu return (1); 1861091d4532SNeel Natu } 1862091d4532SNeel Natu 1863091d4532SNeel Natu static uint64_t 1864091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 1865091d4532SNeel Natu { 1866091d4532SNeel Natu uint64_t info = 0; 1867091d4532SNeel Natu 1868091d4532SNeel Natu if (vcpu->exception_pending) { 1869c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 1870091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1871c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 1872091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 1873c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 1874091d4532SNeel Natu } 1875091d4532SNeel Natu } 1876091d4532SNeel Natu return (info); 1877091d4532SNeel Natu } 1878091d4532SNeel Natu 1879091d4532SNeel Natu int 1880091d4532SNeel Natu vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1881091d4532SNeel Natu { 1882091d4532SNeel Natu struct vcpu *vcpu; 1883091d4532SNeel Natu uint64_t info1, info2; 1884091d4532SNeel Natu int valid; 1885091d4532SNeel Natu 1886091d4532SNeel Natu KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); 1887091d4532SNeel Natu 1888091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1889091d4532SNeel Natu 1890091d4532SNeel Natu info1 = vcpu->exitintinfo; 1891091d4532SNeel Natu vcpu->exitintinfo = 0; 1892091d4532SNeel Natu 1893091d4532SNeel Natu info2 = 0; 1894091d4532SNeel Natu if (vcpu->exception_pending) { 1895091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 1896091d4532SNeel Natu vcpu->exception_pending = 0; 1897091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 1898c9c75df4SNeel Natu vcpu->exc_vector, info2); 1899091d4532SNeel Natu } 1900091d4532SNeel Natu 1901091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 1902091d4532SNeel Natu valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 1903091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 1904091d4532SNeel Natu *retinfo = info1; 1905091d4532SNeel Natu valid = 1; 1906091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 1907091d4532SNeel Natu *retinfo = info2; 1908091d4532SNeel Natu valid = 1; 1909091d4532SNeel Natu } else { 1910091d4532SNeel Natu valid = 0; 1911091d4532SNeel Natu } 1912091d4532SNeel Natu 1913091d4532SNeel Natu if (valid) { 1914091d4532SNeel Natu VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 1915091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 1916091d4532SNeel Natu } 1917091d4532SNeel Natu 1918091d4532SNeel Natu return (valid); 1919091d4532SNeel Natu } 1920091d4532SNeel Natu 1921091d4532SNeel Natu int 1922091d4532SNeel Natu vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 1923091d4532SNeel Natu { 1924091d4532SNeel Natu struct vcpu *vcpu; 1925091d4532SNeel Natu 1926091d4532SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1927091d4532SNeel Natu return (EINVAL); 1928091d4532SNeel Natu 1929091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1930091d4532SNeel Natu *info1 = vcpu->exitintinfo; 1931091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 1932091d4532SNeel Natu return (0); 1933091d4532SNeel Natu } 1934091d4532SNeel Natu 1935091d4532SNeel Natu int 1936c9c75df4SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, 1937c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 1938366f6083SPeter Grehan { 1939dc506506SNeel Natu struct vcpu *vcpu; 194047b9935dSNeel Natu uint64_t regval; 19412ce12423SNeel Natu int error; 1942dc506506SNeel Natu 1943366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1944366f6083SPeter Grehan return (EINVAL); 1945366f6083SPeter Grehan 1946c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 1947366f6083SPeter Grehan return (EINVAL); 1948366f6083SPeter Grehan 1949091d4532SNeel Natu /* 1950091d4532SNeel Natu * A double fault exception should never be injected directly into 1951091d4532SNeel Natu * the guest. It is a derived exception that results from specific 1952091d4532SNeel Natu * combinations of nested faults. 1953091d4532SNeel Natu */ 1954c9c75df4SNeel Natu if (vector == IDT_DF) 1955091d4532SNeel Natu return (EINVAL); 1956091d4532SNeel Natu 1957dc506506SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1958366f6083SPeter Grehan 1959dc506506SNeel Natu if (vcpu->exception_pending) { 1960dc506506SNeel Natu VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 1961c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 1962dc506506SNeel Natu return (EBUSY); 1963dc506506SNeel Natu } 1964dc506506SNeel Natu 196547b9935dSNeel Natu if (errcode_valid) { 196647b9935dSNeel Natu /* 196747b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 196847b9935dSNeel Natu */ 196947b9935dSNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, ®val); 197047b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 197147b9935dSNeel Natu if (!(regval & CR0_PE)) 197247b9935dSNeel Natu errcode_valid = 0; 197347b9935dSNeel Natu } 197447b9935dSNeel Natu 19752ce12423SNeel Natu /* 19762ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 19772ce12423SNeel Natu * 19782ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 19792ce12423SNeel Natu * one instruction or incurs an exception. 19802ce12423SNeel Natu */ 19812ce12423SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); 19822ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 19832ce12423SNeel Natu __func__, error)); 19842ce12423SNeel Natu 1985c9c75df4SNeel Natu if (restart_instruction) 1986c9c75df4SNeel Natu vm_restart_instruction(vm, vcpuid); 1987c9c75df4SNeel Natu 1988dc506506SNeel Natu vcpu->exception_pending = 1; 1989c9c75df4SNeel Natu vcpu->exc_vector = vector; 1990c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 1991c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 1992c9c75df4SNeel Natu VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); 1993dc506506SNeel Natu return (0); 1994dc506506SNeel Natu } 1995dc506506SNeel Natu 1996d37f2adbSNeel Natu void 1997d37f2adbSNeel Natu vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1998d37f2adbSNeel Natu int errcode) 1999dc506506SNeel Natu { 2000d37f2adbSNeel Natu struct vm *vm; 2001c9c75df4SNeel Natu int error, restart_instruction; 2002dc506506SNeel Natu 2003d37f2adbSNeel Natu vm = vmarg; 2004c9c75df4SNeel Natu restart_instruction = 1; 2005d37f2adbSNeel Natu 2006c9c75df4SNeel Natu error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, 2007c9c75df4SNeel Natu errcode, restart_instruction); 2008dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 2009dc506506SNeel Natu } 2010dc506506SNeel Natu 2011dc506506SNeel Natu void 2012d37f2adbSNeel Natu vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 2013fd949af6SNeel Natu { 2014d37f2adbSNeel Natu struct vm *vm; 201537a723a5SNeel Natu int error; 201637a723a5SNeel Natu 2017d37f2adbSNeel Natu vm = vmarg; 201837a723a5SNeel Natu VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 201937a723a5SNeel Natu error_code, cr2); 202037a723a5SNeel Natu 202137a723a5SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 202237a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 2023fd949af6SNeel Natu 2024d37f2adbSNeel Natu vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 2025366f6083SPeter Grehan } 2026366f6083SPeter Grehan 202761592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 2028366f6083SPeter Grehan 2029f352ff0cSNeel Natu int 2030f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid) 2031f352ff0cSNeel Natu { 2032f352ff0cSNeel Natu struct vcpu *vcpu; 2033f352ff0cSNeel Natu 2034f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2035366f6083SPeter Grehan return (EINVAL); 2036366f6083SPeter Grehan 2037f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2038f352ff0cSNeel Natu 2039f352ff0cSNeel Natu vcpu->nmi_pending = 1; 2040de5ea6b6SNeel Natu vcpu_notify_event(vm, vcpuid, false); 2041f352ff0cSNeel Natu return (0); 2042f352ff0cSNeel Natu } 2043f352ff0cSNeel Natu 2044f352ff0cSNeel Natu int 2045f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid) 2046f352ff0cSNeel Natu { 2047f352ff0cSNeel Natu struct vcpu *vcpu; 2048f352ff0cSNeel Natu 2049f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2050f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2051f352ff0cSNeel Natu 2052f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2053f352ff0cSNeel Natu 2054f352ff0cSNeel Natu return (vcpu->nmi_pending); 2055f352ff0cSNeel Natu } 2056f352ff0cSNeel Natu 2057f352ff0cSNeel Natu void 2058f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid) 2059f352ff0cSNeel Natu { 2060f352ff0cSNeel Natu struct vcpu *vcpu; 2061f352ff0cSNeel Natu 2062f352ff0cSNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2063f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2064f352ff0cSNeel Natu 2065f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2066f352ff0cSNeel Natu 2067f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 2068f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 2069f352ff0cSNeel Natu 2070f352ff0cSNeel Natu vcpu->nmi_pending = 0; 2071f352ff0cSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 2072366f6083SPeter Grehan } 2073366f6083SPeter Grehan 20740775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 20750775fbb4STycho Nightingale 20760775fbb4STycho Nightingale int 20770775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid) 20780775fbb4STycho Nightingale { 20790775fbb4STycho Nightingale struct vcpu *vcpu; 20800775fbb4STycho Nightingale 20810775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20820775fbb4STycho Nightingale return (EINVAL); 20830775fbb4STycho Nightingale 20840775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 20850775fbb4STycho Nightingale 20860775fbb4STycho Nightingale vcpu->extint_pending = 1; 20870775fbb4STycho Nightingale vcpu_notify_event(vm, vcpuid, false); 20880775fbb4STycho Nightingale return (0); 20890775fbb4STycho Nightingale } 20900775fbb4STycho Nightingale 20910775fbb4STycho Nightingale int 20920775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid) 20930775fbb4STycho Nightingale { 20940775fbb4STycho Nightingale struct vcpu *vcpu; 20950775fbb4STycho Nightingale 20960775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 20970775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 20980775fbb4STycho Nightingale 20990775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 21000775fbb4STycho Nightingale 21010775fbb4STycho Nightingale return (vcpu->extint_pending); 21020775fbb4STycho Nightingale } 21030775fbb4STycho Nightingale 21040775fbb4STycho Nightingale void 21050775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid) 21060775fbb4STycho Nightingale { 21070775fbb4STycho Nightingale struct vcpu *vcpu; 21080775fbb4STycho Nightingale 21090775fbb4STycho Nightingale if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 21100775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 21110775fbb4STycho Nightingale 21120775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 21130775fbb4STycho Nightingale 21140775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 21150775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 21160775fbb4STycho Nightingale 21170775fbb4STycho Nightingale vcpu->extint_pending = 0; 21180775fbb4STycho Nightingale vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 21190775fbb4STycho Nightingale } 21200775fbb4STycho Nightingale 2121366f6083SPeter Grehan int 2122366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 2123366f6083SPeter Grehan { 2124366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2125366f6083SPeter Grehan return (EINVAL); 2126366f6083SPeter Grehan 2127366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2128366f6083SPeter Grehan return (EINVAL); 2129366f6083SPeter Grehan 2130366f6083SPeter Grehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 2131366f6083SPeter Grehan } 2132366f6083SPeter Grehan 2133366f6083SPeter Grehan int 2134366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val) 2135366f6083SPeter Grehan { 2136366f6083SPeter Grehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 2137366f6083SPeter Grehan return (EINVAL); 2138366f6083SPeter Grehan 2139366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2140366f6083SPeter Grehan return (EINVAL); 2141366f6083SPeter Grehan 2142366f6083SPeter Grehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 2143366f6083SPeter Grehan } 2144366f6083SPeter Grehan 2145366f6083SPeter Grehan struct vlapic * 2146366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu) 2147366f6083SPeter Grehan { 2148366f6083SPeter Grehan return (vm->vcpu[cpu].vlapic); 2149366f6083SPeter Grehan } 2150366f6083SPeter Grehan 2151565bbb86SNeel Natu struct vioapic * 2152565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2153565bbb86SNeel Natu { 2154565bbb86SNeel Natu 2155565bbb86SNeel Natu return (vm->vioapic); 2156565bbb86SNeel Natu } 2157565bbb86SNeel Natu 215808e3ff32SNeel Natu struct vhpet * 215908e3ff32SNeel Natu vm_hpet(struct vm *vm) 216008e3ff32SNeel Natu { 216108e3ff32SNeel Natu 216208e3ff32SNeel Natu return (vm->vhpet); 216308e3ff32SNeel Natu } 216408e3ff32SNeel Natu 2165366f6083SPeter Grehan boolean_t 2166366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2167366f6083SPeter Grehan { 216807044a96SNeel Natu int found, i, n; 216907044a96SNeel Natu int b, s, f; 2170366f6083SPeter Grehan char *val, *cp, *cp2; 2171366f6083SPeter Grehan 2172366f6083SPeter Grehan /* 217307044a96SNeel Natu * XXX 217407044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 217507044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 217607044a96SNeel Natu * specified using a single environment variable. 217707044a96SNeel Natu * 217807044a96SNeel Natu * Work around this by scanning multiple environment variable 217907044a96SNeel Natu * names instead of a single one - yuck! 2180366f6083SPeter Grehan */ 218107044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 218207044a96SNeel Natu 218307044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2184366f6083SPeter Grehan found = 0; 218507044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 21862be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2187366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2188366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2189366f6083SPeter Grehan *cp2 = '\0'; 2190366f6083SPeter Grehan 2191366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2192366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2193366f6083SPeter Grehan found = 1; 2194366f6083SPeter Grehan break; 2195366f6083SPeter Grehan } 2196366f6083SPeter Grehan 2197366f6083SPeter Grehan if (cp2 != NULL) 2198366f6083SPeter Grehan *cp2++ = ' '; 2199366f6083SPeter Grehan 2200366f6083SPeter Grehan cp = cp2; 2201366f6083SPeter Grehan } 2202366f6083SPeter Grehan freeenv(val); 220307044a96SNeel Natu } 2204366f6083SPeter Grehan return (found); 2205366f6083SPeter Grehan } 2206366f6083SPeter Grehan 2207366f6083SPeter Grehan void * 2208366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2209366f6083SPeter Grehan { 2210366f6083SPeter Grehan 2211366f6083SPeter Grehan return (vm->iommu); 2212366f6083SPeter Grehan } 2213366f6083SPeter Grehan 221475dd3366SNeel Natu int 2215f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 2216f80330a8SNeel Natu bool from_idle) 2217366f6083SPeter Grehan { 221875dd3366SNeel Natu int error; 2219366f6083SPeter Grehan struct vcpu *vcpu; 2220366f6083SPeter Grehan 2221366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2222366f6083SPeter Grehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 2223366f6083SPeter Grehan 2224366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2225366f6083SPeter Grehan 222675dd3366SNeel Natu vcpu_lock(vcpu); 2227248e6799SNeel Natu error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); 222875dd3366SNeel Natu vcpu_unlock(vcpu); 222975dd3366SNeel Natu 223075dd3366SNeel Natu return (error); 223175dd3366SNeel Natu } 223275dd3366SNeel Natu 223375dd3366SNeel Natu enum vcpu_state 2234d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 2235366f6083SPeter Grehan { 2236366f6083SPeter Grehan struct vcpu *vcpu; 223775dd3366SNeel Natu enum vcpu_state state; 2238366f6083SPeter Grehan 2239366f6083SPeter Grehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2240366f6083SPeter Grehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 2241366f6083SPeter Grehan 2242366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2243366f6083SPeter Grehan 224475dd3366SNeel Natu vcpu_lock(vcpu); 224575dd3366SNeel Natu state = vcpu->state; 2246d3c11f40SPeter Grehan if (hostcpu != NULL) 2247d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 224875dd3366SNeel Natu vcpu_unlock(vcpu); 2249366f6083SPeter Grehan 225075dd3366SNeel Natu return (state); 2251366f6083SPeter Grehan } 2252366f6083SPeter Grehan 225395ebc360SNeel Natu int 2254366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid) 2255366f6083SPeter Grehan { 2256366f6083SPeter Grehan 225795ebc360SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 225895ebc360SNeel Natu return (EINVAL); 225995ebc360SNeel Natu 226095ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->active_cpus)) 226195ebc360SNeel Natu return (EBUSY); 226222d822c6SNeel Natu 226322d822c6SNeel Natu VCPU_CTR0(vm, vcpuid, "activated"); 226422d822c6SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 226595ebc360SNeel Natu return (0); 2266366f6083SPeter Grehan } 2267366f6083SPeter Grehan 2268a5615c90SPeter Grehan cpuset_t 2269366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2270366f6083SPeter Grehan { 2271366f6083SPeter Grehan 2272366f6083SPeter Grehan return (vm->active_cpus); 2273366f6083SPeter Grehan } 2274366f6083SPeter Grehan 227595ebc360SNeel Natu cpuset_t 227695ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 227795ebc360SNeel Natu { 227895ebc360SNeel Natu 227995ebc360SNeel Natu return (vm->suspended_cpus); 228095ebc360SNeel Natu } 228195ebc360SNeel Natu 2282366f6083SPeter Grehan void * 2283366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid) 2284366f6083SPeter Grehan { 2285366f6083SPeter Grehan 2286366f6083SPeter Grehan return (vm->vcpu[vcpuid].stats); 2287366f6083SPeter Grehan } 2288e9027382SNeel Natu 2289e9027382SNeel Natu int 2290e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2291e9027382SNeel Natu { 2292e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2293e9027382SNeel Natu return (EINVAL); 2294e9027382SNeel Natu 2295e9027382SNeel Natu *state = vm->vcpu[vcpuid].x2apic_state; 2296e9027382SNeel Natu 2297e9027382SNeel Natu return (0); 2298e9027382SNeel Natu } 2299e9027382SNeel Natu 2300e9027382SNeel Natu int 2301e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2302e9027382SNeel Natu { 2303e9027382SNeel Natu if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2304e9027382SNeel Natu return (EINVAL); 2305e9027382SNeel Natu 23063f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2307e9027382SNeel Natu return (EINVAL); 2308e9027382SNeel Natu 2309e9027382SNeel Natu vm->vcpu[vcpuid].x2apic_state = state; 2310e9027382SNeel Natu 231173820fb0SNeel Natu vlapic_set_x2apic_state(vm, vcpuid, state); 231273820fb0SNeel Natu 2313e9027382SNeel Natu return (0); 2314e9027382SNeel Natu } 231575dd3366SNeel Natu 231622821874SNeel Natu /* 231722821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 231822821874SNeel Natu * as soon as possible: 231922821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 232022821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 232122821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 232222821874SNeel Natu */ 2323248e6799SNeel Natu static void 2324248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 232575dd3366SNeel Natu { 232675dd3366SNeel Natu int hostcpu; 232775dd3366SNeel Natu 232875dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2329ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2330ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2331de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2332ef39d7e9SNeel Natu if (lapic_intr) { 2333add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2334add611fdSNeel Natu vmm_ipinum); 2335ef39d7e9SNeel Natu } else { 233675dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 233775dd3366SNeel Natu } 2338ef39d7e9SNeel Natu } else { 2339ef39d7e9SNeel Natu /* 2340ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2341ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2342ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2343ef39d7e9SNeel Natu * transitions back to guest context. 2344ef39d7e9SNeel Natu */ 2345ef39d7e9SNeel Natu } 2346ef39d7e9SNeel Natu } else { 2347ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2348ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2349366f6083SPeter Grehan if (vcpu->state == VCPU_SLEEPING) 2350366f6083SPeter Grehan wakeup_one(vcpu); 2351366f6083SPeter Grehan } 2352248e6799SNeel Natu } 2353248e6799SNeel Natu 2354248e6799SNeel Natu void 2355248e6799SNeel Natu vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 2356248e6799SNeel Natu { 2357248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 2358248e6799SNeel Natu 2359248e6799SNeel Natu vcpu_lock(vcpu); 2360248e6799SNeel Natu vcpu_notify_event_locked(vcpu, lapic_intr); 2361f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2362f76fc5d4SNeel Natu } 2363318224bbSNeel Natu 2364318224bbSNeel Natu struct vmspace * 2365318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2366318224bbSNeel Natu { 2367318224bbSNeel Natu 2368318224bbSNeel Natu return (vm->vmspace); 2369318224bbSNeel Natu } 2370565bbb86SNeel Natu 2371565bbb86SNeel Natu int 2372565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2373565bbb86SNeel Natu { 2374565bbb86SNeel Natu /* 2375565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2376565bbb86SNeel Natu */ 2377565bbb86SNeel Natu return (apicid); 2378565bbb86SNeel Natu } 23795b8a8cd1SNeel Natu 23805b8a8cd1SNeel Natu void 23815b8a8cd1SNeel Natu vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 23825b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 23835b8a8cd1SNeel Natu { 2384970955e4SNeel Natu int i; 2385970955e4SNeel Natu 23865b8a8cd1SNeel Natu /* 23875b8a8cd1SNeel Natu * Enforce that this function is called without any locks 23885b8a8cd1SNeel Natu */ 23895b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 23905b8a8cd1SNeel Natu KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 23915b8a8cd1SNeel Natu ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 23925b8a8cd1SNeel Natu 23935b8a8cd1SNeel Natu restart: 23945b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 23955b8a8cd1SNeel Natu if (vm->rendezvous_func != NULL) { 23965b8a8cd1SNeel Natu /* 23975b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 23985b8a8cd1SNeel Natu * call the rendezvous handler in case this 'vcpuid' is one 23995b8a8cd1SNeel Natu * of the targets of the rendezvous. 24005b8a8cd1SNeel Natu */ 24015b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 24025b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 24035b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 24045b8a8cd1SNeel Natu goto restart; 24055b8a8cd1SNeel Natu } 24065b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 24075b8a8cd1SNeel Natu "rendezvous is still in progress")); 24085b8a8cd1SNeel Natu 24095b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 24105b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 24115b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 24125b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 24135b8a8cd1SNeel Natu vm_set_rendezvous_func(vm, func); 24145b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 24155b8a8cd1SNeel Natu 2416970955e4SNeel Natu /* 2417970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2418970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2419970955e4SNeel Natu */ 2420970955e4SNeel Natu for (i = 0; i < VM_MAXCPU; i++) { 2421970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 2422970955e4SNeel Natu vcpu_notify_event(vm, i, false); 2423970955e4SNeel Natu } 2424970955e4SNeel Natu 24255b8a8cd1SNeel Natu vm_handle_rendezvous(vm, vcpuid); 24265b8a8cd1SNeel Natu } 2427762fd208STycho Nightingale 2428762fd208STycho Nightingale struct vatpic * 2429762fd208STycho Nightingale vm_atpic(struct vm *vm) 2430762fd208STycho Nightingale { 2431762fd208STycho Nightingale return (vm->vatpic); 2432762fd208STycho Nightingale } 2433e883c9bbSTycho Nightingale 2434e883c9bbSTycho Nightingale struct vatpit * 2435e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2436e883c9bbSTycho Nightingale { 2437e883c9bbSTycho Nightingale return (vm->vatpit); 2438e883c9bbSTycho Nightingale } 2439d17b5104SNeel Natu 2440160ef77aSNeel Natu struct vpmtmr * 2441160ef77aSNeel Natu vm_pmtmr(struct vm *vm) 2442160ef77aSNeel Natu { 2443160ef77aSNeel Natu 2444160ef77aSNeel Natu return (vm->vpmtmr); 2445160ef77aSNeel Natu } 2446160ef77aSNeel Natu 24470dafa5cdSNeel Natu struct vrtc * 24480dafa5cdSNeel Natu vm_rtc(struct vm *vm) 24490dafa5cdSNeel Natu { 24500dafa5cdSNeel Natu 24510dafa5cdSNeel Natu return (vm->vrtc); 24520dafa5cdSNeel Natu } 24530dafa5cdSNeel Natu 2454d17b5104SNeel Natu enum vm_reg_name 2455d17b5104SNeel Natu vm_segment_name(int seg) 2456d17b5104SNeel Natu { 2457d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2458d17b5104SNeel Natu VM_REG_GUEST_ES, 2459d17b5104SNeel Natu VM_REG_GUEST_CS, 2460d17b5104SNeel Natu VM_REG_GUEST_SS, 2461d17b5104SNeel Natu VM_REG_GUEST_DS, 2462d17b5104SNeel Natu VM_REG_GUEST_FS, 2463d17b5104SNeel Natu VM_REG_GUEST_GS 2464d17b5104SNeel Natu }; 2465d17b5104SNeel Natu 2466d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2467d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2468d17b5104SNeel Natu return (seg_names[seg]); 2469d17b5104SNeel Natu } 2470cf1d80d8SPeter Grehan 2471d665d229SNeel Natu void 2472d665d229SNeel Natu vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2473d665d229SNeel Natu int num_copyinfo) 2474d665d229SNeel Natu { 2475d665d229SNeel Natu int idx; 2476d665d229SNeel Natu 2477d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2478d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2479d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2480d665d229SNeel Natu } 2481d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2482d665d229SNeel Natu } 2483d665d229SNeel Natu 2484d665d229SNeel Natu int 2485d665d229SNeel Natu vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2486d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 24879c4d5478SNeel Natu int num_copyinfo, int *fault) 2488d665d229SNeel Natu { 2489d665d229SNeel Natu int error, idx, nused; 2490d665d229SNeel Natu size_t n, off, remaining; 2491d665d229SNeel Natu void *hva, *cookie; 2492d665d229SNeel Natu uint64_t gpa; 2493d665d229SNeel Natu 2494d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2495d665d229SNeel Natu 2496d665d229SNeel Natu nused = 0; 2497d665d229SNeel Natu remaining = len; 2498d665d229SNeel Natu while (remaining > 0) { 2499d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 25009c4d5478SNeel Natu error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); 25019c4d5478SNeel Natu if (error || *fault) 2502d665d229SNeel Natu return (error); 2503d665d229SNeel Natu off = gpa & PAGE_MASK; 2504d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2505d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2506d665d229SNeel Natu copyinfo[nused].len = n; 2507d665d229SNeel Natu remaining -= n; 2508d665d229SNeel Natu gla += n; 2509d665d229SNeel Natu nused++; 2510d665d229SNeel Natu } 2511d665d229SNeel Natu 2512d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 25139b1aa8d6SNeel Natu hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa, 25149b1aa8d6SNeel Natu copyinfo[idx].len, prot, &cookie); 2515d665d229SNeel Natu if (hva == NULL) 2516d665d229SNeel Natu break; 2517d665d229SNeel Natu copyinfo[idx].hva = hva; 2518d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2519d665d229SNeel Natu } 2520d665d229SNeel Natu 2521d665d229SNeel Natu if (idx != nused) { 2522d665d229SNeel Natu vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 25239c4d5478SNeel Natu return (EFAULT); 2524d665d229SNeel Natu } else { 25259c4d5478SNeel Natu *fault = 0; 2526d665d229SNeel Natu return (0); 2527d665d229SNeel Natu } 2528d665d229SNeel Natu } 2529d665d229SNeel Natu 2530d665d229SNeel Natu void 2531d665d229SNeel Natu vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2532d665d229SNeel Natu size_t len) 2533d665d229SNeel Natu { 2534d665d229SNeel Natu char *dst; 2535d665d229SNeel Natu int idx; 2536d665d229SNeel Natu 2537d665d229SNeel Natu dst = kaddr; 2538d665d229SNeel Natu idx = 0; 2539d665d229SNeel Natu while (len > 0) { 2540d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2541d665d229SNeel Natu len -= copyinfo[idx].len; 2542d665d229SNeel Natu dst += copyinfo[idx].len; 2543d665d229SNeel Natu idx++; 2544d665d229SNeel Natu } 2545d665d229SNeel Natu } 2546d665d229SNeel Natu 2547d665d229SNeel Natu void 2548d665d229SNeel Natu vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2549d665d229SNeel Natu struct vm_copyinfo *copyinfo, size_t len) 2550d665d229SNeel Natu { 2551d665d229SNeel Natu const char *src; 2552d665d229SNeel Natu int idx; 2553d665d229SNeel Natu 2554d665d229SNeel Natu src = kaddr; 2555d665d229SNeel Natu idx = 0; 2556d665d229SNeel Natu while (len > 0) { 2557d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2558d665d229SNeel Natu len -= copyinfo[idx].len; 2559d665d229SNeel Natu src += copyinfo[idx].len; 2560d665d229SNeel Natu idx++; 2561d665d229SNeel Natu } 2562d665d229SNeel Natu } 2563cf1d80d8SPeter Grehan 2564cf1d80d8SPeter Grehan /* 2565cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. Since 2566cf1d80d8SPeter Grehan * these are global stats, only return the values with for vCPU 0 2567cf1d80d8SPeter Grehan */ 2568cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2569cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2570cf1d80d8SPeter Grehan 2571cf1d80d8SPeter Grehan static void 2572cf1d80d8SPeter Grehan vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2573cf1d80d8SPeter Grehan { 2574cf1d80d8SPeter Grehan 2575cf1d80d8SPeter Grehan if (vcpu == 0) { 2576cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2577cf1d80d8SPeter Grehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2578cf1d80d8SPeter Grehan } 2579cf1d80d8SPeter Grehan } 2580cf1d80d8SPeter Grehan 2581cf1d80d8SPeter Grehan static void 2582cf1d80d8SPeter Grehan vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2583cf1d80d8SPeter Grehan { 2584cf1d80d8SPeter Grehan 2585cf1d80d8SPeter Grehan if (vcpu == 0) { 2586cf1d80d8SPeter Grehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2587cf1d80d8SPeter Grehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2588cf1d80d8SPeter Grehan } 2589cf1d80d8SPeter Grehan } 2590cf1d80d8SPeter Grehan 2591cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2592cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2593