/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <x86/ifunc.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

/*
 * Initialization:
 * (a) allocated when vcpu is created
 * (i) initialized when vcpu is created and when it is reinitialized
 * (o) initialized the first time the vcpu is created
 * (x) initialized before use
 */
struct vcpu {
	struct mtx	mtx;		/* (o) protects 'state' and 'hostcpu' */
	enum vcpu_state	state;		/* (o) vcpu state */
	int		hostcpu;	/* (o) vcpu's host cpu */
	int		reqidle;	/* (i) request vcpu to idle */
	struct vlapic	*vlapic;	/* (i) APIC device model */
	enum x2apic_state x2apic_state;	/* (i) APIC mode */
	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
	int		nmi_pending;	/* (i) NMI pending */
	int		extint_pending;	/* (i) INTR pending */
	int		exception_pending; /* (i) exception pending */
	int		exc_vector;	/* (x) exception collateral */
	int		exc_errcode_valid;
	uint32_t	exc_errcode;
	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
	void		*stats;		/* (a,i) statistics */
	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
	uint64_t	nextrip;	/* (x) next instruction to execute */
	uint64_t	tsc_offset;	/* (o) TSC offsetting */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	size_t	len;
	bool	sysmem;
	struct vm_object *object;
};
#define	VM_MAX_MEMSEGS	3

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	8

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	void		*iommu;			/* (x) iommu-specific data */
	struct vhpet	*vhpet;			/* (i) virtual HPET */
	struct vioapic	*vioapic;		/* (i) virtual ioapic */
	struct vatpic	*vatpic;		/* (i) virtual atpic */
	struct vatpit	*vatpit;		/* (i) virtual atpit */
	struct vpmtmr	*vpmtmr;		/* (i) virtual ACPI PM timer */
	struct vrtc	*vrtc;			/* (o) virtual RTC */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
	vm_rendezvous_func_t rendezvous_func;
	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
};

static int vmm_initialized;

static void	vmmops_panic(void);

static void
vmmops_panic(void)
{
	panic("vmm_ops func called when !vmm_is_intel() && !vmm_is_svm()");
}

#define	DEFINE_VMMOPS_IFUNC(ret_type, opname, args)		\
    DEFINE_IFUNC(static, ret_type, vmmops_##opname, args)	\
    {								\
	if (vmm_is_intel())					\
		return (vmm_ops_intel.opname);			\
	else if (vmm_is_svm())					\
		return (vmm_ops_amd.opname);			\
	else							\
		return ((ret_type (*)args)vmmops_panic);	\
    }

DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
DEFINE_VMMOPS_IFUNC(void, modresume, (void))
DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
DEFINE_VMMOPS_IFUNC(int, run, (void *vmi, int vcpu, register_t rip,
    struct pmap *pmap, struct vm_eventinfo *info))
DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
DEFINE_VMMOPS_IFUNC(int, getreg, (void *vmi, int vcpu, int num,
    uint64_t *retval))
DEFINE_VMMOPS_IFUNC(int, setreg, (void *vmi, int vcpu, int num,
    uint64_t val))
DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vmi, int vcpu, int num,
    struct seg_desc *desc))
DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vmi, int vcpu, int num,
    struct seg_desc *desc))
DEFINE_VMMOPS_IFUNC(int, getcap, (void *vmi, int vcpu, int num, int *retval))
DEFINE_VMMOPS_IFUNC(int, setcap, (void *vmi, int vcpu, int num, int val))
DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
    vm_offset_t max))
DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vmi, int vcpu))
DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (void *vmi, struct vlapic *vlapic))
#ifdef BHYVE_SNAPSHOT
DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta
    *meta))
DEFINE_VMMOPS_IFUNC(int, vmcx_snapshot, (void *vmi, struct vm_snapshot_meta
    *meta, int vcpu))
DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vmi, int vcpuid, uint64_t now))
#endif

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

SDT_PROVIDER_DEFINE(vmm);

static MALLOC_DEFINE(M_VM, "vm", "vm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static int trace_guest_exceptions;
SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
    &trace_guest_exceptions, 0,
    "Trap into hypervisor on all guest exceptions and reflect them back");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);

#ifdef KTR
static const char *
vcpu_state2str(enum vcpu_state state)
{

	switch (state) {
	case VCPU_IDLE:
		return ("idle");
	case VCPU_FROZEN:
		return ("frozen");
	case VCPU_RUNNING:
		return ("running");
	case VCPU_SLEEPING:
		return ("sleeping");
	default:
		return ("unknown");
	}
}
#endif
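/*
 * Release a vcpu's backend state.  The vlapic model is always torn down;
 * the FPU save area and statistics buffer are freed only on a full
 * destroy so they can be reused across a VM reinit.
 */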
static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	vmmops_vlapic_cleanup(vm->cookie, vcpu->vlapic);
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
	}
}

static void
vcpu_init(struct vm *vm, int vcpu_id, bool create)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_init: invalid vcpu %d", vcpu_id));

	vcpu = &vm->vcpu[vcpu_id];

	if (create) {
		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
		    "initialized", vcpu_id));
		vcpu_lock_init(vcpu);
		vcpu->state = VCPU_IDLE;
		vcpu->hostcpu = NOCPU;
		vcpu->guestfpu = fpu_save_area_alloc();
		vcpu->stats = vmm_stat_alloc();
		vcpu->tsc_offset = 0;
	}

	vcpu->vlapic = vmmops_vlapic_init(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->reqidle = 0;
	vcpu->exitintinfo = 0;
	vcpu->nmi_pending = 0;
	vcpu->extint_pending = 0;
	vcpu->exception_pending = 0;
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

int
vcpu_trace_exceptions(struct vm *vm, int vcpuid)
{

	return (trace_guest_exceptions);
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= vm->maxcpus)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}
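/*
 * Module-wide initialization: set up host state, allocate the IPI vector
 * used for vcpu notifications (falling back to IPI_AST), initialize the
 * memory subsystem and hand off to the hardware-specific modinit hook.
 */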
static int
vmm_init(void)
{
	int error;

	if (!vmm_is_hw_supported())
		return (ENXIO);

	vmm_host_state_init();

	vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
	    &IDTVEC(justreturn));
	if (vmm_ipinum < 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	vmm_resume_p = vmmops_modresume;

	return (vmmops_modinit(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		if (vmm_is_hw_supported()) {
			vmmdev_init();
			error = vmm_init();
			if (error == 0)
				vmm_initialized = 1;
		} else {
			error = ENXIO;
		}
		break;
	case MOD_UNLOAD:
		if (vmm_is_hw_supported()) {
			error = vmmdev_cleanup();
			if (error == 0) {
				vmm_resume_p = NULL;
				iommu_cleanup();
				if (vmm_ipinum != IPI_AST)
					lapic_ipi_free(vmm_ipinum);
				error = vmmops_modcleanup();
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				if (error)
					vmm_initialized = 0;
			}
		} else {
			error = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	vm->iommu = NULL;
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);
	vm->vpmtmr = vpmtmr_init(vm);
	if (create)
		vm->vrtc = vrtc_init(vm);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	for (i = 0; i < vm->maxcpus; i++)
		vcpu_init(vm, i, create);
}

/*
 * The default CPU topology is a single thread per package.
 */
u_int cores_per_package = 1;
u_int threads_per_core = 1;

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);

	vm->sockets = 1;
	vm->cores = cores_per_package;	/* XXX backwards compatibility */
	vm->threads = threads_per_core;	/* XXX backwards compatibility */
	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	if (maxcpus != 0)
		return (EINVAL);	/* XXX remove when supported */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	/* XXX need to check sockets * cores * threads == vCPU, how? */
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	if (destroy)
		vrtc_cleanup(vm->vrtc);
	else
		vrtc_reset(vm->vrtc);
	vpmtmr_cleanup(vm->vpmtmr);
	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < vm->maxcpus; i++)
		vcpu_cleanup(vm, i, destroy);

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (destroy || !sysmem_mapping(vm, mm))
			vm_free_memmap(vm, i);
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
{
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vm, vcpuid, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	if (ppt_is_mmio(vm, gpa))
		return (true);			/* 'gpa' is pci passthru mmio */

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

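/*
 * Map or unmap the wired system memory mappings in the IOMMU domain used
 * for PCI passthru.  Pages moved into the guest domain are removed from
 * the host domain (and vice versa), and the TLB of the domain that lost
 * pages is invalidated afterwards.
 */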
static void
vm_iommu_modify(struct vm *vm, bool map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_map *mm;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (!sysmem_mapping(vm, mm))
			continue;

		if (map) {
			KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
			    ("iommu map found invalid memmap %#lx/%#lx/%#x",
			    mm->gpa, mm->len, mm->flags));
			if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
				continue;
			mm->flags |= VM_MEMMAP_F_IOMMU;
		} else {
			if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
				continue;
			mm->flags &= ~VM_MEMMAP_F_IOMMU;
			KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
			    ("iommu unmap found invalid memmap %#lx/%#lx/%#x",
			    mm->gpa, mm->len, mm->flags));
		}

		gpa = mm->gpa;
		while (gpa < mm->gpa + mm->len) {
			vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
			    &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), false)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), true)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0)
		vm_iommu_unmap(vm);

	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/* Set up the IOMMU to do the 'gpa' to 'hpa' translation */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vmm_sysmem_maxaddr(vm);
		vm->iommu = iommu_create_domain(maxaddr);
		if (vm->iommu == NULL)
			return (ENXIO);
		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}
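/*
 * Wire the page backing guest physical address 'gpa' and return a pointer
 * to it through the host direct map.  The opaque cookie returned via
 * 'cookie' must later be passed to vm_gpa_release() to unwire the page.
 */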
void *
vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;
#ifdef INVARIANTS
	/*
	 * All vcpus are frozen by ioctls that modify the memory map
	 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
	 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
	 */
	int state;
	KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d",
	    __func__, vcpuid));
	for (i = 0; i < vm->maxcpus; i++) {
		if (vcpuid != -1 && vcpuid != i)
			continue;
		state = vcpu_get_state(vm, i, NULL);
		KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
		    __func__, state));
	}
#endif
	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= vm->maxcpus)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vm->cookie, vcpu, reg, retval));
}
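/*
 * Update a guest register.  A write to %rip also updates the vcpu's cached
 * 'nextrip' so that instruction restart stays consistent with the new %rip.
 */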
int
vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
	struct vcpu *vcpu;
	int error;

	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	error = vmmops_setreg(vm->cookie, vcpuid, reg, val);
	if (error || reg != VM_REG_GUEST_RIP)
		return (error);

	/* Set 'nextrip' to match the value of %rip */
	VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
	vcpu = &vm->vcpu[vcpuid];
	vcpu->nextrip = val;
	return (0);
}

static bool
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (true);
	default:
		return (false);
	}
}

static bool
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (true);
	default:
		return (false);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= vm->maxcpus)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (vmmops_getdesc(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= vm->maxcpus)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (vmmops_setdesc(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
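/*
 * Transition a vcpu between the IDLE, FROZEN, RUNNING and SLEEPING states.
 * Only transitions through FROZEN are allowed; callers on the vmmdev ioctl
 * path ('from_idle') wait for the vcpu to return to IDLE before making the
 * transition.
 */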
static int
vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	struct vcpu *vcpu;
	int error;

	vcpu = &vm->vcpu[vcpuid];
	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu->reqidle = 1;
			vcpu_notify_event_locked(vcpu, false);
			VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
			    "idle requested", vcpu_state2str(vcpu->state));
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
	    vcpu_state2str(vcpu->state), vcpu_state2str(newstate));

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}
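/*
 * Rendezvous handling: a vcpu thread (or the initiator, with vcpuid -1)
 * runs the rendezvous callback on behalf of the targeted vcpus and sleeps
 * until every vcpu in 'rendezvous_req_cpus' has completed it.
 */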
12665b8a8cd1SNeel Natu #define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ 12675b8a8cd1SNeel Natu do { \ 12685b8a8cd1SNeel Natu if (vcpuid >= 0) \ 12695b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, fmt); \ 12705b8a8cd1SNeel Natu else \ 12715b8a8cd1SNeel Natu VM_CTR0(vm, fmt); \ 12725b8a8cd1SNeel Natu } while (0) 12735b8a8cd1SNeel Natu 1274b837daddSKonstantin Belousov static int 12755b8a8cd1SNeel Natu vm_handle_rendezvous(struct vm *vm, int vcpuid) 12765b8a8cd1SNeel Natu { 1277b837daddSKonstantin Belousov struct thread *td; 1278b837daddSKonstantin Belousov int error; 12795b8a8cd1SNeel Natu 1280a488c9c9SRodney W. Grimes KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), 12815b8a8cd1SNeel Natu ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); 12825b8a8cd1SNeel Natu 1283b837daddSKonstantin Belousov error = 0; 1284b837daddSKonstantin Belousov td = curthread; 12855b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 12865b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 128722d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 128822d822c6SNeel Natu CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); 128922d822c6SNeel Natu 12905b8a8cd1SNeel Natu if (vcpuid != -1 && 129122d822c6SNeel Natu CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 129222d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 12935b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); 12945b8a8cd1SNeel Natu (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); 12955b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 12965b8a8cd1SNeel Natu } 12975b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 12985b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 12995b8a8cd1SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); 1300869dbab7SAndriy Gapon vm->rendezvous_func = NULL; 13015b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 13025b8a8cd1SNeel Natu break; 13035b8a8cd1SNeel Natu } 13045b8a8cd1SNeel Natu RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); 13055b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 1306b837daddSKonstantin Belousov "vmrndv", hz); 1307b837daddSKonstantin Belousov if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { 1308b837daddSKonstantin Belousov mtx_unlock(&vm->rendezvous_mtx); 1309b837daddSKonstantin Belousov error = thread_check_susp(td, true); 1310b837daddSKonstantin Belousov if (error != 0) 1311b837daddSKonstantin Belousov return (error); 1312b837daddSKonstantin Belousov mtx_lock(&vm->rendezvous_mtx); 1313b837daddSKonstantin Belousov } 13145b8a8cd1SNeel Natu } 13155b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 1316b837daddSKonstantin Belousov return (0); 13175b8a8cd1SNeel Natu } 13185b8a8cd1SNeel Natu 1319318224bbSNeel Natu /* 1320318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 
1321318224bbSNeel Natu */ 1322318224bbSNeel Natu static int 1323becd9849SNeel Natu vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) 1324318224bbSNeel Natu { 1325318224bbSNeel Natu struct vcpu *vcpu; 1326c6a0cc2eSNeel Natu const char *wmesg; 1327b837daddSKonstantin Belousov struct thread *td; 1328b837daddSKonstantin Belousov int error, t, vcpu_halted, vm_halted; 1329e50ce2aaSNeel Natu 1330e50ce2aaSNeel Natu KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 1331318224bbSNeel Natu 1332318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1333e50ce2aaSNeel Natu vcpu_halted = 0; 1334e50ce2aaSNeel Natu vm_halted = 0; 1335b837daddSKonstantin Belousov error = 0; 1336b837daddSKonstantin Belousov td = curthread; 1337318224bbSNeel Natu 1338f76fc5d4SNeel Natu vcpu_lock(vcpu); 1339c6a0cc2eSNeel Natu while (1) { 1340f76fc5d4SNeel Natu /* 1341f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1342c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1343c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1344f76fc5d4SNeel Natu * 1345c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 134615add60dSPeter Grehan * vcpu returned from vmmops_run() and before it acquired the 1347c6a0cc2eSNeel Natu * vcpu lock above. 1348f76fc5d4SNeel Natu */ 1349248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1350c6a0cc2eSNeel Natu break; 1351c6a0cc2eSNeel Natu if (vm_nmi_pending(vm, vcpuid)) 1352c6a0cc2eSNeel Natu break; 1353c6a0cc2eSNeel Natu if (!intr_disabled) { 1354c6a0cc2eSNeel Natu if (vm_extint_pending(vm, vcpuid) || 1355c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1356c6a0cc2eSNeel Natu break; 1357c6a0cc2eSNeel Natu } 1358c6a0cc2eSNeel Natu } 1359c6a0cc2eSNeel Natu 1360f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 1361f008d157SNeel Natu if (vcpu_should_yield(vm, vcpuid)) 1362f008d157SNeel Natu break; 1363f008d157SNeel Natu 1364fc276d92SJohn Baldwin if (vcpu_debugged(vm, vcpuid)) 1365fc276d92SJohn Baldwin break; 1366fc276d92SJohn Baldwin 1367e50ce2aaSNeel Natu /* 1368e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1369e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1370e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1371e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 1372e50ce2aaSNeel Natu */ 1373e50ce2aaSNeel Natu if (intr_disabled) { 1374c6a0cc2eSNeel Natu wmesg = "vmhalt"; 1375e50ce2aaSNeel Natu VCPU_CTR0(vm, vcpuid, "Halted"); 1376055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1377e50ce2aaSNeel Natu vcpu_halted = 1; 1378e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1379e50ce2aaSNeel Natu } 1380e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1381e50ce2aaSNeel Natu vm_halted = 1; 1382e50ce2aaSNeel Natu break; 1383e50ce2aaSNeel Natu } 1384e50ce2aaSNeel Natu } else { 1385e50ce2aaSNeel Natu wmesg = "vmidle"; 1386e50ce2aaSNeel Natu } 1387c6a0cc2eSNeel Natu 1388f76fc5d4SNeel Natu t = ticks; 1389248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1390f008d157SNeel Natu /* 1391f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1392f008d157SNeel Natu * wake up periodically to check pending signals. 
1393f008d157SNeel Natu */ 1394f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 1395248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1396f76fc5d4SNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 1397b837daddSKonstantin Belousov if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { 1398b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1399b837daddSKonstantin Belousov error = thread_check_susp(td, false); 1400b837daddSKonstantin Belousov if (error != 0) 1401b837daddSKonstantin Belousov return (error); 1402b837daddSKonstantin Belousov vcpu_lock(vcpu); 1403b837daddSKonstantin Belousov } 1404f76fc5d4SNeel Natu } 1405e50ce2aaSNeel Natu 1406e50ce2aaSNeel Natu if (vcpu_halted) 1407e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1408e50ce2aaSNeel Natu 1409f76fc5d4SNeel Natu vcpu_unlock(vcpu); 1410f76fc5d4SNeel Natu 1411e50ce2aaSNeel Natu if (vm_halted) 1412e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1413e50ce2aaSNeel Natu 1414318224bbSNeel Natu return (0); 1415318224bbSNeel Natu } 1416318224bbSNeel Natu 1417318224bbSNeel Natu static int 1418becd9849SNeel Natu vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) 1419318224bbSNeel Natu { 1420318224bbSNeel Natu int rv, ftype; 1421318224bbSNeel Natu struct vm_map *map; 1422318224bbSNeel Natu struct vcpu *vcpu; 1423318224bbSNeel Natu struct vm_exit *vme; 1424318224bbSNeel Natu 1425318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1426318224bbSNeel Natu vme = &vcpu->exitinfo; 1427318224bbSNeel Natu 1428d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1429d087a399SNeel Natu __func__, vme->inst_length)); 1430d087a399SNeel Natu 1431318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1432318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1433318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1434318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1435318224bbSNeel Natu 1436318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1437318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1438318224bbSNeel Natu vme->u.paging.gpa, ftype); 14399d8d8e3eSNeel Natu if (rv == 0) { 14409d8d8e3eSNeel Natu VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", 14419d8d8e3eSNeel Natu ftype == VM_PROT_READ ? 
"accessed" : "dirty", 14429d8d8e3eSNeel Natu vme->u.paging.gpa); 1443318224bbSNeel Natu goto done; 1444318224bbSNeel Natu } 14459d8d8e3eSNeel Natu } 1446318224bbSNeel Natu 1447318224bbSNeel Natu map = &vm->vmspace->vm_map; 1448df08823dSKonstantin Belousov rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1449318224bbSNeel Natu 1450513c8d33SNeel Natu VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 1451513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1452318224bbSNeel Natu 1453318224bbSNeel Natu if (rv != KERN_SUCCESS) 1454318224bbSNeel Natu return (EFAULT); 1455318224bbSNeel Natu done: 1456318224bbSNeel Natu return (0); 1457318224bbSNeel Natu } 1458318224bbSNeel Natu 1459318224bbSNeel Natu static int 1460becd9849SNeel Natu vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) 1461318224bbSNeel Natu { 1462318224bbSNeel Natu struct vie *vie; 1463318224bbSNeel Natu struct vcpu *vcpu; 1464318224bbSNeel Natu struct vm_exit *vme; 1465e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1466e813a873SNeel Natu struct vm_guest_paging *paging; 1467565bbb86SNeel Natu mem_region_read_t mread; 1468565bbb86SNeel Natu mem_region_write_t mwrite; 1469f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 14701c73ea3eSNeel Natu int cs_d, error, fault; 1471318224bbSNeel Natu 1472318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1473318224bbSNeel Natu vme = &vcpu->exitinfo; 1474318224bbSNeel Natu 14751c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 14761c73ea3eSNeel Natu __func__, vme->inst_length)); 14771c73ea3eSNeel Natu 1478318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1479318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1480e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1481f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1482318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1483e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1484f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1485318224bbSNeel Natu 14869d8d8e3eSNeel Natu VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); 14879d8d8e3eSNeel Natu 1488318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1489c2a875f9SNeel Natu if (vie->num_valid == 0) { 1490e4f605eeSTycho Nightingale error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + 14911c73ea3eSNeel Natu cs_base, VIE_INST_SIZE, vie, &fault); 1492c2a875f9SNeel Natu } else { 1493c2a875f9SNeel Natu /* 1494c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1495c2a875f9SNeel Natu */ 14969c4d5478SNeel Natu error = fault = 0; 1497c2a875f9SNeel Natu } 14989c4d5478SNeel Natu if (error || fault) 14999c4d5478SNeel Natu return (error); 1500318224bbSNeel Natu 1501c07a0648SNeel Natu if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { 1502c07a0648SNeel Natu VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", 1503c07a0648SNeel Natu vme->rip + cs_base); 1504c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1505c07a0648SNeel Natu return (0); 1506c07a0648SNeel Natu } 1507318224bbSNeel Natu 1508a0b78f09SPeter Grehan /* 15091c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 
1510a0b78f09SPeter Grehan */ 1511a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1512d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 15131c73ea3eSNeel Natu VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " 15141c73ea3eSNeel Natu "decoding", vcpu->nextrip); 1515a0b78f09SPeter Grehan 151608e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1517565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1518565bbb86SNeel Natu mread = lapic_mmio_read; 1519565bbb86SNeel Natu mwrite = lapic_mmio_write; 1520565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1521565bbb86SNeel Natu mread = vioapic_mmio_read; 1522565bbb86SNeel Natu mwrite = vioapic_mmio_write; 152308e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 152408e3ff32SNeel Natu mread = vhpet_mmio_read; 152508e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1526565bbb86SNeel Natu } else { 1527becd9849SNeel Natu *retu = true; 1528318224bbSNeel Natu return (0); 1529318224bbSNeel Natu } 1530318224bbSNeel Natu 1531d665d229SNeel Natu error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1532d665d229SNeel Natu mread, mwrite, retu); 1533318224bbSNeel Natu 1534318224bbSNeel Natu return (error); 1535318224bbSNeel Natu } 1536318224bbSNeel Natu 1537b15a09c0SNeel Natu static int 1538b15a09c0SNeel Natu vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1539b15a09c0SNeel Natu { 1540b837daddSKonstantin Belousov int error, i; 1541b15a09c0SNeel Natu struct vcpu *vcpu; 1542b837daddSKonstantin Belousov struct thread *td; 1543b15a09c0SNeel Natu 1544b837daddSKonstantin Belousov error = 0; 1545b15a09c0SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1546b837daddSKonstantin Belousov td = curthread; 1547b15a09c0SNeel Natu 1548b15a09c0SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1549b15a09c0SNeel Natu 1550b15a09c0SNeel Natu /* 1551b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1552b15a09c0SNeel Natu * 1553b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1554b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1555b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 
1556b15a09c0SNeel Natu */ 1557b15a09c0SNeel Natu vcpu_lock(vcpu); 1558b837daddSKonstantin Belousov while (error == 0) { 1559b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1560b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1561b15a09c0SNeel Natu break; 1562b15a09c0SNeel Natu } 1563b15a09c0SNeel Natu 1564b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 1565b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1566248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); 1567b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1568248e6799SNeel Natu vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); 1569b837daddSKonstantin Belousov if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { 1570b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1571b837daddSKonstantin Belousov error = thread_check_susp(td, false); 1572b837daddSKonstantin Belousov vcpu_lock(vcpu); 1573b837daddSKonstantin Belousov } 1574b15a09c0SNeel Natu } else { 1575b15a09c0SNeel Natu VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1576b15a09c0SNeel Natu vcpu_unlock(vcpu); 1577b837daddSKonstantin Belousov error = vm_handle_rendezvous(vm, vcpuid); 1578b15a09c0SNeel Natu vcpu_lock(vcpu); 1579b15a09c0SNeel Natu } 1580b15a09c0SNeel Natu } 1581b15a09c0SNeel Natu vcpu_unlock(vcpu); 1582b15a09c0SNeel Natu 1583b15a09c0SNeel Natu /* 1584b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1585b15a09c0SNeel Natu */ 1586a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) { 1587b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 1588b15a09c0SNeel Natu vcpu_notify_event(vm, i, false); 1589b15a09c0SNeel Natu } 1590b15a09c0SNeel Natu } 1591b15a09c0SNeel Natu 1592b15a09c0SNeel Natu *retu = true; 1593b837daddSKonstantin Belousov return (error); 1594b15a09c0SNeel Natu } 1595b15a09c0SNeel Natu 1596248e6799SNeel Natu static int 1597248e6799SNeel Natu vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) 1598248e6799SNeel Natu { 1599248e6799SNeel Natu struct vcpu *vcpu = &vm->vcpu[vcpuid]; 1600248e6799SNeel Natu 1601248e6799SNeel Natu vcpu_lock(vcpu); 1602248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1603248e6799SNeel Natu vcpu->reqidle = 0; 1604248e6799SNeel Natu vcpu_unlock(vcpu); 1605248e6799SNeel Natu *retu = true; 1606248e6799SNeel Natu return (0); 1607248e6799SNeel Natu } 1608248e6799SNeel Natu 1609b15a09c0SNeel Natu int 1610f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1611b15a09c0SNeel Natu { 1612f0fdcfe2SNeel Natu int i; 1613b15a09c0SNeel Natu 1614f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1615f0fdcfe2SNeel Natu return (EINVAL); 1616f0fdcfe2SNeel Natu 1617f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1618f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1619f0fdcfe2SNeel Natu vm->suspend, how); 1620b15a09c0SNeel Natu return (EALREADY); 1621b15a09c0SNeel Natu } 1622f0fdcfe2SNeel Natu 1623f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1624f0fdcfe2SNeel Natu 1625f0fdcfe2SNeel Natu /* 1626f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1627f0fdcfe2SNeel Natu */ 1628a488c9c9SRodney W. 
Grimes for (i = 0; i < vm->maxcpus; i++) { 1629f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 1630f0fdcfe2SNeel Natu vcpu_notify_event(vm, i, false); 1631f0fdcfe2SNeel Natu } 1632f0fdcfe2SNeel Natu 1633f0fdcfe2SNeel Natu return (0); 1634f0fdcfe2SNeel Natu } 1635f0fdcfe2SNeel Natu 1636f0fdcfe2SNeel Natu void 1637f0fdcfe2SNeel Natu vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1638f0fdcfe2SNeel Natu { 1639f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1640f0fdcfe2SNeel Natu 1641f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1642f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1643f0fdcfe2SNeel Natu 1644f0fdcfe2SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1645f0fdcfe2SNeel Natu vmexit->rip = rip; 1646f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1647f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1648f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1649b15a09c0SNeel Natu } 1650b15a09c0SNeel Natu 165140487465SNeel Natu void 1652fc276d92SJohn Baldwin vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip) 1653fc276d92SJohn Baldwin { 1654fc276d92SJohn Baldwin struct vm_exit *vmexit; 1655fc276d92SJohn Baldwin 1656fc276d92SJohn Baldwin vmexit = vm_exitinfo(vm, vcpuid); 1657fc276d92SJohn Baldwin vmexit->rip = rip; 1658fc276d92SJohn Baldwin vmexit->inst_length = 0; 1659fc276d92SJohn Baldwin vmexit->exitcode = VM_EXITCODE_DEBUG; 1660fc276d92SJohn Baldwin } 1661fc276d92SJohn Baldwin 1662fc276d92SJohn Baldwin void 166340487465SNeel Natu vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 166440487465SNeel Natu { 166540487465SNeel Natu struct vm_exit *vmexit; 166640487465SNeel Natu 166740487465SNeel Natu KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 166840487465SNeel Natu 166940487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 167040487465SNeel Natu vmexit->rip = rip; 167140487465SNeel Natu vmexit->inst_length = 0; 167240487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 167340487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 167440487465SNeel Natu } 167540487465SNeel Natu 167640487465SNeel Natu void 1677248e6799SNeel Natu vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) 1678248e6799SNeel Natu { 1679248e6799SNeel Natu struct vm_exit *vmexit; 1680248e6799SNeel Natu 1681248e6799SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 1682248e6799SNeel Natu vmexit->rip = rip; 1683248e6799SNeel Natu vmexit->inst_length = 0; 1684248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 1685248e6799SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); 1686248e6799SNeel Natu } 1687248e6799SNeel Natu 1688248e6799SNeel Natu void 168940487465SNeel Natu vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 169040487465SNeel Natu { 169140487465SNeel Natu struct vm_exit *vmexit; 169240487465SNeel Natu 169340487465SNeel Natu vmexit = vm_exitinfo(vm, vcpuid); 169440487465SNeel Natu vmexit->rip = rip; 169540487465SNeel Natu vmexit->inst_length = 0; 169640487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 169740487465SNeel Natu vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 169840487465SNeel Natu } 169940487465SNeel Natu 1700318224bbSNeel Natu int 1701318224bbSNeel Natu vm_run(struct vm *vm, struct vm_run *vmrun) 1702318224bbSNeel Natu { 1703248e6799SNeel Natu struct vm_eventinfo evinfo; 1704318224bbSNeel Natu int error, vcpuid; 1705318224bbSNeel Natu struct vcpu *vcpu; 1706318224bbSNeel Natu struct pcb *pcb; 1707d087a399SNeel Natu uint64_t 
tscval; 1708318224bbSNeel Natu struct vm_exit *vme; 1709becd9849SNeel Natu bool retu, intr_disabled; 1710318224bbSNeel Natu pmap_t pmap; 1711318224bbSNeel Natu 1712318224bbSNeel Natu vcpuid = vmrun->cpuid; 1713318224bbSNeel Natu 1714a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 1715318224bbSNeel Natu return (EINVAL); 1716318224bbSNeel Natu 171795ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 171895ebc360SNeel Natu return (EINVAL); 171995ebc360SNeel Natu 172095ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 172195ebc360SNeel Natu return (EINVAL); 172295ebc360SNeel Natu 1723318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1724318224bbSNeel Natu vcpu = &vm->vcpu[vcpuid]; 1725318224bbSNeel Natu vme = &vcpu->exitinfo; 1726248e6799SNeel Natu evinfo.rptr = &vm->rendezvous_func; 1727248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1728248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1729318224bbSNeel Natu restart: 1730318224bbSNeel Natu critical_enter(); 1731318224bbSNeel Natu 1732318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1733318224bbSNeel Natu ("vm_run: absurd pm_active")); 1734318224bbSNeel Natu 1735318224bbSNeel Natu tscval = rdtsc(); 1736318224bbSNeel Natu 1737318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1738318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1739318224bbSNeel Natu 1740318224bbSNeel Natu restore_guest_fpustate(vcpu); 1741318224bbSNeel Natu 1742318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 174315add60dSPeter Grehan error = vmmops_run(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); 1744318224bbSNeel Natu vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1745318224bbSNeel Natu 1746318224bbSNeel Natu save_guest_fpustate(vcpu); 1747318224bbSNeel Natu 1748318224bbSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1749318224bbSNeel Natu 1750318224bbSNeel Natu critical_exit(); 1751318224bbSNeel Natu 1752318224bbSNeel Natu if (error == 0) { 1753becd9849SNeel Natu retu = false; 1754d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1755318224bbSNeel Natu switch (vme->exitcode) { 1756248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 1757248e6799SNeel Natu error = vm_handle_reqidle(vm, vcpuid, &retu); 1758248e6799SNeel Natu break; 1759b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 1760b15a09c0SNeel Natu error = vm_handle_suspend(vm, vcpuid, &retu); 1761b15a09c0SNeel Natu break; 176230b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 176330b94db8SNeel Natu vioapic_process_eoi(vm, vcpuid, 176430b94db8SNeel Natu vme->u.ioapic_eoi.vector); 176530b94db8SNeel Natu break; 17665b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 1767b837daddSKonstantin Belousov error = vm_handle_rendezvous(vm, vcpuid); 17685b8a8cd1SNeel Natu break; 1769318224bbSNeel Natu case VM_EXITCODE_HLT: 1770becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 17711c052192SNeel Natu error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1772318224bbSNeel Natu break; 1773318224bbSNeel Natu case VM_EXITCODE_PAGING: 1774318224bbSNeel Natu error = vm_handle_paging(vm, vcpuid, &retu); 1775318224bbSNeel Natu break; 1776318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 1777318224bbSNeel Natu error = vm_handle_inst_emul(vm, vcpuid, &retu); 1778318224bbSNeel Natu break; 1779d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1780d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 1781d17b5104SNeel Natu error = vm_handle_inout(vm, vcpuid, vme, &retu); 1782d17b5104SNeel Natu break; 178365145c7fSNeel Natu case 
VM_EXITCODE_MONITOR: 178465145c7fSNeel Natu case VM_EXITCODE_MWAIT: 178527d26457SAndrew Turner case VM_EXITCODE_VMINSN: 178665145c7fSNeel Natu vm_inject_ud(vm, vcpuid); 178765145c7fSNeel Natu break; 1788318224bbSNeel Natu default: 1789becd9849SNeel Natu retu = true; /* handled in userland */ 1790318224bbSNeel Natu break; 1791318224bbSNeel Natu } 1792318224bbSNeel Natu } 1793318224bbSNeel Natu 1794d087a399SNeel Natu if (error == 0 && retu == false) 1795f76fc5d4SNeel Natu goto restart; 1796f76fc5d4SNeel Natu 1797248e6799SNeel Natu VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); 1798248e6799SNeel Natu 1799318224bbSNeel Natu /* copy the exit information */ 1800318224bbSNeel Natu bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1801366f6083SPeter Grehan return (error); 1802366f6083SPeter Grehan } 1803366f6083SPeter Grehan 1804366f6083SPeter Grehan int 1805c9c75df4SNeel Natu vm_restart_instruction(void *arg, int vcpuid) 1806c9c75df4SNeel Natu { 1807d087a399SNeel Natu struct vm *vm; 1808c9c75df4SNeel Natu struct vcpu *vcpu; 1809d087a399SNeel Natu enum vcpu_state state; 1810d087a399SNeel Natu uint64_t rip; 1811d087a399SNeel Natu int error; 1812c9c75df4SNeel Natu 1813d087a399SNeel Natu vm = arg; 1814a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 1815c9c75df4SNeel Natu return (EINVAL); 1816c9c75df4SNeel Natu 1817c9c75df4SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1818d087a399SNeel Natu state = vcpu_get_state(vm, vcpuid, NULL); 1819d087a399SNeel Natu if (state == VCPU_RUNNING) { 1820d087a399SNeel Natu /* 1821d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1822d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1823d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1824d087a399SNeel Natu * instruction to be restarted. 1825d087a399SNeel Natu */ 1826c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1827d087a399SNeel Natu VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " 1828d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1829d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1830d087a399SNeel Natu /* 1831d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 183215add60dSPeter Grehan * around vmmops_run() and 'nextrip' points to the next 183315add60dSPeter Grehan * instruction. Thus instruction restart is achieved by setting 183415add60dSPeter Grehan * 'nextrip' to the vcpu's %rip. 1835d087a399SNeel Natu */ 1836d087a399SNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); 1837d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1838d087a399SNeel Natu VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " 1839d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1840d087a399SNeel Natu vcpu->nextrip = rip; 1841d087a399SNeel Natu } else { 1842d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1843d087a399SNeel Natu } 1844c9c75df4SNeel Natu return (0); 1845c9c75df4SNeel Natu } 1846c9c75df4SNeel Natu 1847c9c75df4SNeel Natu int 1848091d4532SNeel Natu vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1849091d4532SNeel Natu { 1850091d4532SNeel Natu struct vcpu *vcpu; 1851091d4532SNeel Natu int type, vector; 1852091d4532SNeel Natu 1853a488c9c9SRodney W. 
Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 1854091d4532SNeel Natu return (EINVAL); 1855091d4532SNeel Natu 1856091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1857091d4532SNeel Natu 1858091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1859091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1860091d4532SNeel Natu vector = info & 0xff; 1861091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1862091d4532SNeel Natu return (EINVAL); 1863091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1864091d4532SNeel Natu return (EINVAL); 1865091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1866091d4532SNeel Natu return (EINVAL); 1867091d4532SNeel Natu } else { 1868091d4532SNeel Natu info = 0; 1869091d4532SNeel Natu } 1870091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1871091d4532SNeel Natu vcpu->exitintinfo = info; 1872091d4532SNeel Natu return (0); 1873091d4532SNeel Natu } 1874091d4532SNeel Natu 1875091d4532SNeel Natu enum exc_class { 1876091d4532SNeel Natu EXC_BENIGN, 1877091d4532SNeel Natu EXC_CONTRIBUTORY, 1878091d4532SNeel Natu EXC_PAGEFAULT 1879091d4532SNeel Natu }; 1880091d4532SNeel Natu 1881091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1882091d4532SNeel Natu 1883091d4532SNeel Natu static enum exc_class 1884091d4532SNeel Natu exception_class(uint64_t info) 1885091d4532SNeel Natu { 1886091d4532SNeel Natu int type, vector; 1887091d4532SNeel Natu 1888091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1889091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1890091d4532SNeel Natu vector = info & 0xff; 1891091d4532SNeel Natu 1892091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1893091d4532SNeel Natu switch (type) { 1894091d4532SNeel Natu case VM_INTINFO_HWINTR: 1895091d4532SNeel Natu case VM_INTINFO_SWINTR: 1896091d4532SNeel Natu case VM_INTINFO_NMI: 1897091d4532SNeel Natu return (EXC_BENIGN); 1898091d4532SNeel Natu default: 1899091d4532SNeel Natu /* 1900091d4532SNeel Natu * Hardware exception. 1901091d4532SNeel Natu * 1902091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1903091d4532SNeel Natu * hardware interrupt and software interrupt. 1904091d4532SNeel Natu * 1905091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1906091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1907091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1908091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1909091d4532SNeel Natu * exception. 
1910091d4532SNeel Natu */ 1911091d4532SNeel Natu break; 1912091d4532SNeel Natu } 1913091d4532SNeel Natu 1914091d4532SNeel Natu switch (vector) { 1915091d4532SNeel Natu case IDT_PF: 1916091d4532SNeel Natu case IDT_VE: 1917091d4532SNeel Natu return (EXC_PAGEFAULT); 1918091d4532SNeel Natu case IDT_DE: 1919091d4532SNeel Natu case IDT_TS: 1920091d4532SNeel Natu case IDT_NP: 1921091d4532SNeel Natu case IDT_SS: 1922091d4532SNeel Natu case IDT_GP: 1923091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1924091d4532SNeel Natu default: 1925091d4532SNeel Natu return (EXC_BENIGN); 1926091d4532SNeel Natu } 1927091d4532SNeel Natu } 1928091d4532SNeel Natu 1929091d4532SNeel Natu static int 1930091d4532SNeel Natu nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1931091d4532SNeel Natu uint64_t *retinfo) 1932091d4532SNeel Natu { 1933091d4532SNeel Natu enum exc_class exc1, exc2; 1934091d4532SNeel Natu int type1, vector1; 1935091d4532SNeel Natu 1936091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1937091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1938091d4532SNeel Natu 1939091d4532SNeel Natu /* 1940091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1941091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 1942091d4532SNeel Natu */ 1943091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1944091d4532SNeel Natu vector1 = info1 & 0xff; 1945091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1946091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1947091d4532SNeel Natu info1, info2); 1948091d4532SNeel Natu vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1949091d4532SNeel Natu *retinfo = 0; 1950091d4532SNeel Natu return (0); 1951091d4532SNeel Natu } 1952091d4532SNeel Natu 1953091d4532SNeel Natu /* 1954091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1955091d4532SNeel Natu */ 1956091d4532SNeel Natu exc1 = exception_class(info1); 1957091d4532SNeel Natu exc2 = exception_class(info2); 1958091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1959091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1960091d4532SNeel Natu /* Convert nested fault into a double fault. 
*/ 1961091d4532SNeel Natu *retinfo = IDT_DF; 1962091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1963091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 1964091d4532SNeel Natu } else { 1965091d4532SNeel Natu /* Handle exceptions serially */ 1966091d4532SNeel Natu *retinfo = info2; 1967091d4532SNeel Natu } 1968091d4532SNeel Natu return (1); 1969091d4532SNeel Natu } 1970091d4532SNeel Natu 1971091d4532SNeel Natu static uint64_t 1972091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 1973091d4532SNeel Natu { 1974091d4532SNeel Natu uint64_t info = 0; 1975091d4532SNeel Natu 1976091d4532SNeel Natu if (vcpu->exception_pending) { 1977c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 1978091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1979c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 1980091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 1981c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 1982091d4532SNeel Natu } 1983091d4532SNeel Natu } 1984091d4532SNeel Natu return (info); 1985091d4532SNeel Natu } 1986091d4532SNeel Natu 1987091d4532SNeel Natu int 1988091d4532SNeel Natu vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1989091d4532SNeel Natu { 1990091d4532SNeel Natu struct vcpu *vcpu; 1991091d4532SNeel Natu uint64_t info1, info2; 1992091d4532SNeel Natu int valid; 1993091d4532SNeel Natu 1994a488c9c9SRodney W. Grimes KASSERT(vcpuid >= 0 && 1995a488c9c9SRodney W. Grimes vcpuid < vm->maxcpus, ("invalid vcpu %d", vcpuid)); 1996091d4532SNeel Natu 1997091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 1998091d4532SNeel Natu 1999091d4532SNeel Natu info1 = vcpu->exitintinfo; 2000091d4532SNeel Natu vcpu->exitintinfo = 0; 2001091d4532SNeel Natu 2002091d4532SNeel Natu info2 = 0; 2003091d4532SNeel Natu if (vcpu->exception_pending) { 2004091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 2005091d4532SNeel Natu vcpu->exception_pending = 0; 2006091d4532SNeel Natu VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 2007c9c75df4SNeel Natu vcpu->exc_vector, info2); 2008091d4532SNeel Natu } 2009091d4532SNeel Natu 2010091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 2011091d4532SNeel Natu valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 2012091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 2013091d4532SNeel Natu *retinfo = info1; 2014091d4532SNeel Natu valid = 1; 2015091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 2016091d4532SNeel Natu *retinfo = info2; 2017091d4532SNeel Natu valid = 1; 2018091d4532SNeel Natu } else { 2019091d4532SNeel Natu valid = 0; 2020091d4532SNeel Natu } 2021091d4532SNeel Natu 2022091d4532SNeel Natu if (valid) { 2023091d4532SNeel Natu VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 2024091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 2025091d4532SNeel Natu } 2026091d4532SNeel Natu 2027091d4532SNeel Natu return (valid); 2028091d4532SNeel Natu } 2029091d4532SNeel Natu 2030091d4532SNeel Natu int 2031091d4532SNeel Natu vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 2032091d4532SNeel Natu { 2033091d4532SNeel Natu struct vcpu *vcpu; 2034091d4532SNeel Natu 2035a488c9c9SRodney W. 
Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2036091d4532SNeel Natu return (EINVAL); 2037091d4532SNeel Natu 2038091d4532SNeel Natu vcpu = &vm->vcpu[vcpuid]; 2039091d4532SNeel Natu *info1 = vcpu->exitintinfo; 2040091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 2041091d4532SNeel Natu return (0); 2042091d4532SNeel Natu } 2043091d4532SNeel Natu 2044091d4532SNeel Natu int 2045c9c75df4SNeel Natu vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, 2046c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 2047366f6083SPeter Grehan { 2048dc506506SNeel Natu struct vcpu *vcpu; 204947b9935dSNeel Natu uint64_t regval; 20502ce12423SNeel Natu int error; 2051dc506506SNeel Natu 2052a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2053366f6083SPeter Grehan return (EINVAL); 2054366f6083SPeter Grehan 2055c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 2056366f6083SPeter Grehan return (EINVAL); 2057366f6083SPeter Grehan 2058091d4532SNeel Natu /* 2059091d4532SNeel Natu * A double fault exception should never be injected directly into 2060091d4532SNeel Natu * the guest. It is a derived exception that results from specific 2061091d4532SNeel Natu * combinations of nested faults. 2062091d4532SNeel Natu */ 2063c9c75df4SNeel Natu if (vector == IDT_DF) 2064091d4532SNeel Natu return (EINVAL); 2065091d4532SNeel Natu 2066dc506506SNeel Natu vcpu = &vm->vcpu[vcpuid]; 2067366f6083SPeter Grehan 2068dc506506SNeel Natu if (vcpu->exception_pending) { 2069dc506506SNeel Natu VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 2070c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 2071dc506506SNeel Natu return (EBUSY); 2072dc506506SNeel Natu } 2073dc506506SNeel Natu 207447b9935dSNeel Natu if (errcode_valid) { 207547b9935dSNeel Natu /* 207647b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 207747b9935dSNeel Natu */ 207847b9935dSNeel Natu error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, ®val); 207947b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 208047b9935dSNeel Natu if (!(regval & CR0_PE)) 208147b9935dSNeel Natu errcode_valid = 0; 208247b9935dSNeel Natu } 208347b9935dSNeel Natu 20842ce12423SNeel Natu /* 20852ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 20862ce12423SNeel Natu * 20872ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 20882ce12423SNeel Natu * one instruction or incurs an exception. 
20892ce12423SNeel Natu */ 20902ce12423SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); 20912ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 20922ce12423SNeel Natu __func__, error)); 20932ce12423SNeel Natu 2094c9c75df4SNeel Natu if (restart_instruction) 2095c9c75df4SNeel Natu vm_restart_instruction(vm, vcpuid); 2096c9c75df4SNeel Natu 2097dc506506SNeel Natu vcpu->exception_pending = 1; 2098c9c75df4SNeel Natu vcpu->exc_vector = vector; 2099c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 2100c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 2101c9c75df4SNeel Natu VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); 2102dc506506SNeel Natu return (0); 2103dc506506SNeel Natu } 2104dc506506SNeel Natu 2105d37f2adbSNeel Natu void 2106d37f2adbSNeel Natu vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 2107d37f2adbSNeel Natu int errcode) 2108dc506506SNeel Natu { 2109d37f2adbSNeel Natu struct vm *vm; 2110c9c75df4SNeel Natu int error, restart_instruction; 2111dc506506SNeel Natu 2112d37f2adbSNeel Natu vm = vmarg; 2113c9c75df4SNeel Natu restart_instruction = 1; 2114d37f2adbSNeel Natu 2115c9c75df4SNeel Natu error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, 2116c9c75df4SNeel Natu errcode, restart_instruction); 2117dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 2118dc506506SNeel Natu } 2119dc506506SNeel Natu 2120dc506506SNeel Natu void 2121d37f2adbSNeel Natu vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 2122fd949af6SNeel Natu { 2123d37f2adbSNeel Natu struct vm *vm; 212437a723a5SNeel Natu int error; 212537a723a5SNeel Natu 2126d37f2adbSNeel Natu vm = vmarg; 212737a723a5SNeel Natu VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 212837a723a5SNeel Natu error_code, cr2); 212937a723a5SNeel Natu 213037a723a5SNeel Natu error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 213137a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 2132fd949af6SNeel Natu 2133d37f2adbSNeel Natu vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 2134366f6083SPeter Grehan } 2135366f6083SPeter Grehan 213661592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 2137366f6083SPeter Grehan 2138f352ff0cSNeel Natu int 2139f352ff0cSNeel Natu vm_inject_nmi(struct vm *vm, int vcpuid) 2140f352ff0cSNeel Natu { 2141f352ff0cSNeel Natu struct vcpu *vcpu; 2142f352ff0cSNeel Natu 2143a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2144366f6083SPeter Grehan return (EINVAL); 2145366f6083SPeter Grehan 2146f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2147f352ff0cSNeel Natu 2148f352ff0cSNeel Natu vcpu->nmi_pending = 1; 2149de5ea6b6SNeel Natu vcpu_notify_event(vm, vcpuid, false); 2150f352ff0cSNeel Natu return (0); 2151f352ff0cSNeel Natu } 2152f352ff0cSNeel Natu 2153f352ff0cSNeel Natu int 2154f352ff0cSNeel Natu vm_nmi_pending(struct vm *vm, int vcpuid) 2155f352ff0cSNeel Natu { 2156f352ff0cSNeel Natu struct vcpu *vcpu; 2157f352ff0cSNeel Natu 2158a488c9c9SRodney W. 
Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2159f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2160f352ff0cSNeel Natu 2161f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2162f352ff0cSNeel Natu 2163f352ff0cSNeel Natu return (vcpu->nmi_pending); 2164f352ff0cSNeel Natu } 2165f352ff0cSNeel Natu 2166f352ff0cSNeel Natu void 2167f352ff0cSNeel Natu vm_nmi_clear(struct vm *vm, int vcpuid) 2168f352ff0cSNeel Natu { 2169f352ff0cSNeel Natu struct vcpu *vcpu; 2170f352ff0cSNeel Natu 2171a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2172f352ff0cSNeel Natu panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 2173f352ff0cSNeel Natu 2174f352ff0cSNeel Natu vcpu = &vm->vcpu[vcpuid]; 2175f352ff0cSNeel Natu 2176f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 2177f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 2178f352ff0cSNeel Natu 2179f352ff0cSNeel Natu vcpu->nmi_pending = 0; 2180f352ff0cSNeel Natu vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 2181366f6083SPeter Grehan } 2182366f6083SPeter Grehan 21830775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 21840775fbb4STycho Nightingale 21850775fbb4STycho Nightingale int 21860775fbb4STycho Nightingale vm_inject_extint(struct vm *vm, int vcpuid) 21870775fbb4STycho Nightingale { 21880775fbb4STycho Nightingale struct vcpu *vcpu; 21890775fbb4STycho Nightingale 2190a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 21910775fbb4STycho Nightingale return (EINVAL); 21920775fbb4STycho Nightingale 21930775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 21940775fbb4STycho Nightingale 21950775fbb4STycho Nightingale vcpu->extint_pending = 1; 21960775fbb4STycho Nightingale vcpu_notify_event(vm, vcpuid, false); 21970775fbb4STycho Nightingale return (0); 21980775fbb4STycho Nightingale } 21990775fbb4STycho Nightingale 22000775fbb4STycho Nightingale int 22010775fbb4STycho Nightingale vm_extint_pending(struct vm *vm, int vcpuid) 22020775fbb4STycho Nightingale { 22030775fbb4STycho Nightingale struct vcpu *vcpu; 22040775fbb4STycho Nightingale 2205a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 22060775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 22070775fbb4STycho Nightingale 22080775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 22090775fbb4STycho Nightingale 22100775fbb4STycho Nightingale return (vcpu->extint_pending); 22110775fbb4STycho Nightingale } 22120775fbb4STycho Nightingale 22130775fbb4STycho Nightingale void 22140775fbb4STycho Nightingale vm_extint_clear(struct vm *vm, int vcpuid) 22150775fbb4STycho Nightingale { 22160775fbb4STycho Nightingale struct vcpu *vcpu; 22170775fbb4STycho Nightingale 2218a488c9c9SRodney W. 
Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 22190775fbb4STycho Nightingale panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 22200775fbb4STycho Nightingale 22210775fbb4STycho Nightingale vcpu = &vm->vcpu[vcpuid]; 22220775fbb4STycho Nightingale 22230775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 22240775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 22250775fbb4STycho Nightingale 22260775fbb4STycho Nightingale vcpu->extint_pending = 0; 22270775fbb4STycho Nightingale vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 22280775fbb4STycho Nightingale } 22290775fbb4STycho Nightingale 2230366f6083SPeter Grehan int 2231366f6083SPeter Grehan vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 2232366f6083SPeter Grehan { 2233a488c9c9SRodney W. Grimes if (vcpu < 0 || vcpu >= vm->maxcpus) 2234366f6083SPeter Grehan return (EINVAL); 2235366f6083SPeter Grehan 2236366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2237366f6083SPeter Grehan return (EINVAL); 2238366f6083SPeter Grehan 223915add60dSPeter Grehan return (vmmops_getcap(vm->cookie, vcpu, type, retval)); 2240366f6083SPeter Grehan } 2241366f6083SPeter Grehan 2242366f6083SPeter Grehan int 2243366f6083SPeter Grehan vm_set_capability(struct vm *vm, int vcpu, int type, int val) 2244366f6083SPeter Grehan { 2245a488c9c9SRodney W. Grimes if (vcpu < 0 || vcpu >= vm->maxcpus) 2246366f6083SPeter Grehan return (EINVAL); 2247366f6083SPeter Grehan 2248366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2249366f6083SPeter Grehan return (EINVAL); 2250366f6083SPeter Grehan 225115add60dSPeter Grehan return (vmmops_setcap(vm->cookie, vcpu, type, val)); 2252366f6083SPeter Grehan } 2253366f6083SPeter Grehan 2254366f6083SPeter Grehan struct vlapic * 2255366f6083SPeter Grehan vm_lapic(struct vm *vm, int cpu) 2256366f6083SPeter Grehan { 2257366f6083SPeter Grehan return (vm->vcpu[cpu].vlapic); 2258366f6083SPeter Grehan } 2259366f6083SPeter Grehan 2260565bbb86SNeel Natu struct vioapic * 2261565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2262565bbb86SNeel Natu { 2263565bbb86SNeel Natu 2264565bbb86SNeel Natu return (vm->vioapic); 2265565bbb86SNeel Natu } 2266565bbb86SNeel Natu 226708e3ff32SNeel Natu struct vhpet * 226808e3ff32SNeel Natu vm_hpet(struct vm *vm) 226908e3ff32SNeel Natu { 227008e3ff32SNeel Natu 227108e3ff32SNeel Natu return (vm->vhpet); 227208e3ff32SNeel Natu } 227308e3ff32SNeel Natu 2274490d56c5SEd Maste bool 2275366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2276366f6083SPeter Grehan { 2277490d56c5SEd Maste int b, f, i, n, s; 2278366f6083SPeter Grehan char *val, *cp, *cp2; 2279490d56c5SEd Maste bool found; 2280366f6083SPeter Grehan 2281366f6083SPeter Grehan /* 228207044a96SNeel Natu * XXX 228307044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 228407044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 228507044a96SNeel Natu * specified using a single environment variable. 228607044a96SNeel Natu * 228707044a96SNeel Natu * Work around this by scanning multiple environment variable 228807044a96SNeel Natu * names instead of a single one - yuck! 
2289366f6083SPeter Grehan */ 229007044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 229107044a96SNeel Natu 229207044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2293490d56c5SEd Maste found = false; 229407044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 22952be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2296366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2297366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2298366f6083SPeter Grehan *cp2 = '\0'; 2299366f6083SPeter Grehan 2300366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2301366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2302490d56c5SEd Maste found = true; 2303366f6083SPeter Grehan break; 2304366f6083SPeter Grehan } 2305366f6083SPeter Grehan 2306366f6083SPeter Grehan if (cp2 != NULL) 2307366f6083SPeter Grehan *cp2++ = ' '; 2308366f6083SPeter Grehan 2309366f6083SPeter Grehan cp = cp2; 2310366f6083SPeter Grehan } 2311366f6083SPeter Grehan freeenv(val); 231207044a96SNeel Natu } 2313366f6083SPeter Grehan return (found); 2314366f6083SPeter Grehan } 2315366f6083SPeter Grehan 2316366f6083SPeter Grehan void * 2317366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2318366f6083SPeter Grehan { 2319366f6083SPeter Grehan 2320366f6083SPeter Grehan return (vm->iommu); 2321366f6083SPeter Grehan } 2322366f6083SPeter Grehan 232375dd3366SNeel Natu int 2324f80330a8SNeel Natu vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 2325f80330a8SNeel Natu bool from_idle) 2326366f6083SPeter Grehan { 232775dd3366SNeel Natu int error; 2328366f6083SPeter Grehan struct vcpu *vcpu; 2329366f6083SPeter Grehan 2330a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2331366f6083SPeter Grehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 2332366f6083SPeter Grehan 2333366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2334366f6083SPeter Grehan 233575dd3366SNeel Natu vcpu_lock(vcpu); 2336248e6799SNeel Natu error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); 233775dd3366SNeel Natu vcpu_unlock(vcpu); 233875dd3366SNeel Natu 233975dd3366SNeel Natu return (error); 234075dd3366SNeel Natu } 234175dd3366SNeel Natu 234275dd3366SNeel Natu enum vcpu_state 2343d3c11f40SPeter Grehan vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 2344366f6083SPeter Grehan { 2345366f6083SPeter Grehan struct vcpu *vcpu; 234675dd3366SNeel Natu enum vcpu_state state; 2347366f6083SPeter Grehan 2348a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2349366f6083SPeter Grehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 2350366f6083SPeter Grehan 2351366f6083SPeter Grehan vcpu = &vm->vcpu[vcpuid]; 2352366f6083SPeter Grehan 235375dd3366SNeel Natu vcpu_lock(vcpu); 235475dd3366SNeel Natu state = vcpu->state; 2355d3c11f40SPeter Grehan if (hostcpu != NULL) 2356d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 235775dd3366SNeel Natu vcpu_unlock(vcpu); 2358366f6083SPeter Grehan 235975dd3366SNeel Natu return (state); 2360366f6083SPeter Grehan } 2361366f6083SPeter Grehan 236295ebc360SNeel Natu int 2363366f6083SPeter Grehan vm_activate_cpu(struct vm *vm, int vcpuid) 2364366f6083SPeter Grehan { 2365366f6083SPeter Grehan 2366a488c9c9SRodney W. 
Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 236795ebc360SNeel Natu return (EINVAL); 236895ebc360SNeel Natu 236995ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->active_cpus)) 237095ebc360SNeel Natu return (EBUSY); 237122d822c6SNeel Natu 237222d822c6SNeel Natu VCPU_CTR0(vm, vcpuid, "activated"); 237322d822c6SNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 237495ebc360SNeel Natu return (0); 2375366f6083SPeter Grehan } 2376366f6083SPeter Grehan 2377fc276d92SJohn Baldwin int 2378fc276d92SJohn Baldwin vm_suspend_cpu(struct vm *vm, int vcpuid) 2379fc276d92SJohn Baldwin { 2380fc276d92SJohn Baldwin int i; 2381fc276d92SJohn Baldwin 2382a488c9c9SRodney W. Grimes if (vcpuid < -1 || vcpuid >= vm->maxcpus) 2383fc276d92SJohn Baldwin return (EINVAL); 2384fc276d92SJohn Baldwin 2385fc276d92SJohn Baldwin if (vcpuid == -1) { 2386fc276d92SJohn Baldwin vm->debug_cpus = vm->active_cpus; 2387a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) { 2388fc276d92SJohn Baldwin if (CPU_ISSET(i, &vm->active_cpus)) 2389fc276d92SJohn Baldwin vcpu_notify_event(vm, i, false); 2390fc276d92SJohn Baldwin } 2391fc276d92SJohn Baldwin } else { 2392fc276d92SJohn Baldwin if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 2393fc276d92SJohn Baldwin return (EINVAL); 2394fc276d92SJohn Baldwin 2395fc276d92SJohn Baldwin CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus); 2396fc276d92SJohn Baldwin vcpu_notify_event(vm, vcpuid, false); 2397fc276d92SJohn Baldwin } 2398fc276d92SJohn Baldwin return (0); 2399fc276d92SJohn Baldwin } 2400fc276d92SJohn Baldwin 2401fc276d92SJohn Baldwin int 2402fc276d92SJohn Baldwin vm_resume_cpu(struct vm *vm, int vcpuid) 2403fc276d92SJohn Baldwin { 2404fc276d92SJohn Baldwin 2405a488c9c9SRodney W. Grimes if (vcpuid < -1 || vcpuid >= vm->maxcpus) 2406fc276d92SJohn Baldwin return (EINVAL); 2407fc276d92SJohn Baldwin 2408fc276d92SJohn Baldwin if (vcpuid == -1) { 2409fc276d92SJohn Baldwin CPU_ZERO(&vm->debug_cpus); 2410fc276d92SJohn Baldwin } else { 2411fc276d92SJohn Baldwin if (!CPU_ISSET(vcpuid, &vm->debug_cpus)) 2412fc276d92SJohn Baldwin return (EINVAL); 2413fc276d92SJohn Baldwin 2414fc276d92SJohn Baldwin CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus); 2415fc276d92SJohn Baldwin } 2416fc276d92SJohn Baldwin return (0); 2417fc276d92SJohn Baldwin } 2418fc276d92SJohn Baldwin 2419fc276d92SJohn Baldwin int 2420fc276d92SJohn Baldwin vcpu_debugged(struct vm *vm, int vcpuid) 2421fc276d92SJohn Baldwin { 2422fc276d92SJohn Baldwin 2423fc276d92SJohn Baldwin return (CPU_ISSET(vcpuid, &vm->debug_cpus)); 2424fc276d92SJohn Baldwin } 2425fc276d92SJohn Baldwin 2426a5615c90SPeter Grehan cpuset_t 2427366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2428366f6083SPeter Grehan { 2429366f6083SPeter Grehan 2430366f6083SPeter Grehan return (vm->active_cpus); 2431366f6083SPeter Grehan } 2432366f6083SPeter Grehan 243395ebc360SNeel Natu cpuset_t 2434fc276d92SJohn Baldwin vm_debug_cpus(struct vm *vm) 2435fc276d92SJohn Baldwin { 2436fc276d92SJohn Baldwin 2437fc276d92SJohn Baldwin return (vm->debug_cpus); 2438fc276d92SJohn Baldwin } 2439fc276d92SJohn Baldwin 2440fc276d92SJohn Baldwin cpuset_t 244195ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 244295ebc360SNeel Natu { 244395ebc360SNeel Natu 244495ebc360SNeel Natu return (vm->suspended_cpus); 244595ebc360SNeel Natu } 244695ebc360SNeel Natu 2447366f6083SPeter Grehan void * 2448366f6083SPeter Grehan vcpu_stats(struct vm *vm, int vcpuid) 2449366f6083SPeter Grehan { 2450366f6083SPeter Grehan 2451366f6083SPeter Grehan return (vm->vcpu[vcpuid].stats); 2452366f6083SPeter Grehan } 2453e9027382SNeel Natu 
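/*
 * Get/set the x2APIC state of a vcpu.  A state update is also pushed
 * down to the virtual local APIC via vlapic_set_x2apic_state() so the
 * vlapic tracks the mode selected for the vcpu.
 */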
2454e9027382SNeel Natu int 2455e9027382SNeel Natu vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2456e9027382SNeel Natu { 2457a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2458e9027382SNeel Natu return (EINVAL); 2459e9027382SNeel Natu 2460e9027382SNeel Natu *state = vm->vcpu[vcpuid].x2apic_state; 2461e9027382SNeel Natu 2462e9027382SNeel Natu return (0); 2463e9027382SNeel Natu } 2464e9027382SNeel Natu 2465e9027382SNeel Natu int 2466e9027382SNeel Natu vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2467e9027382SNeel Natu { 2468a488c9c9SRodney W. Grimes if (vcpuid < 0 || vcpuid >= vm->maxcpus) 2469e9027382SNeel Natu return (EINVAL); 2470e9027382SNeel Natu 24713f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2472e9027382SNeel Natu return (EINVAL); 2473e9027382SNeel Natu 2474e9027382SNeel Natu vm->vcpu[vcpuid].x2apic_state = state; 2475e9027382SNeel Natu 247673820fb0SNeel Natu vlapic_set_x2apic_state(vm, vcpuid, state); 247773820fb0SNeel Natu 2478e9027382SNeel Natu return (0); 2479e9027382SNeel Natu } 248075dd3366SNeel Natu 248122821874SNeel Natu /* 248222821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 248322821874SNeel Natu * as soon as possible: 248422821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 248522821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 248622821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 248722821874SNeel Natu */ 2488248e6799SNeel Natu static void 2489248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 249075dd3366SNeel Natu { 249175dd3366SNeel Natu int hostcpu; 249275dd3366SNeel Natu 249375dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2494ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2495ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2496de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2497ef39d7e9SNeel Natu if (lapic_intr) { 2498add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2499add611fdSNeel Natu vmm_ipinum); 2500ef39d7e9SNeel Natu } else { 250175dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 250275dd3366SNeel Natu } 2503ef39d7e9SNeel Natu } else { 2504ef39d7e9SNeel Natu /* 2505ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2506ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2507ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2508ef39d7e9SNeel Natu * transitions back to guest context. 
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	struct vcpu *vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu, lapic_intr);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}

int
vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg)
{
	int error, i;

	/*
	 * Enforce that this function is called without any locks held.
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus),
	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));

restart:
	mtx_lock(&vm->rendezvous_mtx);
	if (vm->rendezvous_func != NULL) {
		/*
		 * If a rendezvous is already in progress then we need to
		 * call the rendezvous handler in case this 'vcpuid' is one
		 * of the targets of the rendezvous.
		 */
		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
		mtx_unlock(&vm->rendezvous_mtx);
		error = vm_handle_rendezvous(vm, vcpuid);
		if (error != 0)
			return (error);
		goto restart;
	}
	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
	    "rendezvous is still in progress"));

	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
	vm->rendezvous_req_cpus = dest;
	CPU_ZERO(&vm->rendezvous_done_cpus);
	vm->rendezvous_arg = arg;
	vm->rendezvous_func = func;
	mtx_unlock(&vm->rendezvous_mtx);

	/*
	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
	 * vcpus so they handle the rendezvous as soon as possible.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &dest))
			vcpu_notify_event(vm, i, false);
	}

	return (vm_handle_rendezvous(vm, vcpuid));
}

struct vatpic *
vm_atpic(struct vm *vm)
{
	return (vm->vatpic);
}

struct vatpit *
vm_atpit(struct vm *vm)
{
	return (vm->vatpit);
}

struct vpmtmr *
vm_pmtmr(struct vm *vm)
{

	return (vm->vpmtmr);
}

struct vrtc *
vm_rtc(struct vm *vm)
{

	return (vm->vrtc);
}

enum vm_reg_name
vm_segment_name(int seg)
{
	static enum vm_reg_name seg_names[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS
	};

	KASSERT(seg >= 0 && seg < nitems(seg_names),
	    ("%s: invalid segment encoding %d", __func__, seg));
	return (seg_names[seg]);
}

void
vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    int num_copyinfo)
{
	int idx;

	for (idx = 0; idx < num_copyinfo; idx++) {
		if (copyinfo[idx].cookie != NULL)
			vm_gpa_release(copyinfo[idx].cookie);
	}
	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
}

int
vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
    int num_copyinfo, int *fault)
{
	int error, idx, nused;
	size_t n, off, remaining;
	void *hva, *cookie;
	uint64_t gpa;

	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);

	nused = 0;
	remaining = len;
	while (remaining > 0) {
		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
		error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);
		off = gpa & PAGE_MASK;
		n = min(remaining, PAGE_SIZE - off);
		copyinfo[nused].gpa = gpa;
		copyinfo[nused].len = n;
		remaining -= n;
		gla += n;
		nused++;
	}

	for (idx = 0; idx < nused; idx++) {
		hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa,
		    copyinfo[idx].len, prot, &cookie);
		if (hva == NULL)
			break;
		copyinfo[idx].hva = hva;
		copyinfo[idx].cookie = cookie;
	}

	if (idx != nused) {
		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
		return (EFAULT);
	} else {
		*fault = 0;
		return (0);
	}
}

void
vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
    size_t len)
{
	char *dst;
	int idx;

	dst = kaddr;
	idx = 0;
	while (len > 0) {
		bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
		len -= copyinfo[idx].len;
		dst += copyinfo[idx].len;
		idx++;
	}
}

void
vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
    struct vm_copyinfo *copyinfo, size_t len)
{
	const char *src;
	int idx;

	src = kaddr;
	idx = 0;
	while (len > 0) {
		bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
		len -= copyinfo[idx].len;
		src += copyinfo[idx].len;
		idx++;
	}
}

/*
 * Return the amount of in-use and wired memory for the VM.  Since
 * these are global stats, only return the values for vCPU 0.
 */
VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
VMM_STAT_DECLARE(VMM_MEM_WIRED);

static void
vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
{

	if (vcpu == 0) {
		vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
		    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
	}
}

static void
vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
{

	if (vcpu == 0) {
		vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
		    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
	}
}

VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);

#ifdef BHYVE_SNAPSHOT
static int
vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret;
	int i;
	struct vcpu *vcpu;

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu = &vm->vcpu[i];

		SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
		/*
		 * XXX we're cheating here, since the value of tsc_offset as
		 * saved here is actually the guest's TSC value.
		 *
		 * It will be turned back into an actual offset when the
		 * TSC restore function is called.
		 */
		SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done);
	}

done:
	return (ret);
}

static int
vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret;
	int i;
	uint64_t now;

	ret = 0;
	now = rdtsc();

	if (meta->op == VM_SNAPSHOT_SAVE) {
		/*
		 * XXX make tsc_offset take the value of the TSC proper as
		 * seen by the guest.
		 */
		for (i = 0; i < VM_MAXCPU; i++)
			vm->vcpu[i].tsc_offset += now;
	}

	ret = vm_snapshot_vcpus(vm, meta);
	if (ret != 0) {
		printf("%s: failed to copy vm data to user buffer\n",
		    __func__);
		goto done;
	}

	if (meta->op == VM_SNAPSHOT_SAVE) {
		/*
		 * XXX turn tsc_offset back into an offset; the actual value
		 * is only required for restore; using it otherwise would be
		 * wrong.
		 */
		for (i = 0; i < VM_MAXCPU; i++)
			vm->vcpu[i].tsc_offset -= now;
	}

done:
	return (ret);
}

static int
vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int i, error;

	error = 0;

	for (i = 0; i < VM_MAXCPU; i++) {
		error = vmmops_vmcx_snapshot(vm->cookie, meta, i);
		if (error != 0) {
			printf("%s: failed to snapshot vmcs/vmcb data for "
			    "vCPU: %d; error: %d\n", __func__, i, error);
			goto done;
		}
	}

done:
	return (error);
}

/*
 * Save kernel-side structures to user-space for snapshotting.
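 * The structure to save is selected by meta->dev_req.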
 */
int
vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret = 0;

	switch (meta->dev_req) {
	case STRUCT_VMX:
		ret = vmmops_snapshot(vm->cookie, meta);
		break;
	case STRUCT_VMCX:
		ret = vm_snapshot_vmcx(vm, meta);
		break;
	case STRUCT_VM:
		ret = vm_snapshot_vm(vm, meta);
		break;
	case STRUCT_VIOAPIC:
		ret = vioapic_snapshot(vm_ioapic(vm), meta);
		break;
	case STRUCT_VLAPIC:
		ret = vlapic_snapshot(vm, meta);
		break;
	case STRUCT_VHPET:
		ret = vhpet_snapshot(vm_hpet(vm), meta);
		break;
	case STRUCT_VATPIC:
		ret = vatpic_snapshot(vm_atpic(vm), meta);
		break;
	case STRUCT_VATPIT:
		ret = vatpit_snapshot(vm_atpit(vm), meta);
		break;
	case STRUCT_VPMTMR:
		ret = vpmtmr_snapshot(vm_pmtmr(vm), meta);
		break;
	case STRUCT_VRTC:
		ret = vrtc_snapshot(vm_rtc(vm), meta);
		break;
	default:
		printf("%s: failed to find the requested type %#x\n",
		    __func__, meta->dev_req);
		ret = (EINVAL);
	}
	return (ret);
}

int
vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	vcpu->tsc_offset = offset;

	return (0);
}

int
vm_restore_time(struct vm *vm)
{
	int error, i;
	uint64_t now;
	struct vcpu *vcpu;

	now = rdtsc();

	error = vhpet_restore_time(vm_hpet(vm));
	if (error)
		return (error);

	for (i = 0; i < nitems(vm->vcpu); i++) {
		vcpu = &vm->vcpu[i];

		error = vmmops_restore_tsc(vm->cookie, i, vcpu->tsc_offset -
		    now);
		if (error)
			return (error);
	}

	return (0);
}
#endif