/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <x86/ifunc.h>
Grehan 73366f6083SPeter Grehan #include <machine/vmm.h> 74565bbb86SNeel Natu #include <machine/vmm_dev.h> 75e813a873SNeel Natu #include <machine/vmm_instruction_emul.h> 76483d953aSJohn Baldwin #include <machine/vmm_snapshot.h> 77565bbb86SNeel Natu 78d17b5104SNeel Natu #include "vmm_ioport.h" 79318224bbSNeel Natu #include "vmm_ktr.h" 80b01c2033SNeel Natu #include "vmm_host.h" 81366f6083SPeter Grehan #include "vmm_mem.h" 82366f6083SPeter Grehan #include "vmm_util.h" 83762fd208STycho Nightingale #include "vatpic.h" 84e883c9bbSTycho Nightingale #include "vatpit.h" 8508e3ff32SNeel Natu #include "vhpet.h" 86565bbb86SNeel Natu #include "vioapic.h" 87366f6083SPeter Grehan #include "vlapic.h" 88160ef77aSNeel Natu #include "vpmtmr.h" 890dafa5cdSNeel Natu #include "vrtc.h" 90366f6083SPeter Grehan #include "vmm_stat.h" 91f76fc5d4SNeel Natu #include "vmm_lapic.h" 92366f6083SPeter Grehan 93366f6083SPeter Grehan #include "io/ppt.h" 94366f6083SPeter Grehan #include "io/iommu.h" 95366f6083SPeter Grehan 96366f6083SPeter Grehan struct vlapic; 97366f6083SPeter Grehan 985fcf252fSNeel Natu /* 995fcf252fSNeel Natu * Initialization: 1005fcf252fSNeel Natu * (a) allocated when vcpu is created 1015fcf252fSNeel Natu * (i) initialized when vcpu is created and when it is reinitialized 1025fcf252fSNeel Natu * (o) initialized the first time the vcpu is created 1035fcf252fSNeel Natu * (x) initialized before use 1045fcf252fSNeel Natu */ 105366f6083SPeter Grehan struct vcpu { 1065fcf252fSNeel Natu struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 1075fcf252fSNeel Natu enum vcpu_state state; /* (o) vcpu state */ 108950af9ffSJohn Baldwin int vcpuid; /* (o) */ 1095fcf252fSNeel Natu int hostcpu; /* (o) vcpu's host cpu */ 110248e6799SNeel Natu int reqidle; /* (i) request vcpu to idle */ 111950af9ffSJohn Baldwin struct vm *vm; /* (o) */ 1121aa51504SJohn Baldwin void *cookie; /* (i) cpu-specific data */ 1135fcf252fSNeel Natu struct vlapic *vlapic; /* (i) APIC device model */ 1145fcf252fSNeel Natu 
enum x2apic_state x2apic_state; /* (i) APIC mode */ 115091d4532SNeel Natu uint64_t exitintinfo; /* (i) events pending at VM exit */ 1165fcf252fSNeel Natu int nmi_pending; /* (i) NMI pending */ 1175fcf252fSNeel Natu int extint_pending; /* (i) INTR pending */ 1185fcf252fSNeel Natu int exception_pending; /* (i) exception pending */ 119c9c75df4SNeel Natu int exc_vector; /* (x) exception collateral */ 120c9c75df4SNeel Natu int exc_errcode_valid; 121c9c75df4SNeel Natu uint32_t exc_errcode; 1225fcf252fSNeel Natu struct savefpu *guestfpu; /* (a,i) guest fpu state */ 1235fcf252fSNeel Natu uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 1245fcf252fSNeel Natu void *stats; /* (a,i) statistics */ 1255fcf252fSNeel Natu struct vm_exit exitinfo; /* (x) exit reason and collateral */ 126d087a399SNeel Natu uint64_t nextrip; /* (x) next instruction to execute */ 127483d953aSJohn Baldwin uint64_t tsc_offset; /* (o) TSC offsetting */ 128366f6083SPeter Grehan }; 129366f6083SPeter Grehan 1305fcf252fSNeel Natu #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 131f76fc5d4SNeel Natu #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 13208ebb360SJohn Baldwin #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 133f76fc5d4SNeel Natu #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 134f76fc5d4SNeel Natu #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 135318224bbSNeel Natu #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 13675dd3366SNeel Natu 137318224bbSNeel Natu struct mem_seg { 1389b1aa8d6SNeel Natu size_t len; 1399b1aa8d6SNeel Natu bool sysmem; 1409b1aa8d6SNeel Natu struct vm_object *object; 1419b1aa8d6SNeel Natu }; 142e47fe318SCorvin Köhne #define VM_MAX_MEMSEGS 4 1439b1aa8d6SNeel Natu 1449b1aa8d6SNeel Natu struct mem_map { 145318224bbSNeel Natu vm_paddr_t gpa; 146318224bbSNeel Natu size_t len; 1479b1aa8d6SNeel Natu vm_ooffset_t segoff; 1489b1aa8d6SNeel Natu int segid; 1499b1aa8d6SNeel Natu int prot; 1509b1aa8d6SNeel Natu int flags; 
151318224bbSNeel Natu }; 15200d3723fSConrad Meyer #define VM_MAX_MEMMAPS 8 153366f6083SPeter Grehan 154366f6083SPeter Grehan /* 1555fcf252fSNeel Natu * Initialization: 1565fcf252fSNeel Natu * (o) initialized the first time the VM is created 1575fcf252fSNeel Natu * (i) initialized when VM is created and when it is reinitialized 1585fcf252fSNeel Natu * (x) initialized before use 15967b69e76SJohn Baldwin * 16067b69e76SJohn Baldwin * Locking: 16167b69e76SJohn Baldwin * [m] mem_segs_lock 16267b69e76SJohn Baldwin * [r] rendezvous_mtx 16367b69e76SJohn Baldwin * [v] reads require one frozen vcpu, writes require freezing all vcpus 164366f6083SPeter Grehan */ 1655fcf252fSNeel Natu struct vm { 1665fcf252fSNeel Natu void *cookie; /* (i) cpu-specific data */ 1675fcf252fSNeel Natu void *iommu; /* (x) iommu-specific data */ 1685fcf252fSNeel Natu struct vhpet *vhpet; /* (i) virtual HPET */ 1695fcf252fSNeel Natu struct vioapic *vioapic; /* (i) virtual ioapic */ 1705fcf252fSNeel Natu struct vatpic *vatpic; /* (i) virtual atpic */ 1715fcf252fSNeel Natu struct vatpit *vatpit; /* (i) virtual atpit */ 172160ef77aSNeel Natu struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 1730dafa5cdSNeel Natu struct vrtc *vrtc; /* (o) virtual RTC */ 1745fcf252fSNeel Natu volatile cpuset_t active_cpus; /* (i) active vcpus */ 175fc276d92SJohn Baldwin volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 176*c0f35dbfSJohn Baldwin cpuset_t startup_cpus; /* (i) [r] waiting for startup */ 1775fcf252fSNeel Natu int suspend; /* (i) stop VM execution */ 1785fcf252fSNeel Natu volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 1795fcf252fSNeel Natu volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 18067b69e76SJohn Baldwin cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ 18167b69e76SJohn Baldwin cpuset_t rendezvous_done_cpus; /* (x) [r] rendezvous finished */ 18267b69e76SJohn Baldwin void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ 1835b8a8cd1SNeel Natu 
vm_rendezvous_func_t rendezvous_func; 1845fcf252fSNeel Natu struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 18567b69e76SJohn Baldwin struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */ 18667b69e76SJohn Baldwin struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */ 1875fcf252fSNeel Natu struct vmspace *vmspace; /* (o) guest's address space */ 188df95cc76SKa Ho Ng char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ 1895fcf252fSNeel Natu struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 19001d822d3SRodney W. Grimes /* The following describe the vm cpu topology */ 19101d822d3SRodney W. Grimes uint16_t sockets; /* (o) num of sockets */ 19201d822d3SRodney W. Grimes uint16_t cores; /* (o) num of cores/socket */ 19301d822d3SRodney W. Grimes uint16_t threads; /* (o) num of threads/core */ 19401d822d3SRodney W. Grimes uint16_t maxcpus; /* (o) max pluggable cpus */ 19567b69e76SJohn Baldwin struct sx mem_segs_lock; /* (o) */ 196366f6083SPeter Grehan }; 197366f6083SPeter Grehan 198950af9ffSJohn Baldwin #define VMM_CTR0(vcpu, format) \ 199950af9ffSJohn Baldwin VCPU_CTR0((vcpu)->vm, (vcpu)->vcpuid, format) 200950af9ffSJohn Baldwin 201950af9ffSJohn Baldwin #define VMM_CTR1(vcpu, format, p1) \ 202950af9ffSJohn Baldwin VCPU_CTR1((vcpu)->vm, (vcpu)->vcpuid, format, p1) 203950af9ffSJohn Baldwin 204950af9ffSJohn Baldwin #define VMM_CTR2(vcpu, format, p1, p2) \ 205950af9ffSJohn Baldwin VCPU_CTR2((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2) 206950af9ffSJohn Baldwin 207950af9ffSJohn Baldwin #define VMM_CTR3(vcpu, format, p1, p2, p3) \ 208950af9ffSJohn Baldwin VCPU_CTR3((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3) 209950af9ffSJohn Baldwin 210950af9ffSJohn Baldwin #define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ 211950af9ffSJohn Baldwin VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) 212950af9ffSJohn Baldwin 213d5408b1dSNeel Natu static int vmm_initialized; 214d5408b1dSNeel Natu 21515add60dSPeter Grehan static 
void vmmops_panic(void); 216366f6083SPeter Grehan 21715add60dSPeter Grehan static void 21815add60dSPeter Grehan vmmops_panic(void) 21915add60dSPeter Grehan { 22015add60dSPeter Grehan panic("vmm_ops func called when !vmm_is_intel() && !vmm_is_svm()"); 22115add60dSPeter Grehan } 22215add60dSPeter Grehan 22315add60dSPeter Grehan #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ 22415add60dSPeter Grehan DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \ 22515add60dSPeter Grehan { \ 22615add60dSPeter Grehan if (vmm_is_intel()) \ 22715add60dSPeter Grehan return (vmm_ops_intel.opname); \ 22815add60dSPeter Grehan else if (vmm_is_svm()) \ 22915add60dSPeter Grehan return (vmm_ops_amd.opname); \ 23015add60dSPeter Grehan else \ 23115add60dSPeter Grehan return ((ret_type (*)args)vmmops_panic); \ 23215add60dSPeter Grehan } 23315add60dSPeter Grehan 23415add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) 23515add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) 23615add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, modresume, (void)) 23715add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) 238869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t rip, struct pmap *pmap, 239869c8d19SJohn Baldwin struct vm_eventinfo *info)) 24015add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) 241950af9ffSJohn Baldwin DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, 242950af9ffSJohn Baldwin int vcpu_id)) 243869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) 244869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) 245869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) 246869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vcpui, int num, struct seg_desc *desc)) 247869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vcpui, int num, struct 
seg_desc *desc)) 248869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) 249869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) 25015add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, 25115add60dSPeter Grehan vm_offset_t max)) 25215add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) 253869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vcpui)) 254869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (struct vlapic *vlapic)) 255483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 256869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)) 257869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, 258869c8d19SJohn Baldwin struct vm_snapshot_meta *meta)) 259869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) 260483d953aSJohn Baldwin #endif 261366f6083SPeter Grehan 262014a52f3SNeel Natu #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 263014a52f3SNeel Natu #define fpu_stop_emulating() clts() 264366f6083SPeter Grehan 2656ac73777STycho Nightingale SDT_PROVIDER_DEFINE(vmm); 2666ac73777STycho Nightingale 267366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm"); 268366f6083SPeter Grehan 269366f6083SPeter Grehan /* statistics */ 27061592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 271366f6083SPeter Grehan 272b40598c5SPawel Biernacki SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 273b40598c5SPawel Biernacki NULL); 274add611fdSNeel Natu 275055fc2cbSNeel Natu /* 276055fc2cbSNeel Natu * Halt the guest if all vcpus are executing a HLT instruction with 277055fc2cbSNeel Natu * interrupts disabled. 
278055fc2cbSNeel Natu */ 279055fc2cbSNeel Natu static int halt_detection_enabled = 1; 280055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 281055fc2cbSNeel Natu &halt_detection_enabled, 0, 282055fc2cbSNeel Natu "Halt VM if all vcpus execute HLT with interrupts disabled"); 283055fc2cbSNeel Natu 284978f3da1SAndriy Gapon static int vmm_ipinum; 285add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 286add611fdSNeel Natu "IPI vector used for vcpu notifications"); 287add611fdSNeel Natu 288b0538143SNeel Natu static int trace_guest_exceptions; 289b0538143SNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 290b0538143SNeel Natu &trace_guest_exceptions, 0, 291b0538143SNeel Natu "Trap into hypervisor on all guest exceptions and reflect them back"); 292b0538143SNeel Natu 2933ba952e1SCorvin Köhne static int trap_wbinvd; 2943ba952e1SCorvin Köhne SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, 2953ba952e1SCorvin Köhne "WBINVD triggers a VM-exit"); 2963ba952e1SCorvin Köhne 2979b1aa8d6SNeel Natu static void vm_free_memmap(struct vm *vm, int ident); 2989b1aa8d6SNeel Natu static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 299248e6799SNeel Natu static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); 300248e6799SNeel Natu 301248e6799SNeel Natu #ifdef KTR 302248e6799SNeel Natu static const char * 303248e6799SNeel Natu vcpu_state2str(enum vcpu_state state) 304248e6799SNeel Natu { 305248e6799SNeel Natu 306248e6799SNeel Natu switch (state) { 307248e6799SNeel Natu case VCPU_IDLE: 308248e6799SNeel Natu return ("idle"); 309248e6799SNeel Natu case VCPU_FROZEN: 310248e6799SNeel Natu return ("frozen"); 311248e6799SNeel Natu case VCPU_RUNNING: 312248e6799SNeel Natu return ("running"); 313248e6799SNeel Natu case VCPU_SLEEPING: 314248e6799SNeel Natu return ("sleeping"); 315248e6799SNeel Natu default: 316248e6799SNeel Natu return ("unknown"); 
317248e6799SNeel Natu } 318248e6799SNeel Natu } 319248e6799SNeel Natu #endif 320248e6799SNeel Natu 321366f6083SPeter Grehan static void 3225fcf252fSNeel Natu vcpu_cleanup(struct vm *vm, int i, bool destroy) 323366f6083SPeter Grehan { 324de5ea6b6SNeel Natu struct vcpu *vcpu = &vm->vcpu[i]; 325de5ea6b6SNeel Natu 326869c8d19SJohn Baldwin vmmops_vlapic_cleanup(vcpu->vlapic); 327869c8d19SJohn Baldwin vmmops_vcpu_cleanup(vcpu->cookie); 3281aa51504SJohn Baldwin vcpu->cookie = NULL; 3295fcf252fSNeel Natu if (destroy) { 330366f6083SPeter Grehan vmm_stat_free(vcpu->stats); 33138f1b189SPeter Grehan fpu_save_area_free(vcpu->guestfpu); 33208ebb360SJohn Baldwin vcpu_lock_destroy(vcpu); 333366f6083SPeter Grehan } 3345fcf252fSNeel Natu } 335366f6083SPeter Grehan 336366f6083SPeter Grehan static void 3375fcf252fSNeel Natu vcpu_init(struct vm *vm, int vcpu_id, bool create) 338366f6083SPeter Grehan { 339366f6083SPeter Grehan struct vcpu *vcpu; 340366f6083SPeter Grehan 341a488c9c9SRodney W. Grimes KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 3425fcf252fSNeel Natu ("vcpu_init: invalid vcpu %d", vcpu_id)); 3435fcf252fSNeel Natu 344366f6083SPeter Grehan vcpu = &vm->vcpu[vcpu_id]; 345366f6083SPeter Grehan 3465fcf252fSNeel Natu if (create) { 3475fcf252fSNeel Natu KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 3485fcf252fSNeel Natu "initialized", vcpu_id)); 34975dd3366SNeel Natu vcpu_lock_init(vcpu); 3505fcf252fSNeel Natu vcpu->state = VCPU_IDLE; 35175dd3366SNeel Natu vcpu->hostcpu = NOCPU; 352950af9ffSJohn Baldwin vcpu->vcpuid = vcpu_id; 353950af9ffSJohn Baldwin vcpu->vm = vm; 3545fcf252fSNeel Natu vcpu->guestfpu = fpu_save_area_alloc(); 3555fcf252fSNeel Natu vcpu->stats = vmm_stat_alloc(); 356483d953aSJohn Baldwin vcpu->tsc_offset = 0; 3575fcf252fSNeel Natu } 3585fcf252fSNeel Natu 359950af9ffSJohn Baldwin vcpu->cookie = vmmops_vcpu_init(vm->cookie, vcpu, vcpu_id); 360869c8d19SJohn Baldwin vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie); 3613f0f4b15SJohn Baldwin 
vm_set_x2apic_state(vcpu, X2APIC_DISABLED); 362248e6799SNeel Natu vcpu->reqidle = 0; 363091d4532SNeel Natu vcpu->exitintinfo = 0; 3645fcf252fSNeel Natu vcpu->nmi_pending = 0; 3655fcf252fSNeel Natu vcpu->extint_pending = 0; 3665fcf252fSNeel Natu vcpu->exception_pending = 0; 367abb023fbSJohn Baldwin vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 36838f1b189SPeter Grehan fpu_save_area_reset(vcpu->guestfpu); 3695fcf252fSNeel Natu vmm_stat_init(vcpu->stats); 370366f6083SPeter Grehan } 371366f6083SPeter Grehan 372b0538143SNeel Natu int 37380cb5d84SJohn Baldwin vcpu_trace_exceptions(struct vcpu *vcpu) 374b0538143SNeel Natu { 375b0538143SNeel Natu 376b0538143SNeel Natu return (trace_guest_exceptions); 377b0538143SNeel Natu } 378b0538143SNeel Natu 3793ba952e1SCorvin Köhne int 38080cb5d84SJohn Baldwin vcpu_trap_wbinvd(struct vcpu *vcpu) 3813ba952e1SCorvin Köhne { 3823ba952e1SCorvin Köhne return (trap_wbinvd); 3833ba952e1SCorvin Köhne } 3843ba952e1SCorvin Köhne 38598ed632cSNeel Natu struct vm_exit * 38680cb5d84SJohn Baldwin vm_exitinfo(struct vcpu *vcpu) 38798ed632cSNeel Natu { 38898ed632cSNeel Natu return (&vcpu->exitinfo); 38998ed632cSNeel Natu } 39098ed632cSNeel Natu 391366f6083SPeter Grehan static int 392366f6083SPeter Grehan vmm_init(void) 393366f6083SPeter Grehan { 394366f6083SPeter Grehan int error; 395366f6083SPeter Grehan 39615add60dSPeter Grehan if (!vmm_is_hw_supported()) 39715add60dSPeter Grehan return (ENXIO); 39815add60dSPeter Grehan 399b01c2033SNeel Natu vmm_host_state_init(); 400add611fdSNeel Natu 401bd50262fSKonstantin Belousov vmm_ipinum = lapic_ipi_alloc(pti ? 
&IDTVEC(justreturn1_pti) : 402bd50262fSKonstantin Belousov &IDTVEC(justreturn)); 40318a2b08eSNeel Natu if (vmm_ipinum < 0) 404add611fdSNeel Natu vmm_ipinum = IPI_AST; 405366f6083SPeter Grehan 406366f6083SPeter Grehan error = vmm_mem_init(); 407366f6083SPeter Grehan if (error) 408366f6083SPeter Grehan return (error); 409366f6083SPeter Grehan 41015add60dSPeter Grehan vmm_resume_p = vmmops_modresume; 411366f6083SPeter Grehan 41215add60dSPeter Grehan return (vmmops_modinit(vmm_ipinum)); 413366f6083SPeter Grehan } 414366f6083SPeter Grehan 415366f6083SPeter Grehan static int 416366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg) 417366f6083SPeter Grehan { 418366f6083SPeter Grehan int error; 419366f6083SPeter Grehan 420366f6083SPeter Grehan switch (what) { 421366f6083SPeter Grehan case MOD_LOAD: 42215add60dSPeter Grehan if (vmm_is_hw_supported()) { 423366f6083SPeter Grehan vmmdev_init(); 424366f6083SPeter Grehan error = vmm_init(); 425d5408b1dSNeel Natu if (error == 0) 426d5408b1dSNeel Natu vmm_initialized = 1; 42715add60dSPeter Grehan } else { 42815add60dSPeter Grehan error = ENXIO; 42915add60dSPeter Grehan } 430366f6083SPeter Grehan break; 431366f6083SPeter Grehan case MOD_UNLOAD: 43215add60dSPeter Grehan if (vmm_is_hw_supported()) { 433cdc5b9e7SNeel Natu error = vmmdev_cleanup(); 434cdc5b9e7SNeel Natu if (error == 0) { 43563e62d39SJohn Baldwin vmm_resume_p = NULL; 436366f6083SPeter Grehan iommu_cleanup(); 437add611fdSNeel Natu if (vmm_ipinum != IPI_AST) 43818a2b08eSNeel Natu lapic_ipi_free(vmm_ipinum); 43915add60dSPeter Grehan error = vmmops_modcleanup(); 44081ef6611SPeter Grehan /* 44181ef6611SPeter Grehan * Something bad happened - prevent new 44281ef6611SPeter Grehan * VMs from being created 44381ef6611SPeter Grehan */ 44481ef6611SPeter Grehan if (error) 445d5408b1dSNeel Natu vmm_initialized = 0; 44681ef6611SPeter Grehan } 44715add60dSPeter Grehan } else { 44815add60dSPeter Grehan error = 0; 44915add60dSPeter Grehan } 450366f6083SPeter Grehan break; 
451366f6083SPeter Grehan default: 452366f6083SPeter Grehan error = 0; 453366f6083SPeter Grehan break; 454366f6083SPeter Grehan } 455366f6083SPeter Grehan return (error); 456366f6083SPeter Grehan } 457366f6083SPeter Grehan 458366f6083SPeter Grehan static moduledata_t vmm_kmod = { 459366f6083SPeter Grehan "vmm", 460366f6083SPeter Grehan vmm_handler, 461366f6083SPeter Grehan NULL 462366f6083SPeter Grehan }; 463366f6083SPeter Grehan 464366f6083SPeter Grehan /* 465e3f0800bSNeel Natu * vmm initialization has the following dependencies: 466e3f0800bSNeel Natu * 467e3f0800bSNeel Natu * - VT-x initialization requires smp_rendezvous() and therefore must happen 468e3f0800bSNeel Natu * after SMP is fully functional (after SI_SUB_SMP). 469366f6083SPeter Grehan */ 470e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 471366f6083SPeter Grehan MODULE_VERSION(vmm, 1); 472366f6083SPeter Grehan 4735fcf252fSNeel Natu static void 4745fcf252fSNeel Natu vm_init(struct vm *vm, bool create) 4755fcf252fSNeel Natu { 4765fcf252fSNeel Natu int i; 4775fcf252fSNeel Natu 47815add60dSPeter Grehan vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 4795fcf252fSNeel Natu vm->iommu = NULL; 4805fcf252fSNeel Natu vm->vioapic = vioapic_init(vm); 4815fcf252fSNeel Natu vm->vhpet = vhpet_init(vm); 4825fcf252fSNeel Natu vm->vatpic = vatpic_init(vm); 4835fcf252fSNeel Natu vm->vatpit = vatpit_init(vm); 484160ef77aSNeel Natu vm->vpmtmr = vpmtmr_init(vm); 4850dafa5cdSNeel Natu if (create) 4860dafa5cdSNeel Natu vm->vrtc = vrtc_init(vm); 4875fcf252fSNeel Natu 4885fcf252fSNeel Natu CPU_ZERO(&vm->active_cpus); 489fc276d92SJohn Baldwin CPU_ZERO(&vm->debug_cpus); 490*c0f35dbfSJohn Baldwin CPU_ZERO(&vm->startup_cpus); 4915fcf252fSNeel Natu 4925fcf252fSNeel Natu vm->suspend = 0; 4935fcf252fSNeel Natu CPU_ZERO(&vm->suspended_cpus); 4945fcf252fSNeel Natu 495a488c9c9SRodney W. 
Grimes for (i = 0; i < vm->maxcpus; i++) 4965fcf252fSNeel Natu vcpu_init(vm, i, create); 4975fcf252fSNeel Natu } 4985fcf252fSNeel Natu 49901d822d3SRodney W. Grimes /* 50001d822d3SRodney W. Grimes * The default CPU topology is a single thread per package. 50101d822d3SRodney W. Grimes */ 50201d822d3SRodney W. Grimes u_int cores_per_package = 1; 50301d822d3SRodney W. Grimes u_int threads_per_core = 1; 50401d822d3SRodney W. Grimes 505d5408b1dSNeel Natu int 506d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm) 507366f6083SPeter Grehan { 508366f6083SPeter Grehan struct vm *vm; 509318224bbSNeel Natu struct vmspace *vmspace; 510366f6083SPeter Grehan 511d5408b1dSNeel Natu /* 512d5408b1dSNeel Natu * If vmm.ko could not be successfully initialized then don't attempt 513d5408b1dSNeel Natu * to create the virtual machine. 514d5408b1dSNeel Natu */ 515d5408b1dSNeel Natu if (!vmm_initialized) 516d5408b1dSNeel Natu return (ENXIO); 517d5408b1dSNeel Natu 518df95cc76SKa Ho Ng if (name == NULL || strnlen(name, VM_MAX_NAMELEN + 1) == 519df95cc76SKa Ho Ng VM_MAX_NAMELEN + 1) 520d5408b1dSNeel Natu return (EINVAL); 521366f6083SPeter Grehan 5223c48106aSKonstantin Belousov vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48); 523318224bbSNeel Natu if (vmspace == NULL) 524318224bbSNeel Natu return (ENOMEM); 525318224bbSNeel Natu 526366f6083SPeter Grehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 527366f6083SPeter Grehan strcpy(vm->name, name); 52888c4b8d1SNeel Natu vm->vmspace = vmspace; 5295b8a8cd1SNeel Natu mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 53067b69e76SJohn Baldwin sx_init(&vm->mem_segs_lock, "vm mem_segs"); 531366f6083SPeter Grehan 53201d822d3SRodney W. Grimes vm->sockets = 1; 53301d822d3SRodney W. Grimes vm->cores = cores_per_package; /* XXX backwards compatibility */ 53401d822d3SRodney W. Grimes vm->threads = threads_per_core; /* XXX backwards compatibility */ 535a488c9c9SRodney W. 
Grimes vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ 53601d822d3SRodney W. Grimes 5375fcf252fSNeel Natu vm_init(vm, true); 538366f6083SPeter Grehan 539d5408b1dSNeel Natu *retvm = vm; 540d5408b1dSNeel Natu return (0); 541366f6083SPeter Grehan } 542366f6083SPeter Grehan 54301d822d3SRodney W. Grimes void 54401d822d3SRodney W. Grimes vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 54501d822d3SRodney W. Grimes uint16_t *threads, uint16_t *maxcpus) 54601d822d3SRodney W. Grimes { 54701d822d3SRodney W. Grimes *sockets = vm->sockets; 54801d822d3SRodney W. Grimes *cores = vm->cores; 54901d822d3SRodney W. Grimes *threads = vm->threads; 55001d822d3SRodney W. Grimes *maxcpus = vm->maxcpus; 55101d822d3SRodney W. Grimes } 55201d822d3SRodney W. Grimes 553a488c9c9SRodney W. Grimes uint16_t 554a488c9c9SRodney W. Grimes vm_get_maxcpus(struct vm *vm) 555a488c9c9SRodney W. Grimes { 556a488c9c9SRodney W. Grimes return (vm->maxcpus); 557a488c9c9SRodney W. Grimes } 558a488c9c9SRodney W. Grimes 55901d822d3SRodney W. Grimes int 56001d822d3SRodney W. Grimes vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 56101d822d3SRodney W. Grimes uint16_t threads, uint16_t maxcpus) 56201d822d3SRodney W. Grimes { 56301d822d3SRodney W. Grimes if (maxcpus != 0) 56401d822d3SRodney W. Grimes return (EINVAL); /* XXX remove when supported */ 565a488c9c9SRodney W. Grimes if ((sockets * cores * threads) > vm->maxcpus) 56601d822d3SRodney W. Grimes return (EINVAL); 56701d822d3SRodney W. Grimes /* XXX need to check sockets * cores * threads == vCPU, how? */ 56801d822d3SRodney W. Grimes vm->sockets = sockets; 56901d822d3SRodney W. Grimes vm->cores = cores; 57001d822d3SRodney W. Grimes vm->threads = threads; 571a488c9c9SRodney W. Grimes vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ 57201d822d3SRodney W. Grimes return(0); 57301d822d3SRodney W. Grimes } 57401d822d3SRodney W. 
Grimes 575f7d51510SNeel Natu static void 5765fcf252fSNeel Natu vm_cleanup(struct vm *vm, bool destroy) 577366f6083SPeter Grehan { 5789b1aa8d6SNeel Natu struct mem_map *mm; 579366f6083SPeter Grehan int i; 580366f6083SPeter Grehan 581366f6083SPeter Grehan ppt_unassign_all(vm); 582366f6083SPeter Grehan 583318224bbSNeel Natu if (vm->iommu != NULL) 584318224bbSNeel Natu iommu_destroy_domain(vm->iommu); 585318224bbSNeel Natu 5860dafa5cdSNeel Natu if (destroy) 5870dafa5cdSNeel Natu vrtc_cleanup(vm->vrtc); 5880dafa5cdSNeel Natu else 5890dafa5cdSNeel Natu vrtc_reset(vm->vrtc); 590160ef77aSNeel Natu vpmtmr_cleanup(vm->vpmtmr); 591e883c9bbSTycho Nightingale vatpit_cleanup(vm->vatpit); 59208e3ff32SNeel Natu vhpet_cleanup(vm->vhpet); 593762fd208STycho Nightingale vatpic_cleanup(vm->vatpic); 59408e3ff32SNeel Natu vioapic_cleanup(vm->vioapic); 59508e3ff32SNeel Natu 596a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) 5975fcf252fSNeel Natu vcpu_cleanup(vm, i, destroy); 5985fcf252fSNeel Natu 59915add60dSPeter Grehan vmmops_cleanup(vm->cookie); 6005fcf252fSNeel Natu 6019b1aa8d6SNeel Natu /* 6029b1aa8d6SNeel Natu * System memory is removed from the guest address space only when 6039b1aa8d6SNeel Natu * the VM is destroyed. This is because the mapping remains the same 6049b1aa8d6SNeel Natu * across VM reset. 6059b1aa8d6SNeel Natu * 6069b1aa8d6SNeel Natu * Device memory can be relocated by the guest (e.g. using PCI BARs) 6079b1aa8d6SNeel Natu * so those mappings are removed on a VM reset. 
6089b1aa8d6SNeel Natu */ 6099b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 6109b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 6119b1aa8d6SNeel Natu if (destroy || !sysmem_mapping(vm, mm)) 6129b1aa8d6SNeel Natu vm_free_memmap(vm, i); 6139b1aa8d6SNeel Natu } 614f7d51510SNeel Natu 6159b1aa8d6SNeel Natu if (destroy) { 6169b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMSEGS; i++) 6179b1aa8d6SNeel Natu vm_free_memseg(vm, i); 618366f6083SPeter Grehan 61915add60dSPeter Grehan vmmops_vmspace_free(vm->vmspace); 6205fcf252fSNeel Natu vm->vmspace = NULL; 62108ebb360SJohn Baldwin 62267b69e76SJohn Baldwin sx_destroy(&vm->mem_segs_lock); 62308ebb360SJohn Baldwin mtx_destroy(&vm->rendezvous_mtx); 6245fcf252fSNeel Natu } 6255fcf252fSNeel Natu } 626366f6083SPeter Grehan 6275fcf252fSNeel Natu void 6285fcf252fSNeel Natu vm_destroy(struct vm *vm) 6295fcf252fSNeel Natu { 6305fcf252fSNeel Natu vm_cleanup(vm, true); 631366f6083SPeter Grehan free(vm, M_VM); 632366f6083SPeter Grehan } 633366f6083SPeter Grehan 6345fcf252fSNeel Natu int 6355fcf252fSNeel Natu vm_reinit(struct vm *vm) 6365fcf252fSNeel Natu { 6375fcf252fSNeel Natu int error; 6385fcf252fSNeel Natu 6395fcf252fSNeel Natu /* 6405fcf252fSNeel Natu * A virtual machine can be reset only if all vcpus are suspended. 
6415fcf252fSNeel Natu */ 6425fcf252fSNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 6435fcf252fSNeel Natu vm_cleanup(vm, false); 6445fcf252fSNeel Natu vm_init(vm, false); 6455fcf252fSNeel Natu error = 0; 6465fcf252fSNeel Natu } else { 6475fcf252fSNeel Natu error = EBUSY; 6485fcf252fSNeel Natu } 6495fcf252fSNeel Natu 6505fcf252fSNeel Natu return (error); 6515fcf252fSNeel Natu } 6525fcf252fSNeel Natu 653366f6083SPeter Grehan const char * 654366f6083SPeter Grehan vm_name(struct vm *vm) 655366f6083SPeter Grehan { 656366f6083SPeter Grehan return (vm->name); 657366f6083SPeter Grehan } 658366f6083SPeter Grehan 65967b69e76SJohn Baldwin void 66067b69e76SJohn Baldwin vm_slock_memsegs(struct vm *vm) 66167b69e76SJohn Baldwin { 66267b69e76SJohn Baldwin sx_slock(&vm->mem_segs_lock); 66367b69e76SJohn Baldwin } 66467b69e76SJohn Baldwin 66567b69e76SJohn Baldwin void 66667b69e76SJohn Baldwin vm_xlock_memsegs(struct vm *vm) 66767b69e76SJohn Baldwin { 66867b69e76SJohn Baldwin sx_xlock(&vm->mem_segs_lock); 66967b69e76SJohn Baldwin } 67067b69e76SJohn Baldwin 67167b69e76SJohn Baldwin void 67267b69e76SJohn Baldwin vm_unlock_memsegs(struct vm *vm) 67367b69e76SJohn Baldwin { 67467b69e76SJohn Baldwin sx_unlock(&vm->mem_segs_lock); 67567b69e76SJohn Baldwin } 67667b69e76SJohn Baldwin 677366f6083SPeter Grehan int 678366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 679366f6083SPeter Grehan { 680318224bbSNeel Natu vm_object_t obj; 681366f6083SPeter Grehan 682318224bbSNeel Natu if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 683318224bbSNeel Natu return (ENOMEM); 684318224bbSNeel Natu else 685318224bbSNeel Natu return (0); 686366f6083SPeter Grehan } 687366f6083SPeter Grehan 688366f6083SPeter Grehan int 689366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 690366f6083SPeter Grehan { 691366f6083SPeter Grehan 692318224bbSNeel Natu vmm_mmio_free(vm->vmspace, gpa, len); 693318224bbSNeel Natu 
return (0); 694366f6083SPeter Grehan } 695366f6083SPeter Grehan 6969b1aa8d6SNeel Natu /* 6979b1aa8d6SNeel Natu * Return 'true' if 'gpa' is allocated in the guest address space. 6989b1aa8d6SNeel Natu * 6999b1aa8d6SNeel Natu * This function is called in the context of a running vcpu which acts as 7009b1aa8d6SNeel Natu * an implicit lock on 'vm->mem_maps[]'. 7019b1aa8d6SNeel Natu */ 7029b1aa8d6SNeel Natu bool 70380cb5d84SJohn Baldwin vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) 704366f6083SPeter Grehan { 70580cb5d84SJohn Baldwin struct vm *vm = vcpu->vm; 7069b1aa8d6SNeel Natu struct mem_map *mm; 707341f19c9SNeel Natu int i; 708341f19c9SNeel Natu 7099b1aa8d6SNeel Natu #ifdef INVARIANTS 7109b1aa8d6SNeel Natu int hostcpu, state; 71180cb5d84SJohn Baldwin state = vcpu_get_state(vcpu, &hostcpu); 7129b1aa8d6SNeel Natu KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 7139b1aa8d6SNeel Natu ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 7149b1aa8d6SNeel Natu #endif 7159b1aa8d6SNeel Natu 7169b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7179b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7189b1aa8d6SNeel Natu if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 7199b1aa8d6SNeel Natu return (true); /* 'gpa' is sysmem or devmem */ 720341f19c9SNeel Natu } 721341f19c9SNeel Natu 722318224bbSNeel Natu if (ppt_is_mmio(vm, gpa)) 7239b1aa8d6SNeel Natu return (true); /* 'gpa' is pci passthru mmio */ 724318224bbSNeel Natu 7259b1aa8d6SNeel Natu return (false); 726341f19c9SNeel Natu } 727341f19c9SNeel Natu 728341f19c9SNeel Natu int 7299b1aa8d6SNeel Natu vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 730341f19c9SNeel Natu { 731318224bbSNeel Natu struct mem_seg *seg; 7329b1aa8d6SNeel Natu vm_object_t obj; 733366f6083SPeter Grehan 73467b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_XLOCKED); 73567b69e76SJohn Baldwin 7369b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 737341f19c9SNeel Natu return (EINVAL); 
738341f19c9SNeel Natu 7399b1aa8d6SNeel Natu if (len == 0 || (len & PAGE_MASK)) 7409b1aa8d6SNeel Natu return (EINVAL); 741341f19c9SNeel Natu 7429b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 7439b1aa8d6SNeel Natu if (seg->object != NULL) { 7449b1aa8d6SNeel Natu if (seg->len == len && seg->sysmem == sysmem) 7459b1aa8d6SNeel Natu return (EEXIST); 7469b1aa8d6SNeel Natu else 7479b1aa8d6SNeel Natu return (EINVAL); 748341f19c9SNeel Natu } 749341f19c9SNeel Natu 7506b389740SMark Johnston obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT); 7519b1aa8d6SNeel Natu if (obj == NULL) 752318224bbSNeel Natu return (ENOMEM); 753318224bbSNeel Natu 754318224bbSNeel Natu seg->len = len; 7559b1aa8d6SNeel Natu seg->object = obj; 7569b1aa8d6SNeel Natu seg->sysmem = sysmem; 757366f6083SPeter Grehan return (0); 758366f6083SPeter Grehan } 759366f6083SPeter Grehan 7609b1aa8d6SNeel Natu int 7619b1aa8d6SNeel Natu vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 7629b1aa8d6SNeel Natu vm_object_t *objptr) 763477867a0SNeel Natu { 7649b1aa8d6SNeel Natu struct mem_seg *seg; 765477867a0SNeel Natu 76667b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_LOCKED); 76767b69e76SJohn Baldwin 7689b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 7699b1aa8d6SNeel Natu return (EINVAL); 7709b1aa8d6SNeel Natu 7719b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 7729b1aa8d6SNeel Natu if (len) 7739b1aa8d6SNeel Natu *len = seg->len; 7749b1aa8d6SNeel Natu if (sysmem) 7759b1aa8d6SNeel Natu *sysmem = seg->sysmem; 7769b1aa8d6SNeel Natu if (objptr) 7779b1aa8d6SNeel Natu *objptr = seg->object; 7789b1aa8d6SNeel Natu return (0); 779477867a0SNeel Natu } 7809b1aa8d6SNeel Natu 7819b1aa8d6SNeel Natu void 7829b1aa8d6SNeel Natu vm_free_memseg(struct vm *vm, int ident) 7839b1aa8d6SNeel Natu { 7849b1aa8d6SNeel Natu struct mem_seg *seg; 7859b1aa8d6SNeel Natu 7869b1aa8d6SNeel Natu KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 7879b1aa8d6SNeel Natu ("%s: invalid memseg ident %d", __func__, ident)); 
7889b1aa8d6SNeel Natu 7899b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 7909b1aa8d6SNeel Natu if (seg->object != NULL) { 7919b1aa8d6SNeel Natu vm_object_deallocate(seg->object); 7929b1aa8d6SNeel Natu bzero(seg, sizeof(struct mem_seg)); 7939b1aa8d6SNeel Natu } 7949b1aa8d6SNeel Natu } 7959b1aa8d6SNeel Natu 7969b1aa8d6SNeel Natu int 7979b1aa8d6SNeel Natu vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 7989b1aa8d6SNeel Natu size_t len, int prot, int flags) 7999b1aa8d6SNeel Natu { 8009b1aa8d6SNeel Natu struct mem_seg *seg; 8019b1aa8d6SNeel Natu struct mem_map *m, *map; 8029b1aa8d6SNeel Natu vm_ooffset_t last; 8039b1aa8d6SNeel Natu int i, error; 8049b1aa8d6SNeel Natu 8059b1aa8d6SNeel Natu if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 8069b1aa8d6SNeel Natu return (EINVAL); 8079b1aa8d6SNeel Natu 8089b1aa8d6SNeel Natu if (flags & ~VM_MEMMAP_F_WIRED) 8099b1aa8d6SNeel Natu return (EINVAL); 8109b1aa8d6SNeel Natu 8119b1aa8d6SNeel Natu if (segid < 0 || segid >= VM_MAX_MEMSEGS) 8129b1aa8d6SNeel Natu return (EINVAL); 8139b1aa8d6SNeel Natu 8149b1aa8d6SNeel Natu seg = &vm->mem_segs[segid]; 8159b1aa8d6SNeel Natu if (seg->object == NULL) 8169b1aa8d6SNeel Natu return (EINVAL); 8179b1aa8d6SNeel Natu 8189b1aa8d6SNeel Natu last = first + len; 8199b1aa8d6SNeel Natu if (first < 0 || first >= last || last > seg->len) 8209b1aa8d6SNeel Natu return (EINVAL); 8219b1aa8d6SNeel Natu 8229b1aa8d6SNeel Natu if ((gpa | first | last) & PAGE_MASK) 8239b1aa8d6SNeel Natu return (EINVAL); 8249b1aa8d6SNeel Natu 8259b1aa8d6SNeel Natu map = NULL; 8269b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 8279b1aa8d6SNeel Natu m = &vm->mem_maps[i]; 8289b1aa8d6SNeel Natu if (m->len == 0) { 8299b1aa8d6SNeel Natu map = m; 8309b1aa8d6SNeel Natu break; 8319b1aa8d6SNeel Natu } 8329b1aa8d6SNeel Natu } 8339b1aa8d6SNeel Natu 8349b1aa8d6SNeel Natu if (map == NULL) 8359b1aa8d6SNeel Natu return (ENOSPC); 8369b1aa8d6SNeel Natu 8379b1aa8d6SNeel Natu error = vm_map_find(&vm->vmspace->vm_map, 
seg->object, first, &gpa, 8389b1aa8d6SNeel Natu len, 0, VMFS_NO_SPACE, prot, prot, 0); 8399b1aa8d6SNeel Natu if (error != KERN_SUCCESS) 8409b1aa8d6SNeel Natu return (EFAULT); 8419b1aa8d6SNeel Natu 8429b1aa8d6SNeel Natu vm_object_reference(seg->object); 8439b1aa8d6SNeel Natu 8449b1aa8d6SNeel Natu if (flags & VM_MEMMAP_F_WIRED) { 8459b1aa8d6SNeel Natu error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 8469b1aa8d6SNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 8479b1aa8d6SNeel Natu if (error != KERN_SUCCESS) { 8489b1aa8d6SNeel Natu vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 84954a3a114SMark Johnston return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : 85054a3a114SMark Johnston EFAULT); 8519b1aa8d6SNeel Natu } 8529b1aa8d6SNeel Natu } 8539b1aa8d6SNeel Natu 8549b1aa8d6SNeel Natu map->gpa = gpa; 8559b1aa8d6SNeel Natu map->len = len; 8569b1aa8d6SNeel Natu map->segoff = first; 8579b1aa8d6SNeel Natu map->segid = segid; 8589b1aa8d6SNeel Natu map->prot = prot; 8599b1aa8d6SNeel Natu map->flags = flags; 8609b1aa8d6SNeel Natu return (0); 8619b1aa8d6SNeel Natu } 8629b1aa8d6SNeel Natu 8639b1aa8d6SNeel Natu int 864f8a6ec2dSD Scott Phillips vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) 865f8a6ec2dSD Scott Phillips { 866f8a6ec2dSD Scott Phillips struct mem_map *m; 867f8a6ec2dSD Scott Phillips int i; 868f8a6ec2dSD Scott Phillips 869f8a6ec2dSD Scott Phillips for (i = 0; i < VM_MAX_MEMMAPS; i++) { 870f8a6ec2dSD Scott Phillips m = &vm->mem_maps[i]; 871f8a6ec2dSD Scott Phillips if (m->gpa == gpa && m->len == len && 872f8a6ec2dSD Scott Phillips (m->flags & VM_MEMMAP_F_IOMMU) == 0) { 873f8a6ec2dSD Scott Phillips vm_free_memmap(vm, i); 874f8a6ec2dSD Scott Phillips return (0); 875f8a6ec2dSD Scott Phillips } 876f8a6ec2dSD Scott Phillips } 877f8a6ec2dSD Scott Phillips 878f8a6ec2dSD Scott Phillips return (EINVAL); 879f8a6ec2dSD Scott Phillips } 880f8a6ec2dSD Scott Phillips 881f8a6ec2dSD Scott Phillips int 8829b1aa8d6SNeel Natu vm_mmap_getnext(struct vm *vm, 
vm_paddr_t *gpa, int *segid, 8839b1aa8d6SNeel Natu vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 8849b1aa8d6SNeel Natu { 8859b1aa8d6SNeel Natu struct mem_map *mm, *mmnext; 8869b1aa8d6SNeel Natu int i; 8879b1aa8d6SNeel Natu 8889b1aa8d6SNeel Natu mmnext = NULL; 8899b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 8909b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 8919b1aa8d6SNeel Natu if (mm->len == 0 || mm->gpa < *gpa) 8929b1aa8d6SNeel Natu continue; 8939b1aa8d6SNeel Natu if (mmnext == NULL || mm->gpa < mmnext->gpa) 8949b1aa8d6SNeel Natu mmnext = mm; 8959b1aa8d6SNeel Natu } 8969b1aa8d6SNeel Natu 8979b1aa8d6SNeel Natu if (mmnext != NULL) { 8989b1aa8d6SNeel Natu *gpa = mmnext->gpa; 8999b1aa8d6SNeel Natu if (segid) 9009b1aa8d6SNeel Natu *segid = mmnext->segid; 9019b1aa8d6SNeel Natu if (segoff) 9029b1aa8d6SNeel Natu *segoff = mmnext->segoff; 9039b1aa8d6SNeel Natu if (len) 9049b1aa8d6SNeel Natu *len = mmnext->len; 9059b1aa8d6SNeel Natu if (prot) 9069b1aa8d6SNeel Natu *prot = mmnext->prot; 9079b1aa8d6SNeel Natu if (flags) 9089b1aa8d6SNeel Natu *flags = mmnext->flags; 9099b1aa8d6SNeel Natu return (0); 9109b1aa8d6SNeel Natu } else { 9119b1aa8d6SNeel Natu return (ENOENT); 9129b1aa8d6SNeel Natu } 913477867a0SNeel Natu } 914477867a0SNeel Natu 915318224bbSNeel Natu static void 9169b1aa8d6SNeel Natu vm_free_memmap(struct vm *vm, int ident) 917366f6083SPeter Grehan { 9189b1aa8d6SNeel Natu struct mem_map *mm; 91973505a10SRobert Wing int error __diagused; 9204db4fb2cSNeel Natu 9219b1aa8d6SNeel Natu mm = &vm->mem_maps[ident]; 9229b1aa8d6SNeel Natu if (mm->len) { 9239b1aa8d6SNeel Natu error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 9249b1aa8d6SNeel Natu mm->gpa + mm->len); 9259b1aa8d6SNeel Natu KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 9269b1aa8d6SNeel Natu __func__, error)); 9279b1aa8d6SNeel Natu bzero(mm, sizeof(struct mem_map)); 928318224bbSNeel Natu } 929318224bbSNeel Natu } 930318224bbSNeel Natu 9319b1aa8d6SNeel Natu static __inline bool 
9329b1aa8d6SNeel Natu sysmem_mapping(struct vm *vm, struct mem_map *mm) 933318224bbSNeel Natu { 934318224bbSNeel Natu 9359b1aa8d6SNeel Natu if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 9369b1aa8d6SNeel Natu return (true); 9379b1aa8d6SNeel Natu else 9389b1aa8d6SNeel Natu return (false); 939318224bbSNeel Natu } 940318224bbSNeel Natu 941147d12a7SAntoine Brodin vm_paddr_t 942147d12a7SAntoine Brodin vmm_sysmem_maxaddr(struct vm *vm) 9439b1aa8d6SNeel Natu { 9449b1aa8d6SNeel Natu struct mem_map *mm; 9459b1aa8d6SNeel Natu vm_paddr_t maxaddr; 9469b1aa8d6SNeel Natu int i; 947318224bbSNeel Natu 9489b1aa8d6SNeel Natu maxaddr = 0; 9499b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9509b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 9519b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm)) { 9529b1aa8d6SNeel Natu if (maxaddr < mm->gpa + mm->len) 9539b1aa8d6SNeel Natu maxaddr = mm->gpa + mm->len; 9549b1aa8d6SNeel Natu } 9559b1aa8d6SNeel Natu } 9569b1aa8d6SNeel Natu return (maxaddr); 957318224bbSNeel Natu } 958318224bbSNeel Natu 959318224bbSNeel Natu static void 960490d56c5SEd Maste vm_iommu_modify(struct vm *vm, bool map) 961318224bbSNeel Natu { 962318224bbSNeel Natu int i, sz; 963318224bbSNeel Natu vm_paddr_t gpa, hpa; 9649b1aa8d6SNeel Natu struct mem_map *mm; 965318224bbSNeel Natu void *vp, *cookie, *host_domain; 966318224bbSNeel Natu 967318224bbSNeel Natu sz = PAGE_SIZE; 968318224bbSNeel Natu host_domain = iommu_host_domain(); 969318224bbSNeel Natu 9709b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9719b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 9729b1aa8d6SNeel Natu if (!sysmem_mapping(vm, mm)) 9739b1aa8d6SNeel Natu continue; 974318224bbSNeel Natu 9759b1aa8d6SNeel Natu if (map) { 9769b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, 9779b1aa8d6SNeel Natu ("iommu map found invalid memmap %#lx/%#lx/%#x", 9789b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 9799b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) 9809b1aa8d6SNeel Natu continue; 
9819b1aa8d6SNeel Natu mm->flags |= VM_MEMMAP_F_IOMMU; 9829b1aa8d6SNeel Natu } else { 9839b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) 9849b1aa8d6SNeel Natu continue; 9859b1aa8d6SNeel Natu mm->flags &= ~VM_MEMMAP_F_IOMMU; 9869b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, 9879b1aa8d6SNeel Natu ("iommu unmap found invalid memmap %#lx/%#lx/%#x", 9889b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 9899b1aa8d6SNeel Natu } 9909b1aa8d6SNeel Natu 9919b1aa8d6SNeel Natu gpa = mm->gpa; 9929b1aa8d6SNeel Natu while (gpa < mm->gpa + mm->len) { 99328b561adSJohn Baldwin vp = vm_gpa_hold_global(vm, gpa, PAGE_SIZE, 99428b561adSJohn Baldwin VM_PROT_WRITE, &cookie); 995318224bbSNeel Natu KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 996318224bbSNeel Natu vm_name(vm), gpa)); 997318224bbSNeel Natu 998318224bbSNeel Natu vm_gpa_release(cookie); 999318224bbSNeel Natu 1000318224bbSNeel Natu hpa = DMAP_TO_PHYS((uintptr_t)vp); 1001318224bbSNeel Natu if (map) { 1002318224bbSNeel Natu iommu_create_mapping(vm->iommu, gpa, hpa, sz); 1003318224bbSNeel Natu } else { 1004318224bbSNeel Natu iommu_remove_mapping(vm->iommu, gpa, sz); 1005318224bbSNeel Natu } 1006318224bbSNeel Natu 1007318224bbSNeel Natu gpa += PAGE_SIZE; 1008318224bbSNeel Natu } 1009318224bbSNeel Natu } 1010318224bbSNeel Natu 1011318224bbSNeel Natu /* 1012318224bbSNeel Natu * Invalidate the cached translations associated with the domain 1013318224bbSNeel Natu * from which pages were removed. 
1014318224bbSNeel Natu */ 1015318224bbSNeel Natu if (map) 1016318224bbSNeel Natu iommu_invalidate_tlb(host_domain); 1017318224bbSNeel Natu else 1018318224bbSNeel Natu iommu_invalidate_tlb(vm->iommu); 1019318224bbSNeel Natu } 1020318224bbSNeel Natu 1021490d56c5SEd Maste #define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) 1022490d56c5SEd Maste #define vm_iommu_map(vm) vm_iommu_modify((vm), true) 1023318224bbSNeel Natu 1024318224bbSNeel Natu int 1025318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 1026318224bbSNeel Natu { 1027318224bbSNeel Natu int error; 1028318224bbSNeel Natu 1029318224bbSNeel Natu error = ppt_unassign_device(vm, bus, slot, func); 1030318224bbSNeel Natu if (error) 1031318224bbSNeel Natu return (error); 1032318224bbSNeel Natu 10339b1aa8d6SNeel Natu if (ppt_assigned_devices(vm) == 0) 1034318224bbSNeel Natu vm_iommu_unmap(vm); 10359b1aa8d6SNeel Natu 1036318224bbSNeel Natu return (0); 1037318224bbSNeel Natu } 1038318224bbSNeel Natu 1039318224bbSNeel Natu int 1040318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 1041318224bbSNeel Natu { 1042318224bbSNeel Natu int error; 1043318224bbSNeel Natu vm_paddr_t maxaddr; 1044318224bbSNeel Natu 10459b1aa8d6SNeel Natu /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ 104651f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 1047318224bbSNeel Natu KASSERT(vm->iommu == NULL, 1048318224bbSNeel Natu ("vm_assign_pptdev: iommu must be NULL")); 1049147d12a7SAntoine Brodin maxaddr = vmm_sysmem_maxaddr(vm); 1050318224bbSNeel Natu vm->iommu = iommu_create_domain(maxaddr); 1051ffe1b10dSJohn Baldwin if (vm->iommu == NULL) 1052ffe1b10dSJohn Baldwin return (ENXIO); 1053318224bbSNeel Natu vm_iommu_map(vm); 1054318224bbSNeel Natu } 1055318224bbSNeel Natu 1056318224bbSNeel Natu error = ppt_assign_device(vm, bus, slot, func); 1057318224bbSNeel Natu return (error); 1058318224bbSNeel Natu } 1059318224bbSNeel Natu 106028b561adSJohn Baldwin static void * 
106128b561adSJohn Baldwin _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1062318224bbSNeel Natu void **cookie) 1063318224bbSNeel Natu { 10649b1aa8d6SNeel Natu int i, count, pageoff; 10659b1aa8d6SNeel Natu struct mem_map *mm; 1066318224bbSNeel Natu vm_page_t m; 106728b561adSJohn Baldwin 1068318224bbSNeel Natu pageoff = gpa & PAGE_MASK; 1069318224bbSNeel Natu if (len > PAGE_SIZE - pageoff) 1070318224bbSNeel Natu panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1071318224bbSNeel Natu 10729b1aa8d6SNeel Natu count = 0; 10739b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 10749b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 107546567b4fSPeter Grehan if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) { 1076318224bbSNeel Natu count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1077318224bbSNeel Natu trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 10789b1aa8d6SNeel Natu break; 10799b1aa8d6SNeel Natu } 10809b1aa8d6SNeel Natu } 1081318224bbSNeel Natu 1082318224bbSNeel Natu if (count == 1) { 1083318224bbSNeel Natu *cookie = m; 1084318224bbSNeel Natu return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1085318224bbSNeel Natu } else { 1086318224bbSNeel Natu *cookie = NULL; 1087318224bbSNeel Natu return (NULL); 1088318224bbSNeel Natu } 1089318224bbSNeel Natu } 1090318224bbSNeel Natu 109128b561adSJohn Baldwin void * 1092d3956e46SJohn Baldwin vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 109328b561adSJohn Baldwin void **cookie) 109428b561adSJohn Baldwin { 109528b561adSJohn Baldwin #ifdef INVARIANTS 109628b561adSJohn Baldwin /* 109728b561adSJohn Baldwin * The current vcpu should be frozen to ensure 'vm_memmap[]' 109828b561adSJohn Baldwin * stability. 
109928b561adSJohn Baldwin */ 1100d3956e46SJohn Baldwin int state = vcpu_get_state(vcpu, NULL); 110128b561adSJohn Baldwin KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 110228b561adSJohn Baldwin __func__, state)); 110328b561adSJohn Baldwin #endif 1104d3956e46SJohn Baldwin return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 110528b561adSJohn Baldwin } 110628b561adSJohn Baldwin 110728b561adSJohn Baldwin void * 110828b561adSJohn Baldwin vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 110928b561adSJohn Baldwin void **cookie) 111028b561adSJohn Baldwin { 111167b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_LOCKED); 111228b561adSJohn Baldwin return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 111328b561adSJohn Baldwin } 111428b561adSJohn Baldwin 1115318224bbSNeel Natu void 1116318224bbSNeel Natu vm_gpa_release(void *cookie) 1117318224bbSNeel Natu { 1118318224bbSNeel Natu vm_page_t m = cookie; 1119318224bbSNeel Natu 1120eeacb3b0SMark Johnston vm_page_unwire(m, PQ_ACTIVE); 1121366f6083SPeter Grehan } 1122366f6083SPeter Grehan 1123366f6083SPeter Grehan int 1124d3956e46SJohn Baldwin vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1125366f6083SPeter Grehan { 1126366f6083SPeter Grehan 1127366f6083SPeter Grehan if (reg >= VM_REG_LAST) 1128366f6083SPeter Grehan return (EINVAL); 1129366f6083SPeter Grehan 1130d3956e46SJohn Baldwin return (vmmops_getreg(vcpu->cookie, reg, retval)); 1131366f6083SPeter Grehan } 1132366f6083SPeter Grehan 1133366f6083SPeter Grehan int 1134d3956e46SJohn Baldwin vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1135366f6083SPeter Grehan { 1136d087a399SNeel Natu int error; 1137366f6083SPeter Grehan 1138366f6083SPeter Grehan if (reg >= VM_REG_LAST) 1139366f6083SPeter Grehan return (EINVAL); 1140366f6083SPeter Grehan 1141869c8d19SJohn Baldwin error = vmmops_setreg(vcpu->cookie, reg, val); 1142d087a399SNeel Natu if (error || reg != VM_REG_GUEST_RIP) 1143d087a399SNeel Natu return 
(error); 1144d087a399SNeel Natu 1145d087a399SNeel Natu /* Set 'nextrip' to match the value of %rip */ 1146d3956e46SJohn Baldwin VMM_CTR1(vcpu, "Setting nextrip to %#lx", val); 1147d087a399SNeel Natu vcpu->nextrip = val; 1148d087a399SNeel Natu return (0); 1149366f6083SPeter Grehan } 1150366f6083SPeter Grehan 1151490d56c5SEd Maste static bool 1152366f6083SPeter Grehan is_descriptor_table(int reg) 1153366f6083SPeter Grehan { 1154366f6083SPeter Grehan 1155366f6083SPeter Grehan switch (reg) { 1156366f6083SPeter Grehan case VM_REG_GUEST_IDTR: 1157366f6083SPeter Grehan case VM_REG_GUEST_GDTR: 1158490d56c5SEd Maste return (true); 1159366f6083SPeter Grehan default: 1160490d56c5SEd Maste return (false); 1161366f6083SPeter Grehan } 1162366f6083SPeter Grehan } 1163366f6083SPeter Grehan 1164490d56c5SEd Maste static bool 1165366f6083SPeter Grehan is_segment_register(int reg) 1166366f6083SPeter Grehan { 1167366f6083SPeter Grehan 1168366f6083SPeter Grehan switch (reg) { 1169366f6083SPeter Grehan case VM_REG_GUEST_ES: 1170366f6083SPeter Grehan case VM_REG_GUEST_CS: 1171366f6083SPeter Grehan case VM_REG_GUEST_SS: 1172366f6083SPeter Grehan case VM_REG_GUEST_DS: 1173366f6083SPeter Grehan case VM_REG_GUEST_FS: 1174366f6083SPeter Grehan case VM_REG_GUEST_GS: 1175366f6083SPeter Grehan case VM_REG_GUEST_TR: 1176366f6083SPeter Grehan case VM_REG_GUEST_LDTR: 1177490d56c5SEd Maste return (true); 1178366f6083SPeter Grehan default: 1179490d56c5SEd Maste return (false); 1180366f6083SPeter Grehan } 1181366f6083SPeter Grehan } 1182366f6083SPeter Grehan 1183366f6083SPeter Grehan int 1184d3956e46SJohn Baldwin vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) 1185366f6083SPeter Grehan { 1186366f6083SPeter Grehan 1187366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1188366f6083SPeter Grehan return (EINVAL); 1189366f6083SPeter Grehan 1190d3956e46SJohn Baldwin return (vmmops_getdesc(vcpu->cookie, reg, desc)); 1191366f6083SPeter Grehan } 
1192366f6083SPeter Grehan 1193366f6083SPeter Grehan int 11943f0f4b15SJohn Baldwin vm_set_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) 1195366f6083SPeter Grehan { 1196366f6083SPeter Grehan 1197366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1198366f6083SPeter Grehan return (EINVAL); 1199366f6083SPeter Grehan 12003f0f4b15SJohn Baldwin return (vmmops_setdesc(vcpu->cookie, reg, desc)); 1201366f6083SPeter Grehan } 1202366f6083SPeter Grehan 1203366f6083SPeter Grehan static void 1204366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu) 1205366f6083SPeter Grehan { 1206366f6083SPeter Grehan 120738f1b189SPeter Grehan /* flush host state to the pcb */ 120838f1b189SPeter Grehan fpuexit(curthread); 1209bd8572e0SNeel Natu 1210bd8572e0SNeel Natu /* restore guest FPU state */ 1211366f6083SPeter Grehan fpu_stop_emulating(); 121238f1b189SPeter Grehan fpurestore(vcpu->guestfpu); 1213bd8572e0SNeel Natu 1214abb023fbSJohn Baldwin /* restore guest XCR0 if XSAVE is enabled in the host */ 1215abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) 1216abb023fbSJohn Baldwin load_xcr(0, vcpu->guest_xcr0); 1217abb023fbSJohn Baldwin 1218bd8572e0SNeel Natu /* 1219bd8572e0SNeel Natu * The FPU is now "dirty" with the guest's state so turn on emulation 1220bd8572e0SNeel Natu * to trap any access to the FPU by the host. 
1221bd8572e0SNeel Natu */ 1222bd8572e0SNeel Natu fpu_start_emulating(); 1223366f6083SPeter Grehan } 1224366f6083SPeter Grehan 1225366f6083SPeter Grehan static void 1226366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu) 1227366f6083SPeter Grehan { 1228366f6083SPeter Grehan 1229bd8572e0SNeel Natu if ((rcr0() & CR0_TS) == 0) 1230bd8572e0SNeel Natu panic("fpu emulation not enabled in host!"); 1231bd8572e0SNeel Natu 1232abb023fbSJohn Baldwin /* save guest XCR0 and restore host XCR0 */ 1233abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) { 1234abb023fbSJohn Baldwin vcpu->guest_xcr0 = rxcr(0); 1235abb023fbSJohn Baldwin load_xcr(0, vmm_get_host_xcr0()); 1236abb023fbSJohn Baldwin } 1237abb023fbSJohn Baldwin 1238bd8572e0SNeel Natu /* save guest FPU state */ 1239bd8572e0SNeel Natu fpu_stop_emulating(); 124038f1b189SPeter Grehan fpusave(vcpu->guestfpu); 1241366f6083SPeter Grehan fpu_start_emulating(); 1242366f6083SPeter Grehan } 1243366f6083SPeter Grehan 124461592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 1245f76fc5d4SNeel Natu 1246318224bbSNeel Natu static int 12473f0f4b15SJohn Baldwin vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1248f80330a8SNeel Natu bool from_idle) 1249366f6083SPeter Grehan { 1250318224bbSNeel Natu int error; 1251366f6083SPeter Grehan 1252318224bbSNeel Natu vcpu_assert_locked(vcpu); 1253366f6083SPeter Grehan 1254f76fc5d4SNeel Natu /* 1255f80330a8SNeel Natu * State transitions from the vmmdev_ioctl() must always begin from 1256f80330a8SNeel Natu * the VCPU_IDLE state. This guarantees that there is only a single 1257f80330a8SNeel Natu * ioctl() operating on a vcpu at any point. 
1258f80330a8SNeel Natu */ 1259f80330a8SNeel Natu if (from_idle) { 1260248e6799SNeel Natu while (vcpu->state != VCPU_IDLE) { 1261248e6799SNeel Natu vcpu->reqidle = 1; 1262248e6799SNeel Natu vcpu_notify_event_locked(vcpu, false); 12633f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "vcpu state change from %s to " 1264248e6799SNeel Natu "idle requested", vcpu_state2str(vcpu->state)); 1265f80330a8SNeel Natu msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1266248e6799SNeel Natu } 1267f80330a8SNeel Natu } else { 1268f80330a8SNeel Natu KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1269f80330a8SNeel Natu "vcpu idle state")); 1270f80330a8SNeel Natu } 1271f80330a8SNeel Natu 1272ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 1273ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1274ef39d7e9SNeel Natu "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1275ef39d7e9SNeel Natu } else { 1276ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1277ef39d7e9SNeel Natu "vcpu that is not running", vcpu->hostcpu)); 1278ef39d7e9SNeel Natu } 1279ef39d7e9SNeel Natu 1280f80330a8SNeel Natu /* 1281318224bbSNeel Natu * The following state transitions are allowed: 1282318224bbSNeel Natu * IDLE -> FROZEN -> IDLE 1283318224bbSNeel Natu * FROZEN -> RUNNING -> FROZEN 1284318224bbSNeel Natu * FROZEN -> SLEEPING -> FROZEN 1285f76fc5d4SNeel Natu */ 1286318224bbSNeel Natu switch (vcpu->state) { 1287318224bbSNeel Natu case VCPU_IDLE: 1288318224bbSNeel Natu case VCPU_RUNNING: 1289318224bbSNeel Natu case VCPU_SLEEPING: 1290318224bbSNeel Natu error = (newstate != VCPU_FROZEN); 1291318224bbSNeel Natu break; 1292318224bbSNeel Natu case VCPU_FROZEN: 1293318224bbSNeel Natu error = (newstate == VCPU_FROZEN); 1294318224bbSNeel Natu break; 1295318224bbSNeel Natu default: 1296318224bbSNeel Natu error = 1; 1297318224bbSNeel Natu break; 1298318224bbSNeel Natu } 1299318224bbSNeel Natu 1300f80330a8SNeel Natu if (error) 1301f80330a8SNeel Natu 
return (EBUSY); 1302318224bbSNeel Natu 13033f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "vcpu state changed from %s to %s", 1304248e6799SNeel Natu vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); 1305248e6799SNeel Natu 1306f80330a8SNeel Natu vcpu->state = newstate; 1307ef39d7e9SNeel Natu if (newstate == VCPU_RUNNING) 1308ef39d7e9SNeel Natu vcpu->hostcpu = curcpu; 1309ef39d7e9SNeel Natu else 1310ef39d7e9SNeel Natu vcpu->hostcpu = NOCPU; 1311ef39d7e9SNeel Natu 1312f80330a8SNeel Natu if (newstate == VCPU_IDLE) 1313f80330a8SNeel Natu wakeup(&vcpu->state); 1314f80330a8SNeel Natu 1315f80330a8SNeel Natu return (0); 1316318224bbSNeel Natu } 1317318224bbSNeel Natu 1318318224bbSNeel Natu static void 13193f0f4b15SJohn Baldwin vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1320318224bbSNeel Natu { 1321318224bbSNeel Natu int error; 1322318224bbSNeel Natu 13233f0f4b15SJohn Baldwin if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1324318224bbSNeel Natu panic("Error %d setting state to %d\n", error, newstate); 1325318224bbSNeel Natu } 1326318224bbSNeel Natu 1327318224bbSNeel Natu static void 13283f0f4b15SJohn Baldwin vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1329318224bbSNeel Natu { 1330318224bbSNeel Natu int error; 1331318224bbSNeel Natu 13323f0f4b15SJohn Baldwin if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1333318224bbSNeel Natu panic("Error %d setting state to %d", error, newstate); 1334318224bbSNeel Natu } 1335318224bbSNeel Natu 1336b837daddSKonstantin Belousov static int 1337d8be3d52SJohn Baldwin vm_handle_rendezvous(struct vcpu *vcpu) 13385b8a8cd1SNeel Natu { 1339d8be3d52SJohn Baldwin struct vm *vm = vcpu->vm; 1340b837daddSKonstantin Belousov struct thread *td; 1341d8be3d52SJohn Baldwin int error, vcpuid; 13425b8a8cd1SNeel Natu 1343b837daddSKonstantin Belousov error = 0; 1344d8be3d52SJohn Baldwin vcpuid = vcpu->vcpuid; 1345b837daddSKonstantin Belousov td = curthread; 13465b8a8cd1SNeel Natu 
mtx_lock(&vm->rendezvous_mtx); 13475b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 134822d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 1349e2650af1SStefan Eßer CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); 135022d822c6SNeel Natu 1351949f0f47SJohn Baldwin if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 135222d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 1353d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Calling rendezvous func"); 1354d8be3d52SJohn Baldwin (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); 13555b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 13565b8a8cd1SNeel Natu } 13575b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 13585b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 1359d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous completed"); 1360869dbab7SAndriy Gapon vm->rendezvous_func = NULL; 13615b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 13625b8a8cd1SNeel Natu break; 13635b8a8cd1SNeel Natu } 1364d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Wait for rendezvous completion"); 13655b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 1366b837daddSKonstantin Belousov "vmrndv", hz); 1367c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1368b837daddSKonstantin Belousov mtx_unlock(&vm->rendezvous_mtx); 1369b837daddSKonstantin Belousov error = thread_check_susp(td, true); 1370b837daddSKonstantin Belousov if (error != 0) 1371b837daddSKonstantin Belousov return (error); 1372b837daddSKonstantin Belousov mtx_lock(&vm->rendezvous_mtx); 1373b837daddSKonstantin Belousov } 13745b8a8cd1SNeel Natu } 13755b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 1376b837daddSKonstantin Belousov return (0); 13775b8a8cd1SNeel Natu } 13785b8a8cd1SNeel Natu 1379318224bbSNeel Natu /* 1380318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 
1381318224bbSNeel Natu */ 1382318224bbSNeel Natu static int 13833f0f4b15SJohn Baldwin vm_handle_hlt(struct vcpu *vcpu, bool intr_disabled, bool *retu) 1384318224bbSNeel Natu { 13853f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1386c6a0cc2eSNeel Natu const char *wmesg; 1387b837daddSKonstantin Belousov struct thread *td; 13883f0f4b15SJohn Baldwin int error, t, vcpuid, vcpu_halted, vm_halted; 1389e50ce2aaSNeel Natu 13903f0f4b15SJohn Baldwin vcpuid = vcpu->vcpuid; 1391e50ce2aaSNeel Natu vcpu_halted = 0; 1392e50ce2aaSNeel Natu vm_halted = 0; 1393b837daddSKonstantin Belousov error = 0; 1394b837daddSKonstantin Belousov td = curthread; 1395318224bbSNeel Natu 13963f0f4b15SJohn Baldwin KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 13973f0f4b15SJohn Baldwin 1398f76fc5d4SNeel Natu vcpu_lock(vcpu); 1399c6a0cc2eSNeel Natu while (1) { 1400f76fc5d4SNeel Natu /* 1401f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1402c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1403c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1404f76fc5d4SNeel Natu * 1405c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 140615add60dSPeter Grehan * vcpu returned from vmmops_run() and before it acquired the 1407c6a0cc2eSNeel Natu * vcpu lock above. 
1408f76fc5d4SNeel Natu */ 1409248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1410c6a0cc2eSNeel Natu break; 141180cb5d84SJohn Baldwin if (vm_nmi_pending(vcpu)) 1412c6a0cc2eSNeel Natu break; 1413c6a0cc2eSNeel Natu if (!intr_disabled) { 141480cb5d84SJohn Baldwin if (vm_extint_pending(vcpu) || 1415c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1416c6a0cc2eSNeel Natu break; 1417c6a0cc2eSNeel Natu } 1418c6a0cc2eSNeel Natu } 1419c6a0cc2eSNeel Natu 1420f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 142180cb5d84SJohn Baldwin if (vcpu_should_yield(vcpu)) 1422f008d157SNeel Natu break; 1423f008d157SNeel Natu 142480cb5d84SJohn Baldwin if (vcpu_debugged(vcpu)) 1425fc276d92SJohn Baldwin break; 1426fc276d92SJohn Baldwin 1427e50ce2aaSNeel Natu /* 1428e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1429e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1430e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1431e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 
1432e50ce2aaSNeel Natu */ 1433e50ce2aaSNeel Natu if (intr_disabled) { 1434c6a0cc2eSNeel Natu wmesg = "vmhalt"; 14353f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Halted"); 1436055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1437e50ce2aaSNeel Natu vcpu_halted = 1; 1438e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1439e50ce2aaSNeel Natu } 1440e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1441e50ce2aaSNeel Natu vm_halted = 1; 1442e50ce2aaSNeel Natu break; 1443e50ce2aaSNeel Natu } 1444e50ce2aaSNeel Natu } else { 1445e50ce2aaSNeel Natu wmesg = "vmidle"; 1446e50ce2aaSNeel Natu } 1447c6a0cc2eSNeel Natu 1448f76fc5d4SNeel Natu t = ticks; 14493f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1450f008d157SNeel Natu /* 1451f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1452f008d157SNeel Natu * wake up periodically to check pending signals. 1453f008d157SNeel Natu */ 1454f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 14553f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_FROZEN); 14563dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_IDLE_TICKS, ticks - t); 1457c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1458b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1459b837daddSKonstantin Belousov error = thread_check_susp(td, false); 14604d447b30SKonstantin Belousov if (error != 0) { 14614d447b30SKonstantin Belousov if (vcpu_halted) { 14624d447b30SKonstantin Belousov CPU_CLR_ATOMIC(vcpuid, 14634d447b30SKonstantin Belousov &vm->halted_cpus); 14644d447b30SKonstantin Belousov } 1465b837daddSKonstantin Belousov return (error); 14664d447b30SKonstantin Belousov } 1467b837daddSKonstantin Belousov vcpu_lock(vcpu); 1468b837daddSKonstantin Belousov } 1469f76fc5d4SNeel Natu } 1470e50ce2aaSNeel Natu 1471e50ce2aaSNeel Natu if (vcpu_halted) 1472e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1473e50ce2aaSNeel Natu 1474f76fc5d4SNeel Natu 
vcpu_unlock(vcpu); 1475f76fc5d4SNeel Natu 1476e50ce2aaSNeel Natu if (vm_halted) 1477e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1478e50ce2aaSNeel Natu 1479318224bbSNeel Natu return (0); 1480318224bbSNeel Natu } 1481318224bbSNeel Natu 1482318224bbSNeel Natu static int 14833f0f4b15SJohn Baldwin vm_handle_paging(struct vcpu *vcpu, bool *retu) 1484318224bbSNeel Natu { 14853f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1486318224bbSNeel Natu int rv, ftype; 1487318224bbSNeel Natu struct vm_map *map; 1488318224bbSNeel Natu struct vm_exit *vme; 1489318224bbSNeel Natu 1490318224bbSNeel Natu vme = &vcpu->exitinfo; 1491318224bbSNeel Natu 1492d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1493d087a399SNeel Natu __func__, vme->inst_length)); 1494d087a399SNeel Natu 1495318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1496318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1497318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1498318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1499318224bbSNeel Natu 1500318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1501318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1502318224bbSNeel Natu vme->u.paging.gpa, ftype); 15039d8d8e3eSNeel Natu if (rv == 0) { 15043f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx", 15059d8d8e3eSNeel Natu ftype == VM_PROT_READ ? 
"accessed" : "dirty", 15069d8d8e3eSNeel Natu vme->u.paging.gpa); 1507318224bbSNeel Natu goto done; 1508318224bbSNeel Natu } 15099d8d8e3eSNeel Natu } 1510318224bbSNeel Natu 1511318224bbSNeel Natu map = &vm->vmspace->vm_map; 1512df08823dSKonstantin Belousov rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1513318224bbSNeel Natu 15143f0f4b15SJohn Baldwin VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, " 1515513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1516318224bbSNeel Natu 1517318224bbSNeel Natu if (rv != KERN_SUCCESS) 1518318224bbSNeel Natu return (EFAULT); 1519318224bbSNeel Natu done: 1520318224bbSNeel Natu return (0); 1521318224bbSNeel Natu } 1522318224bbSNeel Natu 1523318224bbSNeel Natu static int 15243f0f4b15SJohn Baldwin vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 1525318224bbSNeel Natu { 1526318224bbSNeel Natu struct vie *vie; 1527318224bbSNeel Natu struct vm_exit *vme; 1528e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1529e813a873SNeel Natu struct vm_guest_paging *paging; 1530565bbb86SNeel Natu mem_region_read_t mread; 1531565bbb86SNeel Natu mem_region_write_t mwrite; 1532f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 15331c73ea3eSNeel Natu int cs_d, error, fault; 1534318224bbSNeel Natu 1535318224bbSNeel Natu vme = &vcpu->exitinfo; 1536318224bbSNeel Natu 15371c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 15381c73ea3eSNeel Natu __func__, vme->inst_length)); 15391c73ea3eSNeel Natu 1540318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1541318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1542e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1543f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1544318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1545e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1546f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1547318224bbSNeel Natu 15483f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "inst_emul fault accessing gpa %#lx", gpa); 
15499d8d8e3eSNeel Natu 1550318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1551c2a875f9SNeel Natu if (vie->num_valid == 0) { 1552d3956e46SJohn Baldwin error = vmm_fetch_instruction(vcpu, paging, vme->rip + cs_base, 1553d3956e46SJohn Baldwin VIE_INST_SIZE, vie, &fault); 1554c2a875f9SNeel Natu } else { 1555c2a875f9SNeel Natu /* 1556c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1557c2a875f9SNeel Natu */ 15589c4d5478SNeel Natu error = fault = 0; 1559c2a875f9SNeel Natu } 15609c4d5478SNeel Natu if (error || fault) 15619c4d5478SNeel Natu return (error); 1562318224bbSNeel Natu 1563d3956e46SJohn Baldwin if (vmm_decode_instruction(vcpu, gla, cpu_mode, cs_d, vie) != 0) { 15643f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "Error decoding instruction at %#lx", 1565c07a0648SNeel Natu vme->rip + cs_base); 1566c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1567c07a0648SNeel Natu return (0); 1568c07a0648SNeel Natu } 1569318224bbSNeel Natu 1570a0b78f09SPeter Grehan /* 15711c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 
1572a0b78f09SPeter Grehan */ 1573a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1574d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 15753f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "nextrip updated to %#lx after instruction decoding", 15763f0f4b15SJohn Baldwin vcpu->nextrip); 1577a0b78f09SPeter Grehan 157808e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1579565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1580565bbb86SNeel Natu mread = lapic_mmio_read; 1581565bbb86SNeel Natu mwrite = lapic_mmio_write; 1582565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1583565bbb86SNeel Natu mread = vioapic_mmio_read; 1584565bbb86SNeel Natu mwrite = vioapic_mmio_write; 158508e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 158608e3ff32SNeel Natu mread = vhpet_mmio_read; 158708e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1588565bbb86SNeel Natu } else { 1589becd9849SNeel Natu *retu = true; 1590318224bbSNeel Natu return (0); 1591318224bbSNeel Natu } 1592318224bbSNeel Natu 1593d3956e46SJohn Baldwin error = vmm_emulate_instruction(vcpu, gpa, vie, paging, mread, mwrite, 1594d3956e46SJohn Baldwin retu); 1595318224bbSNeel Natu 1596318224bbSNeel Natu return (error); 1597318224bbSNeel Natu } 1598318224bbSNeel Natu 1599b15a09c0SNeel Natu static int 16003f0f4b15SJohn Baldwin vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1601b15a09c0SNeel Natu { 16023f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1603b837daddSKonstantin Belousov int error, i; 1604b837daddSKonstantin Belousov struct thread *td; 1605b15a09c0SNeel Natu 1606b837daddSKonstantin Belousov error = 0; 1607b837daddSKonstantin Belousov td = curthread; 1608b15a09c0SNeel Natu 16093f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1610b15a09c0SNeel Natu 1611b15a09c0SNeel Natu /* 1612b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended 
themselves. 1613b15a09c0SNeel Natu * 1614b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1615b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1616b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1617b15a09c0SNeel Natu */ 1618b15a09c0SNeel Natu vcpu_lock(vcpu); 1619b837daddSKonstantin Belousov while (error == 0) { 1620b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 16213f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "All vcpus suspended"); 1622b15a09c0SNeel Natu break; 1623b15a09c0SNeel Natu } 1624b15a09c0SNeel Natu 1625b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 16263f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Sleeping during suspend"); 16273f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1628b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 16293f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1630c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1631b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1632b837daddSKonstantin Belousov error = thread_check_susp(td, false); 1633b837daddSKonstantin Belousov vcpu_lock(vcpu); 1634b837daddSKonstantin Belousov } 1635b15a09c0SNeel Natu } else { 16363f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous during suspend"); 1637b15a09c0SNeel Natu vcpu_unlock(vcpu); 1638d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 1639b15a09c0SNeel Natu vcpu_lock(vcpu); 1640b15a09c0SNeel Natu } 1641b15a09c0SNeel Natu } 1642b15a09c0SNeel Natu vcpu_unlock(vcpu); 1643b15a09c0SNeel Natu 1644b15a09c0SNeel Natu /* 1645b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1646b15a09c0SNeel Natu */ 1647a488c9c9SRodney W. 
Grimes for (i = 0; i < vm->maxcpus; i++) { 1648b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 16493f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 1650b15a09c0SNeel Natu } 1651b15a09c0SNeel Natu } 1652b15a09c0SNeel Natu 1653b15a09c0SNeel Natu *retu = true; 1654b837daddSKonstantin Belousov return (error); 1655b15a09c0SNeel Natu } 1656b15a09c0SNeel Natu 1657248e6799SNeel Natu static int 16583f0f4b15SJohn Baldwin vm_handle_reqidle(struct vcpu *vcpu, bool *retu) 1659248e6799SNeel Natu { 1660248e6799SNeel Natu vcpu_lock(vcpu); 1661248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1662248e6799SNeel Natu vcpu->reqidle = 0; 1663248e6799SNeel Natu vcpu_unlock(vcpu); 1664248e6799SNeel Natu *retu = true; 1665248e6799SNeel Natu return (0); 1666248e6799SNeel Natu } 1667248e6799SNeel Natu 1668b15a09c0SNeel Natu int 1669f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1670b15a09c0SNeel Natu { 1671f0fdcfe2SNeel Natu int i; 1672b15a09c0SNeel Natu 1673f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1674f0fdcfe2SNeel Natu return (EINVAL); 1675f0fdcfe2SNeel Natu 1676f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1677f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1678f0fdcfe2SNeel Natu vm->suspend, how); 1679b15a09c0SNeel Natu return (EALREADY); 1680b15a09c0SNeel Natu } 1681f0fdcfe2SNeel Natu 1682f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1683f0fdcfe2SNeel Natu 1684f0fdcfe2SNeel Natu /* 1685f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1686f0fdcfe2SNeel Natu */ 1687a488c9c9SRodney W. 
Grimes for (i = 0; i < vm->maxcpus; i++) { 1688f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 16893f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 1690f0fdcfe2SNeel Natu } 1691f0fdcfe2SNeel Natu 1692f0fdcfe2SNeel Natu return (0); 1693f0fdcfe2SNeel Natu } 1694f0fdcfe2SNeel Natu 1695f0fdcfe2SNeel Natu void 169680cb5d84SJohn Baldwin vm_exit_suspended(struct vcpu *vcpu, uint64_t rip) 1697f0fdcfe2SNeel Natu { 169880cb5d84SJohn Baldwin struct vm *vm = vcpu->vm; 1699f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1700f0fdcfe2SNeel Natu 1701f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1702f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1703f0fdcfe2SNeel Natu 170480cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1705f0fdcfe2SNeel Natu vmexit->rip = rip; 1706f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1707f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1708f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1709b15a09c0SNeel Natu } 1710b15a09c0SNeel Natu 171140487465SNeel Natu void 171280cb5d84SJohn Baldwin vm_exit_debug(struct vcpu *vcpu, uint64_t rip) 1713fc276d92SJohn Baldwin { 1714fc276d92SJohn Baldwin struct vm_exit *vmexit; 1715fc276d92SJohn Baldwin 171680cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1717fc276d92SJohn Baldwin vmexit->rip = rip; 1718fc276d92SJohn Baldwin vmexit->inst_length = 0; 1719fc276d92SJohn Baldwin vmexit->exitcode = VM_EXITCODE_DEBUG; 1720fc276d92SJohn Baldwin } 1721fc276d92SJohn Baldwin 1722fc276d92SJohn Baldwin void 172380cb5d84SJohn Baldwin vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip) 172440487465SNeel Natu { 172540487465SNeel Natu struct vm_exit *vmexit; 172640487465SNeel Natu 172780cb5d84SJohn Baldwin KASSERT(vcpu->vm->rendezvous_func != NULL, 172880cb5d84SJohn Baldwin ("rendezvous not in progress")); 172940487465SNeel Natu 173080cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 173140487465SNeel Natu vmexit->rip = rip; 
173240487465SNeel Natu vmexit->inst_length = 0; 173340487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 173480cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_RENDEZVOUS, 1); 173540487465SNeel Natu } 173640487465SNeel Natu 173740487465SNeel Natu void 173880cb5d84SJohn Baldwin vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip) 1739248e6799SNeel Natu { 1740248e6799SNeel Natu struct vm_exit *vmexit; 1741248e6799SNeel Natu 174280cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1743248e6799SNeel Natu vmexit->rip = rip; 1744248e6799SNeel Natu vmexit->inst_length = 0; 1745248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 174680cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_REQIDLE, 1); 1747248e6799SNeel Natu } 1748248e6799SNeel Natu 1749248e6799SNeel Natu void 175080cb5d84SJohn Baldwin vm_exit_astpending(struct vcpu *vcpu, uint64_t rip) 175140487465SNeel Natu { 175240487465SNeel Natu struct vm_exit *vmexit; 175340487465SNeel Natu 175480cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 175540487465SNeel Natu vmexit->rip = rip; 175640487465SNeel Natu vmexit->inst_length = 0; 175740487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 175880cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_ASTPENDING, 1); 175940487465SNeel Natu } 176040487465SNeel Natu 1761318224bbSNeel Natu int 17623f0f4b15SJohn Baldwin vm_run(struct vcpu *vcpu, struct vm_exit *vme_user) 1763318224bbSNeel Natu { 17643f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1765248e6799SNeel Natu struct vm_eventinfo evinfo; 1766318224bbSNeel Natu int error, vcpuid; 1767318224bbSNeel Natu struct pcb *pcb; 1768d087a399SNeel Natu uint64_t tscval; 1769318224bbSNeel Natu struct vm_exit *vme; 1770becd9849SNeel Natu bool retu, intr_disabled; 1771318224bbSNeel Natu pmap_t pmap; 1772318224bbSNeel Natu 17733f0f4b15SJohn Baldwin vcpuid = vcpu->vcpuid; 1774318224bbSNeel Natu 177595ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 177695ebc360SNeel Natu return (EINVAL); 177795ebc360SNeel Natu 177895ebc360SNeel 
Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 177995ebc360SNeel Natu return (EINVAL); 178095ebc360SNeel Natu 1781318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1782318224bbSNeel Natu vme = &vcpu->exitinfo; 1783248e6799SNeel Natu evinfo.rptr = &vm->rendezvous_func; 1784248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1785248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1786318224bbSNeel Natu restart: 1787318224bbSNeel Natu critical_enter(); 1788318224bbSNeel Natu 1789318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1790318224bbSNeel Natu ("vm_run: absurd pm_active")); 1791318224bbSNeel Natu 1792318224bbSNeel Natu tscval = rdtsc(); 1793318224bbSNeel Natu 1794318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1795318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1796318224bbSNeel Natu 1797318224bbSNeel Natu restore_guest_fpustate(vcpu); 1798318224bbSNeel Natu 17993f0f4b15SJohn Baldwin vcpu_require_state(vcpu, VCPU_RUNNING); 1800869c8d19SJohn Baldwin error = vmmops_run(vcpu->cookie, vcpu->nextrip, pmap, &evinfo); 18013f0f4b15SJohn Baldwin vcpu_require_state(vcpu, VCPU_FROZEN); 1802318224bbSNeel Natu 1803318224bbSNeel Natu save_guest_fpustate(vcpu); 1804318224bbSNeel Natu 18053dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1806318224bbSNeel Natu 1807318224bbSNeel Natu critical_exit(); 1808318224bbSNeel Natu 1809318224bbSNeel Natu if (error == 0) { 1810becd9849SNeel Natu retu = false; 1811d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1812318224bbSNeel Natu switch (vme->exitcode) { 1813248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 18143f0f4b15SJohn Baldwin error = vm_handle_reqidle(vcpu, &retu); 1815248e6799SNeel Natu break; 1816b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 18173f0f4b15SJohn Baldwin error = vm_handle_suspend(vcpu, &retu); 1818b15a09c0SNeel Natu break; 181930b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 1820e42c24d5SJohn Baldwin vioapic_process_eoi(vm, vme->u.ioapic_eoi.vector); 
182130b94db8SNeel Natu break; 18225b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 1823d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 18245b8a8cd1SNeel Natu break; 1825318224bbSNeel Natu case VM_EXITCODE_HLT: 1826becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 18273f0f4b15SJohn Baldwin error = vm_handle_hlt(vcpu, intr_disabled, &retu); 1828318224bbSNeel Natu break; 1829318224bbSNeel Natu case VM_EXITCODE_PAGING: 18303f0f4b15SJohn Baldwin error = vm_handle_paging(vcpu, &retu); 1831318224bbSNeel Natu break; 1832318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 18333f0f4b15SJohn Baldwin error = vm_handle_inst_emul(vcpu, &retu); 1834318224bbSNeel Natu break; 1835d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1836d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 18373f0f4b15SJohn Baldwin error = vm_handle_inout(vcpu, vme, &retu); 1838d17b5104SNeel Natu break; 183965145c7fSNeel Natu case VM_EXITCODE_MONITOR: 184065145c7fSNeel Natu case VM_EXITCODE_MWAIT: 184127d26457SAndrew Turner case VM_EXITCODE_VMINSN: 1842d3956e46SJohn Baldwin vm_inject_ud(vcpu); 184365145c7fSNeel Natu break; 1844318224bbSNeel Natu default: 1845becd9849SNeel Natu retu = true; /* handled in userland */ 1846318224bbSNeel Natu break; 1847318224bbSNeel Natu } 1848318224bbSNeel Natu } 1849318224bbSNeel Natu 18500bda8d3eSCorvin Köhne /* 18510bda8d3eSCorvin Köhne * VM_EXITCODE_INST_EMUL could access the apic which could transform the 18520bda8d3eSCorvin Köhne * exit code into VM_EXITCODE_IPI. 
18530bda8d3eSCorvin Köhne */ 18540bda8d3eSCorvin Köhne if (error == 0 && vme->exitcode == VM_EXITCODE_IPI) { 18550bda8d3eSCorvin Köhne retu = false; 1856d8be3d52SJohn Baldwin error = vm_handle_ipi(vcpu, vme, &retu); 18570bda8d3eSCorvin Köhne } 18580bda8d3eSCorvin Köhne 1859d087a399SNeel Natu if (error == 0 && retu == false) 1860f76fc5d4SNeel Natu goto restart; 1861f76fc5d4SNeel Natu 18623dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_USERSPACE, 1); 18633f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "retu %d/%d", error, vme->exitcode); 1864248e6799SNeel Natu 1865318224bbSNeel Natu /* copy the exit information */ 18663f0f4b15SJohn Baldwin *vme_user = *vme; 1867366f6083SPeter Grehan return (error); 1868366f6083SPeter Grehan } 1869366f6083SPeter Grehan 1870366f6083SPeter Grehan int 1871d3956e46SJohn Baldwin vm_restart_instruction(struct vcpu *vcpu) 1872c9c75df4SNeel Natu { 1873d087a399SNeel Natu enum vcpu_state state; 1874d087a399SNeel Natu uint64_t rip; 187573505a10SRobert Wing int error __diagused; 1876c9c75df4SNeel Natu 1877d3956e46SJohn Baldwin state = vcpu_get_state(vcpu, NULL); 1878d087a399SNeel Natu if (state == VCPU_RUNNING) { 1879d087a399SNeel Natu /* 1880d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1881d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1882d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1883d087a399SNeel Natu * instruction to be restarted. 1884d087a399SNeel Natu */ 1885c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1886d3956e46SJohn Baldwin VMM_CTR1(vcpu, "restarting instruction at %#lx by " 1887d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1888d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1889d087a399SNeel Natu /* 1890d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 189115add60dSPeter Grehan * around vmmops_run() and 'nextrip' points to the next 189215add60dSPeter Grehan * instruction. 
Thus instruction restart is achieved by setting 189315add60dSPeter Grehan * 'nextrip' to the vcpu's %rip. 1894d087a399SNeel Natu */ 1895d3956e46SJohn Baldwin error = vm_get_register(vcpu, VM_REG_GUEST_RIP, &rip); 1896d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1897d3956e46SJohn Baldwin VMM_CTR2(vcpu, "restarting instruction by updating " 1898d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1899d087a399SNeel Natu vcpu->nextrip = rip; 1900d087a399SNeel Natu } else { 1901d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1902d087a399SNeel Natu } 1903c9c75df4SNeel Natu return (0); 1904c9c75df4SNeel Natu } 1905c9c75df4SNeel Natu 1906c9c75df4SNeel Natu int 190780cb5d84SJohn Baldwin vm_exit_intinfo(struct vcpu *vcpu, uint64_t info) 1908091d4532SNeel Natu { 1909091d4532SNeel Natu int type, vector; 1910091d4532SNeel Natu 1911091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1912091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1913091d4532SNeel Natu vector = info & 0xff; 1914091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1915091d4532SNeel Natu return (EINVAL); 1916091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1917091d4532SNeel Natu return (EINVAL); 1918091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1919091d4532SNeel Natu return (EINVAL); 1920091d4532SNeel Natu } else { 1921091d4532SNeel Natu info = 0; 1922091d4532SNeel Natu } 192380cb5d84SJohn Baldwin VMM_CTR2(vcpu, "%s: info1(%#lx)", __func__, info); 1924091d4532SNeel Natu vcpu->exitintinfo = info; 1925091d4532SNeel Natu return (0); 1926091d4532SNeel Natu } 1927091d4532SNeel Natu 1928091d4532SNeel Natu enum exc_class { 1929091d4532SNeel Natu EXC_BENIGN, 1930091d4532SNeel Natu EXC_CONTRIBUTORY, 1931091d4532SNeel Natu EXC_PAGEFAULT 1932091d4532SNeel Natu }; 1933091d4532SNeel Natu 1934091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1935091d4532SNeel Natu 
1936091d4532SNeel Natu static enum exc_class 1937091d4532SNeel Natu exception_class(uint64_t info) 1938091d4532SNeel Natu { 1939091d4532SNeel Natu int type, vector; 1940091d4532SNeel Natu 1941091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1942091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1943091d4532SNeel Natu vector = info & 0xff; 1944091d4532SNeel Natu 1945091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1946091d4532SNeel Natu switch (type) { 1947091d4532SNeel Natu case VM_INTINFO_HWINTR: 1948091d4532SNeel Natu case VM_INTINFO_SWINTR: 1949091d4532SNeel Natu case VM_INTINFO_NMI: 1950091d4532SNeel Natu return (EXC_BENIGN); 1951091d4532SNeel Natu default: 1952091d4532SNeel Natu /* 1953091d4532SNeel Natu * Hardware exception. 1954091d4532SNeel Natu * 1955091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 1956091d4532SNeel Natu * hardware interrupt and software interrupt. 1957091d4532SNeel Natu * 1958091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 1959091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 1960091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 1961091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 1962091d4532SNeel Natu * exception. 
1963091d4532SNeel Natu */ 1964091d4532SNeel Natu break; 1965091d4532SNeel Natu } 1966091d4532SNeel Natu 1967091d4532SNeel Natu switch (vector) { 1968091d4532SNeel Natu case IDT_PF: 1969091d4532SNeel Natu case IDT_VE: 1970091d4532SNeel Natu return (EXC_PAGEFAULT); 1971091d4532SNeel Natu case IDT_DE: 1972091d4532SNeel Natu case IDT_TS: 1973091d4532SNeel Natu case IDT_NP: 1974091d4532SNeel Natu case IDT_SS: 1975091d4532SNeel Natu case IDT_GP: 1976091d4532SNeel Natu return (EXC_CONTRIBUTORY); 1977091d4532SNeel Natu default: 1978091d4532SNeel Natu return (EXC_BENIGN); 1979091d4532SNeel Natu } 1980091d4532SNeel Natu } 1981091d4532SNeel Natu 1982091d4532SNeel Natu static int 198380cb5d84SJohn Baldwin nested_fault(struct vcpu *vcpu, uint64_t info1, uint64_t info2, 1984091d4532SNeel Natu uint64_t *retinfo) 1985091d4532SNeel Natu { 1986091d4532SNeel Natu enum exc_class exc1, exc2; 1987091d4532SNeel Natu int type1, vector1; 1988091d4532SNeel Natu 1989091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1990091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1991091d4532SNeel Natu 1992091d4532SNeel Natu /* 1993091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 1994091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 
1995091d4532SNeel Natu */ 1996091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 1997091d4532SNeel Natu vector1 = info1 & 0xff; 1998091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 199980cb5d84SJohn Baldwin VMM_CTR2(vcpu, "triple fault: info1(%#lx), info2(%#lx)", 2000091d4532SNeel Natu info1, info2); 200180cb5d84SJohn Baldwin vm_suspend(vcpu->vm, VM_SUSPEND_TRIPLEFAULT); 2002091d4532SNeel Natu *retinfo = 0; 2003091d4532SNeel Natu return (0); 2004091d4532SNeel Natu } 2005091d4532SNeel Natu 2006091d4532SNeel Natu /* 2007091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 2008091d4532SNeel Natu */ 2009091d4532SNeel Natu exc1 = exception_class(info1); 2010091d4532SNeel Natu exc2 = exception_class(info2); 2011091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 2012091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 2013091d4532SNeel Natu /* Convert nested fault into a double fault. 
*/ 2014091d4532SNeel Natu *retinfo = IDT_DF; 2015091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 2016091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 2017091d4532SNeel Natu } else { 2018091d4532SNeel Natu /* Handle exceptions serially */ 2019091d4532SNeel Natu *retinfo = info2; 2020091d4532SNeel Natu } 2021091d4532SNeel Natu return (1); 2022091d4532SNeel Natu } 2023091d4532SNeel Natu 2024091d4532SNeel Natu static uint64_t 2025091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 2026091d4532SNeel Natu { 2027091d4532SNeel Natu uint64_t info = 0; 2028091d4532SNeel Natu 2029091d4532SNeel Natu if (vcpu->exception_pending) { 2030c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 2031091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 2032c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 2033091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 2034c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 2035091d4532SNeel Natu } 2036091d4532SNeel Natu } 2037091d4532SNeel Natu return (info); 2038091d4532SNeel Natu } 2039091d4532SNeel Natu 2040091d4532SNeel Natu int 204180cb5d84SJohn Baldwin vm_entry_intinfo(struct vcpu *vcpu, uint64_t *retinfo) 2042091d4532SNeel Natu { 2043091d4532SNeel Natu uint64_t info1, info2; 2044091d4532SNeel Natu int valid; 2045091d4532SNeel Natu 2046091d4532SNeel Natu info1 = vcpu->exitintinfo; 2047091d4532SNeel Natu vcpu->exitintinfo = 0; 2048091d4532SNeel Natu 2049091d4532SNeel Natu info2 = 0; 2050091d4532SNeel Natu if (vcpu->exception_pending) { 2051091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 2052091d4532SNeel Natu vcpu->exception_pending = 0; 205380cb5d84SJohn Baldwin VMM_CTR2(vcpu, "Exception %d delivered: %#lx", 2054c9c75df4SNeel Natu vcpu->exc_vector, info2); 2055091d4532SNeel Natu } 2056091d4532SNeel Natu 2057091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 205880cb5d84SJohn Baldwin valid = nested_fault(vcpu, info1, info2, retinfo); 
2059091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 2060091d4532SNeel Natu *retinfo = info1; 2061091d4532SNeel Natu valid = 1; 2062091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 2063091d4532SNeel Natu *retinfo = info2; 2064091d4532SNeel Natu valid = 1; 2065091d4532SNeel Natu } else { 2066091d4532SNeel Natu valid = 0; 2067091d4532SNeel Natu } 2068091d4532SNeel Natu 2069091d4532SNeel Natu if (valid) { 2070d3956e46SJohn Baldwin VMM_CTR4(vcpu, "%s: info1(%#lx), info2(%#lx), " 2071091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 2072091d4532SNeel Natu } 2073091d4532SNeel Natu 2074091d4532SNeel Natu return (valid); 2075091d4532SNeel Natu } 2076091d4532SNeel Natu 2077091d4532SNeel Natu int 20783f0f4b15SJohn Baldwin vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) 2079091d4532SNeel Natu { 2080091d4532SNeel Natu *info1 = vcpu->exitintinfo; 2081091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 2082091d4532SNeel Natu return (0); 2083091d4532SNeel Natu } 2084091d4532SNeel Natu 2085091d4532SNeel Natu int 2086d3956e46SJohn Baldwin vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid, 2087c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 2088366f6083SPeter Grehan { 208947b9935dSNeel Natu uint64_t regval; 209073505a10SRobert Wing int error __diagused; 2091dc506506SNeel Natu 2092c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 2093366f6083SPeter Grehan return (EINVAL); 2094366f6083SPeter Grehan 2095091d4532SNeel Natu /* 2096091d4532SNeel Natu * A double fault exception should never be injected directly into 2097091d4532SNeel Natu * the guest. It is a derived exception that results from specific 2098091d4532SNeel Natu * combinations of nested faults. 
2099091d4532SNeel Natu */ 2100c9c75df4SNeel Natu if (vector == IDT_DF) 2101091d4532SNeel Natu return (EINVAL); 2102091d4532SNeel Natu 2103dc506506SNeel Natu if (vcpu->exception_pending) { 2104d3956e46SJohn Baldwin VMM_CTR2(vcpu, "Unable to inject exception %d due to " 2105c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 2106dc506506SNeel Natu return (EBUSY); 2107dc506506SNeel Natu } 2108dc506506SNeel Natu 210947b9935dSNeel Natu if (errcode_valid) { 211047b9935dSNeel Natu /* 211147b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 211247b9935dSNeel Natu */ 2113d3956e46SJohn Baldwin error = vm_get_register(vcpu, VM_REG_GUEST_CR0, ®val); 211447b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 211547b9935dSNeel Natu if (!(regval & CR0_PE)) 211647b9935dSNeel Natu errcode_valid = 0; 211747b9935dSNeel Natu } 211847b9935dSNeel Natu 21192ce12423SNeel Natu /* 21202ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 21212ce12423SNeel Natu * 21222ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 21232ce12423SNeel Natu * one instruction or incurs an exception. 
21242ce12423SNeel Natu */ 2125d3956e46SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW, 0); 21262ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 21272ce12423SNeel Natu __func__, error)); 21282ce12423SNeel Natu 2129c9c75df4SNeel Natu if (restart_instruction) 2130d3956e46SJohn Baldwin vm_restart_instruction(vcpu); 2131c9c75df4SNeel Natu 2132dc506506SNeel Natu vcpu->exception_pending = 1; 2133c9c75df4SNeel Natu vcpu->exc_vector = vector; 2134c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 2135c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 2136d3956e46SJohn Baldwin VMM_CTR1(vcpu, "Exception %d pending", vector); 2137dc506506SNeel Natu return (0); 2138dc506506SNeel Natu } 2139dc506506SNeel Natu 2140d37f2adbSNeel Natu void 2141d3956e46SJohn Baldwin vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, int errcode) 2142dc506506SNeel Natu { 214373505a10SRobert Wing int error __diagused, restart_instruction; 2144dc506506SNeel Natu 2145c9c75df4SNeel Natu restart_instruction = 1; 2146d37f2adbSNeel Natu 2147d3956e46SJohn Baldwin error = vm_inject_exception(vcpu, vector, errcode_valid, 2148c9c75df4SNeel Natu errcode, restart_instruction); 2149dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 2150dc506506SNeel Natu } 2151dc506506SNeel Natu 2152dc506506SNeel Natu void 2153d3956e46SJohn Baldwin vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2) 2154fd949af6SNeel Natu { 215573505a10SRobert Wing int error __diagused; 215637a723a5SNeel Natu 2157d3956e46SJohn Baldwin VMM_CTR2(vcpu, "Injecting page fault: error_code %#x, cr2 %#lx", 215837a723a5SNeel Natu error_code, cr2); 215937a723a5SNeel Natu 2160d3956e46SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_CR2, cr2); 216137a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 2162fd949af6SNeel Natu 2163d3956e46SJohn Baldwin vm_inject_fault(vcpu, IDT_PF, 1, error_code); 
2164366f6083SPeter Grehan } 2165366f6083SPeter Grehan 216661592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 2167366f6083SPeter Grehan 2168f352ff0cSNeel Natu int 21693f0f4b15SJohn Baldwin vm_inject_nmi(struct vcpu *vcpu) 2170f352ff0cSNeel Natu { 2171f352ff0cSNeel Natu 2172f352ff0cSNeel Natu vcpu->nmi_pending = 1; 21733f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 2174f352ff0cSNeel Natu return (0); 2175f352ff0cSNeel Natu } 2176f352ff0cSNeel Natu 2177f352ff0cSNeel Natu int 217880cb5d84SJohn Baldwin vm_nmi_pending(struct vcpu *vcpu) 2179f352ff0cSNeel Natu { 2180f352ff0cSNeel Natu return (vcpu->nmi_pending); 2181f352ff0cSNeel Natu } 2182f352ff0cSNeel Natu 2183f352ff0cSNeel Natu void 218480cb5d84SJohn Baldwin vm_nmi_clear(struct vcpu *vcpu) 2185f352ff0cSNeel Natu { 2186f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 2187f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 2188f352ff0cSNeel Natu 2189f352ff0cSNeel Natu vcpu->nmi_pending = 0; 21903dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_NMI_COUNT, 1); 2191366f6083SPeter Grehan } 2192366f6083SPeter Grehan 21930775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 21940775fbb4STycho Nightingale 21950775fbb4STycho Nightingale int 21963f0f4b15SJohn Baldwin vm_inject_extint(struct vcpu *vcpu) 21970775fbb4STycho Nightingale { 21980775fbb4STycho Nightingale 21990775fbb4STycho Nightingale vcpu->extint_pending = 1; 22003f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 22010775fbb4STycho Nightingale return (0); 22020775fbb4STycho Nightingale } 22030775fbb4STycho Nightingale 22040775fbb4STycho Nightingale int 220580cb5d84SJohn Baldwin vm_extint_pending(struct vcpu *vcpu) 22060775fbb4STycho Nightingale { 22070775fbb4STycho Nightingale return (vcpu->extint_pending); 22080775fbb4STycho Nightingale } 22090775fbb4STycho Nightingale 22100775fbb4STycho Nightingale void 221180cb5d84SJohn Baldwin vm_extint_clear(struct vcpu 
*vcpu) 22120775fbb4STycho Nightingale { 22130775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 22140775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 22150775fbb4STycho Nightingale 22160775fbb4STycho Nightingale vcpu->extint_pending = 0; 22173dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_EXTINT_COUNT, 1); 22180775fbb4STycho Nightingale } 22190775fbb4STycho Nightingale 2220366f6083SPeter Grehan int 22213f0f4b15SJohn Baldwin vm_get_capability(struct vcpu *vcpu, int type, int *retval) 2222366f6083SPeter Grehan { 2223366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2224366f6083SPeter Grehan return (EINVAL); 2225366f6083SPeter Grehan 22263f0f4b15SJohn Baldwin return (vmmops_getcap(vcpu->cookie, type, retval)); 2227366f6083SPeter Grehan } 2228366f6083SPeter Grehan 2229366f6083SPeter Grehan int 22303f0f4b15SJohn Baldwin vm_set_capability(struct vcpu *vcpu, int type, int val) 2231366f6083SPeter Grehan { 2232366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2233366f6083SPeter Grehan return (EINVAL); 2234366f6083SPeter Grehan 22353f0f4b15SJohn Baldwin return (vmmops_setcap(vcpu->cookie, type, val)); 2236366f6083SPeter Grehan } 2237366f6083SPeter Grehan 2238950af9ffSJohn Baldwin struct vm * 2239950af9ffSJohn Baldwin vcpu_vm(struct vcpu *vcpu) 2240950af9ffSJohn Baldwin { 2241950af9ffSJohn Baldwin return (vcpu->vm); 2242950af9ffSJohn Baldwin } 2243950af9ffSJohn Baldwin 2244950af9ffSJohn Baldwin int 2245950af9ffSJohn Baldwin vcpu_vcpuid(struct vcpu *vcpu) 2246950af9ffSJohn Baldwin { 2247950af9ffSJohn Baldwin return (vcpu->vcpuid); 2248950af9ffSJohn Baldwin } 2249950af9ffSJohn Baldwin 2250950af9ffSJohn Baldwin struct vcpu * 2251950af9ffSJohn Baldwin vm_vcpu(struct vm *vm, int vcpuid) 2252950af9ffSJohn Baldwin { 2253950af9ffSJohn Baldwin return (&vm->vcpu[vcpuid]); 2254950af9ffSJohn Baldwin } 2255950af9ffSJohn Baldwin 2256366f6083SPeter Grehan struct vlapic * 2257d3956e46SJohn Baldwin vm_lapic(struct vcpu *vcpu) 
2258366f6083SPeter Grehan { 2259d3956e46SJohn Baldwin return (vcpu->vlapic); 2260366f6083SPeter Grehan } 2261366f6083SPeter Grehan 2262565bbb86SNeel Natu struct vioapic * 2263565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2264565bbb86SNeel Natu { 2265565bbb86SNeel Natu 2266565bbb86SNeel Natu return (vm->vioapic); 2267565bbb86SNeel Natu } 2268565bbb86SNeel Natu 226908e3ff32SNeel Natu struct vhpet * 227008e3ff32SNeel Natu vm_hpet(struct vm *vm) 227108e3ff32SNeel Natu { 227208e3ff32SNeel Natu 227308e3ff32SNeel Natu return (vm->vhpet); 227408e3ff32SNeel Natu } 227508e3ff32SNeel Natu 2276490d56c5SEd Maste bool 2277366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2278366f6083SPeter Grehan { 2279490d56c5SEd Maste int b, f, i, n, s; 2280366f6083SPeter Grehan char *val, *cp, *cp2; 2281490d56c5SEd Maste bool found; 2282366f6083SPeter Grehan 2283366f6083SPeter Grehan /* 228407044a96SNeel Natu * XXX 228507044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 228607044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 228707044a96SNeel Natu * specified using a single environment variable. 228807044a96SNeel Natu * 228907044a96SNeel Natu * Work around this by scanning multiple environment variable 229007044a96SNeel Natu * names instead of a single one - yuck! 
2291366f6083SPeter Grehan */ 229207044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 229307044a96SNeel Natu 229407044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2295490d56c5SEd Maste found = false; 229607044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 22972be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2298366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2299366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2300366f6083SPeter Grehan *cp2 = '\0'; 2301366f6083SPeter Grehan 2302366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2303366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2304490d56c5SEd Maste found = true; 2305366f6083SPeter Grehan break; 2306366f6083SPeter Grehan } 2307366f6083SPeter Grehan 2308366f6083SPeter Grehan if (cp2 != NULL) 2309366f6083SPeter Grehan *cp2++ = ' '; 2310366f6083SPeter Grehan 2311366f6083SPeter Grehan cp = cp2; 2312366f6083SPeter Grehan } 2313366f6083SPeter Grehan freeenv(val); 231407044a96SNeel Natu } 2315366f6083SPeter Grehan return (found); 2316366f6083SPeter Grehan } 2317366f6083SPeter Grehan 2318366f6083SPeter Grehan void * 2319366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2320366f6083SPeter Grehan { 2321366f6083SPeter Grehan 2322366f6083SPeter Grehan return (vm->iommu); 2323366f6083SPeter Grehan } 2324366f6083SPeter Grehan 232575dd3366SNeel Natu int 23263f0f4b15SJohn Baldwin vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 2327366f6083SPeter Grehan { 232875dd3366SNeel Natu int error; 2329366f6083SPeter Grehan 233075dd3366SNeel Natu vcpu_lock(vcpu); 23313f0f4b15SJohn Baldwin error = vcpu_set_state_locked(vcpu, newstate, from_idle); 233275dd3366SNeel Natu vcpu_unlock(vcpu); 233375dd3366SNeel Natu 233475dd3366SNeel Natu return (error); 233575dd3366SNeel Natu } 233675dd3366SNeel Natu 233775dd3366SNeel Natu enum vcpu_state 2338d3956e46SJohn Baldwin vcpu_get_state(struct 
vcpu *vcpu, int *hostcpu) 2339366f6083SPeter Grehan { 234075dd3366SNeel Natu enum vcpu_state state; 2341366f6083SPeter Grehan 234275dd3366SNeel Natu vcpu_lock(vcpu); 234375dd3366SNeel Natu state = vcpu->state; 2344d3c11f40SPeter Grehan if (hostcpu != NULL) 2345d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 234675dd3366SNeel Natu vcpu_unlock(vcpu); 2347366f6083SPeter Grehan 234875dd3366SNeel Natu return (state); 2349366f6083SPeter Grehan } 2350366f6083SPeter Grehan 235195ebc360SNeel Natu int 23523f0f4b15SJohn Baldwin vm_activate_cpu(struct vcpu *vcpu) 2353366f6083SPeter Grehan { 23543f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 2355366f6083SPeter Grehan 23563f0f4b15SJohn Baldwin if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 235795ebc360SNeel Natu return (EBUSY); 235822d822c6SNeel Natu 23593f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "activated"); 23603f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 236195ebc360SNeel Natu return (0); 2362366f6083SPeter Grehan } 2363366f6083SPeter Grehan 2364fc276d92SJohn Baldwin int 23653f0f4b15SJohn Baldwin vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 2366fc276d92SJohn Baldwin { 23673f0f4b15SJohn Baldwin if (vcpu == NULL) { 2368fc276d92SJohn Baldwin vm->debug_cpus = vm->active_cpus; 23693f0f4b15SJohn Baldwin for (int i = 0; i < vm->maxcpus; i++) { 2370fc276d92SJohn Baldwin if (CPU_ISSET(i, &vm->active_cpus)) 23713f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 2372fc276d92SJohn Baldwin } 2373fc276d92SJohn Baldwin } else { 23743f0f4b15SJohn Baldwin if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 2375fc276d92SJohn Baldwin return (EINVAL); 2376fc276d92SJohn Baldwin 23773f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 23783f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 2379fc276d92SJohn Baldwin } 2380fc276d92SJohn Baldwin return (0); 2381fc276d92SJohn Baldwin } 2382fc276d92SJohn Baldwin 2383fc276d92SJohn Baldwin int 23843f0f4b15SJohn Baldwin vm_resume_cpu(struct vm *vm, 
struct vcpu *vcpu) 2385fc276d92SJohn Baldwin { 2386fc276d92SJohn Baldwin 23873f0f4b15SJohn Baldwin if (vcpu == NULL) { 2388fc276d92SJohn Baldwin CPU_ZERO(&vm->debug_cpus); 2389fc276d92SJohn Baldwin } else { 23903f0f4b15SJohn Baldwin if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 2391fc276d92SJohn Baldwin return (EINVAL); 2392fc276d92SJohn Baldwin 23933f0f4b15SJohn Baldwin CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 2394fc276d92SJohn Baldwin } 2395fc276d92SJohn Baldwin return (0); 2396fc276d92SJohn Baldwin } 2397fc276d92SJohn Baldwin 2398fc276d92SJohn Baldwin int 239980cb5d84SJohn Baldwin vcpu_debugged(struct vcpu *vcpu) 2400fc276d92SJohn Baldwin { 2401fc276d92SJohn Baldwin 240280cb5d84SJohn Baldwin return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 2403fc276d92SJohn Baldwin } 2404fc276d92SJohn Baldwin 2405a5615c90SPeter Grehan cpuset_t 2406366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2407366f6083SPeter Grehan { 2408366f6083SPeter Grehan 2409366f6083SPeter Grehan return (vm->active_cpus); 2410366f6083SPeter Grehan } 2411366f6083SPeter Grehan 241295ebc360SNeel Natu cpuset_t 2413fc276d92SJohn Baldwin vm_debug_cpus(struct vm *vm) 2414fc276d92SJohn Baldwin { 2415fc276d92SJohn Baldwin 2416fc276d92SJohn Baldwin return (vm->debug_cpus); 2417fc276d92SJohn Baldwin } 2418fc276d92SJohn Baldwin 2419fc276d92SJohn Baldwin cpuset_t 242095ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 242195ebc360SNeel Natu { 242295ebc360SNeel Natu 242395ebc360SNeel Natu return (vm->suspended_cpus); 242495ebc360SNeel Natu } 242595ebc360SNeel Natu 2426*c0f35dbfSJohn Baldwin /* 2427*c0f35dbfSJohn Baldwin * Returns the subset of vCPUs in tostart that are awaiting startup. 2428*c0f35dbfSJohn Baldwin * These vCPUs are also marked as no longer awaiting startup. 
2429*c0f35dbfSJohn Baldwin */ 2430*c0f35dbfSJohn Baldwin cpuset_t 2431*c0f35dbfSJohn Baldwin vm_start_cpus(struct vm *vm, const cpuset_t *tostart) 2432*c0f35dbfSJohn Baldwin { 2433*c0f35dbfSJohn Baldwin cpuset_t set; 2434*c0f35dbfSJohn Baldwin 2435*c0f35dbfSJohn Baldwin mtx_lock(&vm->rendezvous_mtx); 2436*c0f35dbfSJohn Baldwin CPU_AND(&set, &vm->startup_cpus, tostart); 2437*c0f35dbfSJohn Baldwin CPU_ANDNOT(&vm->startup_cpus, &vm->startup_cpus, &set); 2438*c0f35dbfSJohn Baldwin mtx_unlock(&vm->rendezvous_mtx); 2439*c0f35dbfSJohn Baldwin return (set); 2440*c0f35dbfSJohn Baldwin } 2441*c0f35dbfSJohn Baldwin 2442*c0f35dbfSJohn Baldwin void 2443*c0f35dbfSJohn Baldwin vm_await_start(struct vm *vm, const cpuset_t *waiting) 2444*c0f35dbfSJohn Baldwin { 2445*c0f35dbfSJohn Baldwin mtx_lock(&vm->rendezvous_mtx); 2446*c0f35dbfSJohn Baldwin CPU_OR(&vm->startup_cpus, &vm->startup_cpus, waiting); 2447*c0f35dbfSJohn Baldwin mtx_unlock(&vm->rendezvous_mtx); 2448*c0f35dbfSJohn Baldwin } 2449*c0f35dbfSJohn Baldwin 2450366f6083SPeter Grehan void * 24513dc3d32aSJohn Baldwin vcpu_stats(struct vcpu *vcpu) 2452366f6083SPeter Grehan { 2453366f6083SPeter Grehan 24543dc3d32aSJohn Baldwin return (vcpu->stats); 2455366f6083SPeter Grehan } 2456e9027382SNeel Natu 2457e9027382SNeel Natu int 24583f0f4b15SJohn Baldwin vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state) 2459e9027382SNeel Natu { 24603f0f4b15SJohn Baldwin *state = vcpu->x2apic_state; 2461e9027382SNeel Natu 2462e9027382SNeel Natu return (0); 2463e9027382SNeel Natu } 2464e9027382SNeel Natu 2465e9027382SNeel Natu int 24663f0f4b15SJohn Baldwin vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) 2467e9027382SNeel Natu { 24683f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2469e9027382SNeel Natu return (EINVAL); 2470e9027382SNeel Natu 2471d3956e46SJohn Baldwin vcpu->x2apic_state = state; 2472e9027382SNeel Natu 2473d3956e46SJohn Baldwin vlapic_set_x2apic_state(vcpu, state); 247473820fb0SNeel Natu 2475e9027382SNeel 
Natu return (0); 2476e9027382SNeel Natu } 247775dd3366SNeel Natu 247822821874SNeel Natu /* 247922821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 248022821874SNeel Natu * as soon as possible: 248122821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 248222821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 248322821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 248422821874SNeel Natu */ 2485248e6799SNeel Natu static void 2486248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 248775dd3366SNeel Natu { 248875dd3366SNeel Natu int hostcpu; 248975dd3366SNeel Natu 249075dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2491ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2492ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2493de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2494ef39d7e9SNeel Natu if (lapic_intr) { 2495add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2496add611fdSNeel Natu vmm_ipinum); 2497ef39d7e9SNeel Natu } else { 249875dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 249975dd3366SNeel Natu } 2500ef39d7e9SNeel Natu } else { 2501ef39d7e9SNeel Natu /* 2502ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2503ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2504ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2505ef39d7e9SNeel Natu * transitions back to guest context. 
2506ef39d7e9SNeel Natu */ 2507ef39d7e9SNeel Natu } 2508ef39d7e9SNeel Natu } else { 2509ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2510ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2511366f6083SPeter Grehan if (vcpu->state == VCPU_SLEEPING) 2512366f6083SPeter Grehan wakeup_one(vcpu); 2513366f6083SPeter Grehan } 2514248e6799SNeel Natu } 2515248e6799SNeel Natu 2516248e6799SNeel Natu void 25173f0f4b15SJohn Baldwin vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) 2518248e6799SNeel Natu { 2519248e6799SNeel Natu vcpu_lock(vcpu); 2520248e6799SNeel Natu vcpu_notify_event_locked(vcpu, lapic_intr); 2521f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2522f76fc5d4SNeel Natu } 2523318224bbSNeel Natu 2524318224bbSNeel Natu struct vmspace * 2525318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2526318224bbSNeel Natu { 2527318224bbSNeel Natu 2528318224bbSNeel Natu return (vm->vmspace); 2529318224bbSNeel Natu } 2530565bbb86SNeel Natu 2531565bbb86SNeel Natu int 2532565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2533565bbb86SNeel Natu { 2534565bbb86SNeel Natu /* 2535565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2536565bbb86SNeel Natu */ 2537565bbb86SNeel Natu return (apicid); 2538565bbb86SNeel Natu } 25395b8a8cd1SNeel Natu 2540b837daddSKonstantin Belousov int 2541d8be3d52SJohn Baldwin vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest, 25425b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 25435b8a8cd1SNeel Natu { 2544d8be3d52SJohn Baldwin struct vm *vm = vcpu->vm; 2545b837daddSKonstantin Belousov int error, i; 2546970955e4SNeel Natu 25475b8a8cd1SNeel Natu /* 25485b8a8cd1SNeel Natu * Enforce that this function is called without any locks 25495b8a8cd1SNeel Natu */ 25505b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 25515b8a8cd1SNeel Natu 25525b8a8cd1SNeel Natu restart: 25535b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 25545b8a8cd1SNeel Natu if 
(vm->rendezvous_func != NULL) { 25555b8a8cd1SNeel Natu /* 25565b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 25573f0f4b15SJohn Baldwin * call the rendezvous handler in case this 'vcpu' is one 25585b8a8cd1SNeel Natu * of the targets of the rendezvous. 25595b8a8cd1SNeel Natu */ 2560d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous already in progress"); 25615b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 2562d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 2563b837daddSKonstantin Belousov if (error != 0) 2564b837daddSKonstantin Belousov return (error); 25655b8a8cd1SNeel Natu goto restart; 25665b8a8cd1SNeel Natu } 25675b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 25685b8a8cd1SNeel Natu "rendezvous is still in progress")); 25695b8a8cd1SNeel Natu 2570d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Initiating rendezvous"); 25715b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 25725b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 25735b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 2574869dbab7SAndriy Gapon vm->rendezvous_func = func; 25755b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 25765b8a8cd1SNeel Natu 2577970955e4SNeel Natu /* 2578970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2579970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2580970955e4SNeel Natu */ 2581a488c9c9SRodney W. 
Grimes for (i = 0; i < vm->maxcpus; i++) { 2582970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 25833f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 2584970955e4SNeel Natu } 2585970955e4SNeel Natu 2586d8be3d52SJohn Baldwin return (vm_handle_rendezvous(vcpu)); 25875b8a8cd1SNeel Natu } 2588762fd208STycho Nightingale 2589762fd208STycho Nightingale struct vatpic * 2590762fd208STycho Nightingale vm_atpic(struct vm *vm) 2591762fd208STycho Nightingale { 2592762fd208STycho Nightingale return (vm->vatpic); 2593762fd208STycho Nightingale } 2594e883c9bbSTycho Nightingale 2595e883c9bbSTycho Nightingale struct vatpit * 2596e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2597e883c9bbSTycho Nightingale { 2598e883c9bbSTycho Nightingale return (vm->vatpit); 2599e883c9bbSTycho Nightingale } 2600d17b5104SNeel Natu 2601160ef77aSNeel Natu struct vpmtmr * 2602160ef77aSNeel Natu vm_pmtmr(struct vm *vm) 2603160ef77aSNeel Natu { 2604160ef77aSNeel Natu 2605160ef77aSNeel Natu return (vm->vpmtmr); 2606160ef77aSNeel Natu } 2607160ef77aSNeel Natu 26080dafa5cdSNeel Natu struct vrtc * 26090dafa5cdSNeel Natu vm_rtc(struct vm *vm) 26100dafa5cdSNeel Natu { 26110dafa5cdSNeel Natu 26120dafa5cdSNeel Natu return (vm->vrtc); 26130dafa5cdSNeel Natu } 26140dafa5cdSNeel Natu 2615d17b5104SNeel Natu enum vm_reg_name 2616d17b5104SNeel Natu vm_segment_name(int seg) 2617d17b5104SNeel Natu { 2618d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2619d17b5104SNeel Natu VM_REG_GUEST_ES, 2620d17b5104SNeel Natu VM_REG_GUEST_CS, 2621d17b5104SNeel Natu VM_REG_GUEST_SS, 2622d17b5104SNeel Natu VM_REG_GUEST_DS, 2623d17b5104SNeel Natu VM_REG_GUEST_FS, 2624d17b5104SNeel Natu VM_REG_GUEST_GS 2625d17b5104SNeel Natu }; 2626d17b5104SNeel Natu 2627d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2628d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2629d17b5104SNeel Natu return (seg_names[seg]); 2630d17b5104SNeel Natu } 2631cf1d80d8SPeter Grehan 2632d665d229SNeel Natu 
void 26332b4fe856SJohn Baldwin vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo) 2634d665d229SNeel Natu { 2635d665d229SNeel Natu int idx; 2636d665d229SNeel Natu 2637d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2638d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2639d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2640d665d229SNeel Natu } 2641d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2642d665d229SNeel Natu } 2643d665d229SNeel Natu 2644d665d229SNeel Natu int 2645d3956e46SJohn Baldwin vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, 2646d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 26479c4d5478SNeel Natu int num_copyinfo, int *fault) 2648d665d229SNeel Natu { 2649d665d229SNeel Natu int error, idx, nused; 2650d665d229SNeel Natu size_t n, off, remaining; 2651d665d229SNeel Natu void *hva, *cookie; 2652d665d229SNeel Natu uint64_t gpa; 2653d665d229SNeel Natu 2654d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2655d665d229SNeel Natu 2656d665d229SNeel Natu nused = 0; 2657d665d229SNeel Natu remaining = len; 2658d665d229SNeel Natu while (remaining > 0) { 2659d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 2660d3956e46SJohn Baldwin error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault); 26619c4d5478SNeel Natu if (error || *fault) 2662d665d229SNeel Natu return (error); 2663d665d229SNeel Natu off = gpa & PAGE_MASK; 2664d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2665d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2666d665d229SNeel Natu copyinfo[nused].len = n; 2667d665d229SNeel Natu remaining -= n; 2668d665d229SNeel Natu gla += n; 2669d665d229SNeel Natu nused++; 2670d665d229SNeel Natu } 2671d665d229SNeel Natu 2672d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 2673d3956e46SJohn Baldwin hva = vm_gpa_hold(vcpu, copyinfo[idx].gpa, 26749b1aa8d6SNeel Natu 
copyinfo[idx].len, prot, &cookie); 2675d665d229SNeel Natu if (hva == NULL) 2676d665d229SNeel Natu break; 2677d665d229SNeel Natu copyinfo[idx].hva = hva; 2678d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2679d665d229SNeel Natu } 2680d665d229SNeel Natu 2681d665d229SNeel Natu if (idx != nused) { 26822b4fe856SJohn Baldwin vm_copy_teardown(copyinfo, num_copyinfo); 26839c4d5478SNeel Natu return (EFAULT); 2684d665d229SNeel Natu } else { 26859c4d5478SNeel Natu *fault = 0; 2686d665d229SNeel Natu return (0); 2687d665d229SNeel Natu } 2688d665d229SNeel Natu } 2689d665d229SNeel Natu 2690d665d229SNeel Natu void 26912b4fe856SJohn Baldwin vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len) 2692d665d229SNeel Natu { 2693d665d229SNeel Natu char *dst; 2694d665d229SNeel Natu int idx; 2695d665d229SNeel Natu 2696d665d229SNeel Natu dst = kaddr; 2697d665d229SNeel Natu idx = 0; 2698d665d229SNeel Natu while (len > 0) { 2699d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2700d665d229SNeel Natu len -= copyinfo[idx].len; 2701d665d229SNeel Natu dst += copyinfo[idx].len; 2702d665d229SNeel Natu idx++; 2703d665d229SNeel Natu } 2704d665d229SNeel Natu } 2705d665d229SNeel Natu 2706d665d229SNeel Natu void 27072b4fe856SJohn Baldwin vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len) 2708d665d229SNeel Natu { 2709d665d229SNeel Natu const char *src; 2710d665d229SNeel Natu int idx; 2711d665d229SNeel Natu 2712d665d229SNeel Natu src = kaddr; 2713d665d229SNeel Natu idx = 0; 2714d665d229SNeel Natu while (len > 0) { 2715d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2716d665d229SNeel Natu len -= copyinfo[idx].len; 2717d665d229SNeel Natu src += copyinfo[idx].len; 2718d665d229SNeel Natu idx++; 2719d665d229SNeel Natu } 2720d665d229SNeel Natu } 2721cf1d80d8SPeter Grehan 2722cf1d80d8SPeter Grehan /* 2723cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. 
Since 2724cf1d80d8SPeter Grehan * these are global stats, only return the values with for vCPU 0 2725cf1d80d8SPeter Grehan */ 2726cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2727cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2728cf1d80d8SPeter Grehan 2729cf1d80d8SPeter Grehan static void 27303f0f4b15SJohn Baldwin vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat) 2731cf1d80d8SPeter Grehan { 2732cf1d80d8SPeter Grehan 27333f0f4b15SJohn Baldwin if (vcpu->vcpuid == 0) { 27343f0f4b15SJohn Baldwin vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * 27353f0f4b15SJohn Baldwin vmspace_resident_count(vcpu->vm->vmspace)); 2736cf1d80d8SPeter Grehan } 2737cf1d80d8SPeter Grehan } 2738cf1d80d8SPeter Grehan 2739cf1d80d8SPeter Grehan static void 27403f0f4b15SJohn Baldwin vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat) 2741cf1d80d8SPeter Grehan { 2742cf1d80d8SPeter Grehan 27433f0f4b15SJohn Baldwin if (vcpu->vcpuid == 0) { 27443f0f4b15SJohn Baldwin vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE * 27453f0f4b15SJohn Baldwin pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace))); 2746cf1d80d8SPeter Grehan } 2747cf1d80d8SPeter Grehan } 2748cf1d80d8SPeter Grehan 2749cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2750cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2751483d953aSJohn Baldwin 2752483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 2753483d953aSJohn Baldwin static int 2754483d953aSJohn Baldwin vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) 2755483d953aSJohn Baldwin { 2756a7db532eSJohn Baldwin uint64_t tsc, now; 2757483d953aSJohn Baldwin int ret; 2758483d953aSJohn Baldwin struct vcpu *vcpu; 275935abc6c2SJohn Baldwin uint16_t i, maxcpus; 2760483d953aSJohn Baldwin 2761a7db532eSJohn Baldwin now = rdtsc(); 276235abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 276335abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 2764483d953aSJohn Baldwin vcpu = 
&vm->vcpu[i]; 2765483d953aSJohn Baldwin 2766483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); 2767483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); 2768483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done); 2769483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done); 2770483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done); 2771483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done); 2772483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done); 2773483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); 2774a7db532eSJohn Baldwin 2775a7db532eSJohn Baldwin /* 2776a7db532eSJohn Baldwin * Save the absolute TSC value by adding now to tsc_offset. 2777483d953aSJohn Baldwin * 2778483d953aSJohn Baldwin * It will be turned turned back into an actual offset when the 2779483d953aSJohn Baldwin * TSC restore function is called 2780483d953aSJohn Baldwin */ 2781a7db532eSJohn Baldwin tsc = now + vcpu->tsc_offset; 2782a7db532eSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(tsc, meta, ret, done); 2783483d953aSJohn Baldwin } 2784483d953aSJohn Baldwin 2785483d953aSJohn Baldwin done: 2786483d953aSJohn Baldwin return (ret); 2787483d953aSJohn Baldwin } 2788483d953aSJohn Baldwin 2789483d953aSJohn Baldwin static int 2790483d953aSJohn Baldwin vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta) 2791483d953aSJohn Baldwin { 2792483d953aSJohn Baldwin int ret; 2793483d953aSJohn Baldwin 2794483d953aSJohn Baldwin ret = vm_snapshot_vcpus(vm, meta); 2795a7db532eSJohn Baldwin if (ret != 0) 2796483d953aSJohn Baldwin goto done; 2797483d953aSJohn Baldwin 2798*c0f35dbfSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vm->startup_cpus, meta, ret, done); 2799483d953aSJohn Baldwin done: 2800483d953aSJohn Baldwin return (ret); 2801483d953aSJohn Baldwin } 2802483d953aSJohn Baldwin 2803483d953aSJohn Baldwin static int 
28041aa51504SJohn Baldwin vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta) 2805483d953aSJohn Baldwin { 280635abc6c2SJohn Baldwin int error; 28071aa51504SJohn Baldwin struct vcpu *vcpu; 280835abc6c2SJohn Baldwin uint16_t i, maxcpus; 2809483d953aSJohn Baldwin 2810483d953aSJohn Baldwin error = 0; 2811483d953aSJohn Baldwin 281235abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 281335abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 28141aa51504SJohn Baldwin vcpu = &vm->vcpu[i]; 28151aa51504SJohn Baldwin 2816869c8d19SJohn Baldwin error = vmmops_vcpu_snapshot(vcpu->cookie, meta); 2817483d953aSJohn Baldwin if (error != 0) { 2818483d953aSJohn Baldwin printf("%s: failed to snapshot vmcs/vmcb data for " 2819483d953aSJohn Baldwin "vCPU: %d; error: %d\n", __func__, i, error); 2820483d953aSJohn Baldwin goto done; 2821483d953aSJohn Baldwin } 2822483d953aSJohn Baldwin } 2823483d953aSJohn Baldwin 2824483d953aSJohn Baldwin done: 2825483d953aSJohn Baldwin return (error); 2826483d953aSJohn Baldwin } 2827483d953aSJohn Baldwin 2828483d953aSJohn Baldwin /* 2829483d953aSJohn Baldwin * Save kernel-side structures to user-space for snapshotting. 
2830483d953aSJohn Baldwin */ 2831483d953aSJohn Baldwin int 2832483d953aSJohn Baldwin vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta) 2833483d953aSJohn Baldwin { 2834483d953aSJohn Baldwin int ret = 0; 2835483d953aSJohn Baldwin 2836483d953aSJohn Baldwin switch (meta->dev_req) { 2837483d953aSJohn Baldwin case STRUCT_VMX: 283815add60dSPeter Grehan ret = vmmops_snapshot(vm->cookie, meta); 2839483d953aSJohn Baldwin break; 2840483d953aSJohn Baldwin case STRUCT_VMCX: 28411aa51504SJohn Baldwin ret = vm_snapshot_vcpu(vm, meta); 2842483d953aSJohn Baldwin break; 2843483d953aSJohn Baldwin case STRUCT_VM: 2844483d953aSJohn Baldwin ret = vm_snapshot_vm(vm, meta); 2845483d953aSJohn Baldwin break; 2846483d953aSJohn Baldwin case STRUCT_VIOAPIC: 2847483d953aSJohn Baldwin ret = vioapic_snapshot(vm_ioapic(vm), meta); 2848483d953aSJohn Baldwin break; 2849483d953aSJohn Baldwin case STRUCT_VLAPIC: 2850483d953aSJohn Baldwin ret = vlapic_snapshot(vm, meta); 2851483d953aSJohn Baldwin break; 2852483d953aSJohn Baldwin case STRUCT_VHPET: 2853483d953aSJohn Baldwin ret = vhpet_snapshot(vm_hpet(vm), meta); 2854483d953aSJohn Baldwin break; 2855483d953aSJohn Baldwin case STRUCT_VATPIC: 2856483d953aSJohn Baldwin ret = vatpic_snapshot(vm_atpic(vm), meta); 2857483d953aSJohn Baldwin break; 2858483d953aSJohn Baldwin case STRUCT_VATPIT: 2859483d953aSJohn Baldwin ret = vatpit_snapshot(vm_atpit(vm), meta); 2860483d953aSJohn Baldwin break; 2861483d953aSJohn Baldwin case STRUCT_VPMTMR: 2862483d953aSJohn Baldwin ret = vpmtmr_snapshot(vm_pmtmr(vm), meta); 2863483d953aSJohn Baldwin break; 2864483d953aSJohn Baldwin case STRUCT_VRTC: 2865483d953aSJohn Baldwin ret = vrtc_snapshot(vm_rtc(vm), meta); 2866483d953aSJohn Baldwin break; 2867483d953aSJohn Baldwin default: 2868483d953aSJohn Baldwin printf("%s: failed to find the requested type %#x\n", 2869483d953aSJohn Baldwin __func__, meta->dev_req); 2870483d953aSJohn Baldwin ret = (EINVAL); 2871483d953aSJohn Baldwin } 2872483d953aSJohn Baldwin return 
(ret); 2873483d953aSJohn Baldwin } 2874483d953aSJohn Baldwin 287580cb5d84SJohn Baldwin void 287680cb5d84SJohn Baldwin vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset) 2877483d953aSJohn Baldwin { 2878483d953aSJohn Baldwin vcpu->tsc_offset = offset; 2879483d953aSJohn Baldwin } 2880483d953aSJohn Baldwin 2881483d953aSJohn Baldwin int 2882483d953aSJohn Baldwin vm_restore_time(struct vm *vm) 2883483d953aSJohn Baldwin { 288435abc6c2SJohn Baldwin int error; 2885483d953aSJohn Baldwin uint64_t now; 2886483d953aSJohn Baldwin struct vcpu *vcpu; 288735abc6c2SJohn Baldwin uint16_t i, maxcpus; 2888483d953aSJohn Baldwin 2889483d953aSJohn Baldwin now = rdtsc(); 2890483d953aSJohn Baldwin 2891483d953aSJohn Baldwin error = vhpet_restore_time(vm_hpet(vm)); 2892483d953aSJohn Baldwin if (error) 2893483d953aSJohn Baldwin return (error); 2894483d953aSJohn Baldwin 289535abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 289635abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 2897483d953aSJohn Baldwin vcpu = &vm->vcpu[i]; 2898483d953aSJohn Baldwin 2899869c8d19SJohn Baldwin error = vmmops_restore_tsc(vcpu->cookie, 29001aa51504SJohn Baldwin vcpu->tsc_offset - now); 2901483d953aSJohn Baldwin if (error) 2902483d953aSJohn Baldwin return (error); 2903483d953aSJohn Baldwin } 2904483d953aSJohn Baldwin 2905483d953aSJohn Baldwin return (0); 2906483d953aSJohn Baldwin } 2907483d953aSJohn Baldwin #endif 2908