1366f6083SPeter Grehan /*- 2c49761ddSPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3c49761ddSPedro F. Giffuni * 4366f6083SPeter Grehan * Copyright (c) 2011 NetApp, Inc. 5366f6083SPeter Grehan * All rights reserved. 6366f6083SPeter Grehan * 7366f6083SPeter Grehan * Redistribution and use in source and binary forms, with or without 8366f6083SPeter Grehan * modification, are permitted provided that the following conditions 9366f6083SPeter Grehan * are met: 10366f6083SPeter Grehan * 1. Redistributions of source code must retain the above copyright 11366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer. 12366f6083SPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 13366f6083SPeter Grehan * notice, this list of conditions and the following disclaimer in the 14366f6083SPeter Grehan * documentation and/or other materials provided with the distribution. 15366f6083SPeter Grehan * 16366f6083SPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17366f6083SPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18366f6083SPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19366f6083SPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20366f6083SPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21366f6083SPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22366f6083SPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23366f6083SPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24366f6083SPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25366f6083SPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26366f6083SPeter Grehan * SUCH DAMAGE. 27366f6083SPeter Grehan * 28366f6083SPeter Grehan * $FreeBSD$ 29366f6083SPeter Grehan */ 30366f6083SPeter Grehan 31366f6083SPeter Grehan #include <sys/cdefs.h> 32366f6083SPeter Grehan __FBSDID("$FreeBSD$"); 33366f6083SPeter Grehan 34483d953aSJohn Baldwin #include "opt_bhyve_snapshot.h" 35483d953aSJohn Baldwin 36366f6083SPeter Grehan #include <sys/param.h> 3738f1b189SPeter Grehan #include <sys/systm.h> 38366f6083SPeter Grehan #include <sys/kernel.h> 39366f6083SPeter Grehan #include <sys/module.h> 40366f6083SPeter Grehan #include <sys/sysctl.h> 41366f6083SPeter Grehan #include <sys/malloc.h> 42366f6083SPeter Grehan #include <sys/pcpu.h> 43366f6083SPeter Grehan #include <sys/lock.h> 44366f6083SPeter Grehan #include <sys/mutex.h> 45366f6083SPeter Grehan #include <sys/proc.h> 46318224bbSNeel Natu #include <sys/rwlock.h> 47366f6083SPeter Grehan #include <sys/sched.h> 48366f6083SPeter Grehan #include <sys/smp.h> 4967b69e76SJohn Baldwin #include <sys/sx.h> 50483d953aSJohn Baldwin #include <sys/vnode.h> 51366f6083SPeter Grehan 52366f6083SPeter Grehan #include <vm/vm.h> 533c48106aSKonstantin Belousov #include <vm/vm_param.h> 543c48106aSKonstantin Belousov #include <vm/vm_extern.h> 55318224bbSNeel Natu #include <vm/vm_object.h> 56318224bbSNeel Natu #include <vm/vm_page.h> 57318224bbSNeel Natu #include <vm/pmap.h> 58318224bbSNeel Natu #include <vm/vm_map.h> 59483d953aSJohn Baldwin #include <vm/vm_pager.h> 60483d953aSJohn Baldwin #include <vm/vm_kern.h> 61483d953aSJohn Baldwin #include <vm/vnode_pager.h> 62483d953aSJohn Baldwin #include <vm/swap_pager.h> 63483d953aSJohn Baldwin #include <vm/uma.h> 64366f6083SPeter Grehan 6563e62d39SJohn Baldwin #include <machine/cpu.h> 66366f6083SPeter Grehan #include <machine/pcb.h> 6775dd3366SNeel Natu #include <machine/smp.h> 68bd50262fSKonstantin Belousov #include <machine/md_var.h> 691c052192SNeel Natu #include <x86/psl.h> 7034a6b2d6SJohn Baldwin #include <x86/apicreg.h> 7115add60dSPeter Grehan #include <x86/ifunc.h> 72366f6083SPeter Grehan 73366f6083SPeter Grehan #include <machine/vmm.h> 74565bbb86SNeel Natu #include <machine/vmm_dev.h> 75e813a873SNeel Natu #include <machine/vmm_instruction_emul.h> 76483d953aSJohn Baldwin #include <machine/vmm_snapshot.h> 77565bbb86SNeel Natu 78d17b5104SNeel Natu #include "vmm_ioport.h" 79318224bbSNeel Natu #include "vmm_ktr.h" 80b01c2033SNeel Natu #include "vmm_host.h" 81366f6083SPeter Grehan #include "vmm_mem.h" 82366f6083SPeter Grehan #include "vmm_util.h" 83762fd208STycho Nightingale #include "vatpic.h" 84e883c9bbSTycho Nightingale #include "vatpit.h" 8508e3ff32SNeel Natu #include "vhpet.h" 86565bbb86SNeel Natu #include "vioapic.h" 87366f6083SPeter Grehan #include "vlapic.h" 88160ef77aSNeel Natu #include "vpmtmr.h" 890dafa5cdSNeel Natu #include "vrtc.h" 90366f6083SPeter Grehan #include "vmm_stat.h" 91f76fc5d4SNeel Natu #include "vmm_lapic.h" 92366f6083SPeter Grehan 93366f6083SPeter Grehan #include "io/ppt.h" 94366f6083SPeter Grehan #include "io/iommu.h" 95366f6083SPeter Grehan 96366f6083SPeter Grehan struct vlapic; 97366f6083SPeter Grehan 985fcf252fSNeel Natu /* 995fcf252fSNeel Natu * Initialization: 1005fcf252fSNeel Natu * (a) allocated when vcpu is created 1015fcf252fSNeel Natu * (i) initialized when vcpu is created and when it is reinitialized 1025fcf252fSNeel Natu * (o) initialized the first time the vcpu is created 1035fcf252fSNeel Natu * (x) initialized before use 1045fcf252fSNeel Natu */ 105366f6083SPeter Grehan struct vcpu { 1065fcf252fSNeel Natu struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 1075fcf252fSNeel Natu enum vcpu_state state; /* (o) vcpu state */ 108950af9ffSJohn Baldwin int vcpuid; /* (o) */ 1095fcf252fSNeel Natu int hostcpu; /* (o) vcpu's host cpu */ 110248e6799SNeel Natu int reqidle; /* (i) request vcpu to idle */ 111950af9ffSJohn Baldwin struct vm *vm; /* (o) */ 1121aa51504SJohn Baldwin void *cookie; /* (i) cpu-specific data */ 1135fcf252fSNeel Natu struct vlapic *vlapic; /* (i) APIC device model */ 1145fcf252fSNeel Natu enum x2apic_state x2apic_state; /* (i) APIC mode */ 115091d4532SNeel Natu uint64_t exitintinfo; /* (i) events pending at VM exit */ 1165fcf252fSNeel Natu int nmi_pending; /* (i) NMI pending */ 1175fcf252fSNeel Natu int extint_pending; /* (i) INTR pending */ 1185fcf252fSNeel Natu int exception_pending; /* (i) exception pending */ 119c9c75df4SNeel Natu int exc_vector; /* (x) exception collateral */ 120c9c75df4SNeel Natu int exc_errcode_valid; 121c9c75df4SNeel Natu uint32_t exc_errcode; 1225fcf252fSNeel Natu struct savefpu *guestfpu; /* (a,i) guest fpu state */ 1235fcf252fSNeel Natu uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 1245fcf252fSNeel Natu void *stats; /* (a,i) statistics */ 1255fcf252fSNeel Natu struct vm_exit exitinfo; /* (x) exit reason and collateral */ 126d087a399SNeel Natu uint64_t nextrip; /* (x) next instruction to execute */ 127483d953aSJohn Baldwin uint64_t tsc_offset; /* (o) TSC offsetting */ 128366f6083SPeter Grehan }; 129366f6083SPeter Grehan 130f76fc5d4SNeel Natu #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 13108ebb360SJohn Baldwin #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 132f76fc5d4SNeel Natu #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 133f76fc5d4SNeel Natu #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 134318224bbSNeel Natu #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 13575dd3366SNeel Natu 136318224bbSNeel Natu struct mem_seg { 1379b1aa8d6SNeel Natu size_t len; 1389b1aa8d6SNeel Natu bool sysmem; 1399b1aa8d6SNeel Natu struct vm_object *object; 1409b1aa8d6SNeel Natu }; 141e47fe318SCorvin Köhne #define VM_MAX_MEMSEGS 4 1429b1aa8d6SNeel Natu 1439b1aa8d6SNeel Natu struct mem_map { 144318224bbSNeel Natu vm_paddr_t gpa; 145318224bbSNeel Natu size_t len; 1469b1aa8d6SNeel Natu vm_ooffset_t segoff; 1479b1aa8d6SNeel Natu int segid; 1489b1aa8d6SNeel Natu int prot; 1499b1aa8d6SNeel Natu int flags; 150318224bbSNeel Natu }; 15100d3723fSConrad Meyer #define VM_MAX_MEMMAPS 8 152366f6083SPeter Grehan 153366f6083SPeter Grehan /* 1545fcf252fSNeel Natu * Initialization: 1555fcf252fSNeel Natu * (o) initialized the first time the VM is created 1565fcf252fSNeel Natu * (i) initialized when VM is created and when it is reinitialized 1575fcf252fSNeel Natu * (x) initialized before use 15867b69e76SJohn Baldwin * 15967b69e76SJohn Baldwin * Locking: 16067b69e76SJohn Baldwin * [m] mem_segs_lock 16167b69e76SJohn Baldwin * [r] rendezvous_mtx 16267b69e76SJohn Baldwin * [v] reads require one frozen vcpu, writes require freezing all vcpus 163366f6083SPeter Grehan */ 1645fcf252fSNeel Natu struct vm { 1655fcf252fSNeel Natu void *cookie; /* (i) cpu-specific data */ 1665fcf252fSNeel Natu void *iommu; /* (x) iommu-specific data */ 1675fcf252fSNeel Natu struct vhpet *vhpet; /* (i) virtual HPET */ 1685fcf252fSNeel Natu struct vioapic *vioapic; /* (i) virtual ioapic */ 1695fcf252fSNeel Natu struct vatpic *vatpic; /* (i) virtual atpic */ 1705fcf252fSNeel Natu struct vatpit *vatpit; /* (i) virtual atpit */ 171160ef77aSNeel Natu struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 1720dafa5cdSNeel Natu struct vrtc *vrtc; /* (o) virtual RTC */ 1735fcf252fSNeel Natu volatile cpuset_t active_cpus; /* (i) active vcpus */ 174fc276d92SJohn Baldwin volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 175c0f35dbfSJohn Baldwin cpuset_t startup_cpus; /* (i) [r] waiting for startup */ 1765fcf252fSNeel Natu int suspend; /* (i) stop VM execution */ 17798568a00SJohn Baldwin bool dying; /* (o) is dying */ 1785fcf252fSNeel Natu volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 1795fcf252fSNeel Natu volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 18067b69e76SJohn Baldwin cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ 18167b69e76SJohn Baldwin cpuset_t rendezvous_done_cpus; /* (x) [r] rendezvous finished */ 18267b69e76SJohn Baldwin void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ 1835b8a8cd1SNeel Natu vm_rendezvous_func_t rendezvous_func; 1845fcf252fSNeel Natu struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 18567b69e76SJohn Baldwin struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */ 18667b69e76SJohn Baldwin struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */ 1875fcf252fSNeel Natu struct vmspace *vmspace; /* (o) guest's address space */ 188df95cc76SKa Ho Ng char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ 189ee98f99dSJohn Baldwin struct vcpu **vcpu; /* (o) guest vcpus */ 19001d822d3SRodney W. Grimes /* The following describe the vm cpu topology */ 19101d822d3SRodney W. Grimes uint16_t sockets; /* (o) num of sockets */ 19201d822d3SRodney W. Grimes uint16_t cores; /* (o) num of cores/socket */ 19301d822d3SRodney W. Grimes uint16_t threads; /* (o) num of threads/core */ 19401d822d3SRodney W. Grimes uint16_t maxcpus; /* (o) max pluggable cpus */ 19567b69e76SJohn Baldwin struct sx mem_segs_lock; /* (o) */ 19698568a00SJohn Baldwin struct sx vcpus_init_lock; /* (o) */ 197366f6083SPeter Grehan }; 198366f6083SPeter Grehan 199950af9ffSJohn Baldwin #define VMM_CTR0(vcpu, format) \ 200950af9ffSJohn Baldwin VCPU_CTR0((vcpu)->vm, (vcpu)->vcpuid, format) 201950af9ffSJohn Baldwin 202950af9ffSJohn Baldwin #define VMM_CTR1(vcpu, format, p1) \ 203950af9ffSJohn Baldwin VCPU_CTR1((vcpu)->vm, (vcpu)->vcpuid, format, p1) 204950af9ffSJohn Baldwin 205950af9ffSJohn Baldwin #define VMM_CTR2(vcpu, format, p1, p2) \ 206950af9ffSJohn Baldwin VCPU_CTR2((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2) 207950af9ffSJohn Baldwin 208950af9ffSJohn Baldwin #define VMM_CTR3(vcpu, format, p1, p2, p3) \ 209950af9ffSJohn Baldwin VCPU_CTR3((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3) 210950af9ffSJohn Baldwin 211950af9ffSJohn Baldwin #define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ 212950af9ffSJohn Baldwin VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) 213950af9ffSJohn Baldwin 214d5408b1dSNeel Natu static int vmm_initialized; 215d5408b1dSNeel Natu 21615add60dSPeter Grehan static void vmmops_panic(void); 217366f6083SPeter Grehan 21815add60dSPeter Grehan static void 21915add60dSPeter Grehan vmmops_panic(void) 22015add60dSPeter Grehan { 22115add60dSPeter Grehan panic("vmm_ops func called when !vmm_is_intel() && !vmm_is_svm()"); 22215add60dSPeter Grehan } 22315add60dSPeter Grehan 22415add60dSPeter Grehan #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ 22515add60dSPeter Grehan DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \ 22615add60dSPeter Grehan { \ 22715add60dSPeter Grehan if (vmm_is_intel()) \ 22815add60dSPeter Grehan return (vmm_ops_intel.opname); \ 22915add60dSPeter Grehan else if (vmm_is_svm()) \ 23015add60dSPeter Grehan return (vmm_ops_amd.opname); \ 23115add60dSPeter Grehan else \ 23215add60dSPeter Grehan return ((ret_type (*)args)vmmops_panic); \ 23315add60dSPeter Grehan } 23415add60dSPeter Grehan 23515add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) 23615add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) 23715add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, modresume, (void)) 23815add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) 239869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t rip, struct pmap *pmap, 240869c8d19SJohn Baldwin struct vm_eventinfo *info)) 24115add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) 242950af9ffSJohn Baldwin DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, 243950af9ffSJohn Baldwin int vcpu_id)) 244869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) 245869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) 246869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) 247869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vcpui, int num, struct seg_desc *desc)) 248869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vcpui, int num, struct seg_desc *desc)) 249869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) 250869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) 25115add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, 25215add60dSPeter Grehan vm_offset_t max)) 25315add60dSPeter Grehan DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) 254869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vcpui)) 255869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (struct vlapic *vlapic)) 256483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 257869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)) 258869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, 259869c8d19SJohn Baldwin struct vm_snapshot_meta *meta)) 260869c8d19SJohn Baldwin DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) 261483d953aSJohn Baldwin #endif 262366f6083SPeter Grehan 263014a52f3SNeel Natu #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 264014a52f3SNeel Natu #define fpu_stop_emulating() clts() 265366f6083SPeter Grehan 2666ac73777STycho Nightingale SDT_PROVIDER_DEFINE(vmm); 2676ac73777STycho Nightingale 268366f6083SPeter Grehan static MALLOC_DEFINE(M_VM, "vm", "vm"); 269366f6083SPeter Grehan 270366f6083SPeter Grehan /* statistics */ 27161592433SNeel Natu static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 272366f6083SPeter Grehan 273b40598c5SPawel Biernacki SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 274b40598c5SPawel Biernacki NULL); 275add611fdSNeel Natu 276055fc2cbSNeel Natu /* 277055fc2cbSNeel Natu * Halt the guest if all vcpus are executing a HLT instruction with 278055fc2cbSNeel Natu * interrupts disabled. 279055fc2cbSNeel Natu */ 280055fc2cbSNeel Natu static int halt_detection_enabled = 1; 281055fc2cbSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 282055fc2cbSNeel Natu &halt_detection_enabled, 0, 283055fc2cbSNeel Natu "Halt VM if all vcpus execute HLT with interrupts disabled"); 284055fc2cbSNeel Natu 285978f3da1SAndriy Gapon static int vmm_ipinum; 286add611fdSNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 287add611fdSNeel Natu "IPI vector used for vcpu notifications"); 288add611fdSNeel Natu 289b0538143SNeel Natu static int trace_guest_exceptions; 290b0538143SNeel Natu SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 291b0538143SNeel Natu &trace_guest_exceptions, 0, 292b0538143SNeel Natu "Trap into hypervisor on all guest exceptions and reflect them back"); 293b0538143SNeel Natu 2943ba952e1SCorvin Köhne static int trap_wbinvd; 2953ba952e1SCorvin Köhne SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, 2963ba952e1SCorvin Köhne "WBINVD triggers a VM-exit"); 2973ba952e1SCorvin Köhne 298ee98f99dSJohn Baldwin u_int vm_maxcpu; 299ee98f99dSJohn Baldwin SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 300ee98f99dSJohn Baldwin &vm_maxcpu, 0, "Maximum number of vCPUs"); 301ee98f99dSJohn Baldwin 3029b1aa8d6SNeel Natu static void vm_free_memmap(struct vm *vm, int ident); 3039b1aa8d6SNeel Natu static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 304248e6799SNeel Natu static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); 305248e6799SNeel Natu 306ee98f99dSJohn Baldwin /* 307ee98f99dSJohn Baldwin * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU 308ee98f99dSJohn Baldwin * counts as well as range of vpid values for VT-x and by the capacity 309ee98f99dSJohn Baldwin * of cpuset_t masks. The call to new_unrhdr() in vpid_init() in 310ee98f99dSJohn Baldwin * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. 311ee98f99dSJohn Baldwin */ 312ee98f99dSJohn Baldwin #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) 313ee98f99dSJohn Baldwin 314248e6799SNeel Natu #ifdef KTR 315248e6799SNeel Natu static const char * 316248e6799SNeel Natu vcpu_state2str(enum vcpu_state state) 317248e6799SNeel Natu { 318248e6799SNeel Natu 319248e6799SNeel Natu switch (state) { 320248e6799SNeel Natu case VCPU_IDLE: 321248e6799SNeel Natu return ("idle"); 322248e6799SNeel Natu case VCPU_FROZEN: 323248e6799SNeel Natu return ("frozen"); 324248e6799SNeel Natu case VCPU_RUNNING: 325248e6799SNeel Natu return ("running"); 326248e6799SNeel Natu case VCPU_SLEEPING: 327248e6799SNeel Natu return ("sleeping"); 328248e6799SNeel Natu default: 329248e6799SNeel Natu return ("unknown"); 330248e6799SNeel Natu } 331248e6799SNeel Natu } 332248e6799SNeel Natu #endif 333248e6799SNeel Natu 334366f6083SPeter Grehan static void 33598568a00SJohn Baldwin vcpu_cleanup(struct vcpu *vcpu, bool destroy) 336366f6083SPeter Grehan { 337869c8d19SJohn Baldwin vmmops_vlapic_cleanup(vcpu->vlapic); 338869c8d19SJohn Baldwin vmmops_vcpu_cleanup(vcpu->cookie); 3391aa51504SJohn Baldwin vcpu->cookie = NULL; 3405fcf252fSNeel Natu if (destroy) { 341366f6083SPeter Grehan vmm_stat_free(vcpu->stats); 34238f1b189SPeter Grehan fpu_save_area_free(vcpu->guestfpu); 34308ebb360SJohn Baldwin vcpu_lock_destroy(vcpu); 344af3b48e1SJohn Baldwin free(vcpu, M_VM); 345366f6083SPeter Grehan } 3465fcf252fSNeel Natu } 347366f6083SPeter Grehan 34898568a00SJohn Baldwin static struct vcpu * 34998568a00SJohn Baldwin vcpu_alloc(struct vm *vm, int vcpu_id) 350366f6083SPeter Grehan { 351366f6083SPeter Grehan struct vcpu *vcpu; 352366f6083SPeter Grehan 353a488c9c9SRodney W. Grimes KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 3545fcf252fSNeel Natu ("vcpu_init: invalid vcpu %d", vcpu_id)); 3555fcf252fSNeel Natu 35698568a00SJohn Baldwin vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO); 35775dd3366SNeel Natu vcpu_lock_init(vcpu); 3585fcf252fSNeel Natu vcpu->state = VCPU_IDLE; 35975dd3366SNeel Natu vcpu->hostcpu = NOCPU; 360950af9ffSJohn Baldwin vcpu->vcpuid = vcpu_id; 361950af9ffSJohn Baldwin vcpu->vm = vm; 3625fcf252fSNeel Natu vcpu->guestfpu = fpu_save_area_alloc(); 3635fcf252fSNeel Natu vcpu->stats = vmm_stat_alloc(); 364483d953aSJohn Baldwin vcpu->tsc_offset = 0; 36598568a00SJohn Baldwin return (vcpu); 3665fcf252fSNeel Natu } 3675fcf252fSNeel Natu 36898568a00SJohn Baldwin static void 36998568a00SJohn Baldwin vcpu_init(struct vcpu *vcpu) 37098568a00SJohn Baldwin { 37198568a00SJohn Baldwin vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 372869c8d19SJohn Baldwin vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie); 3733f0f4b15SJohn Baldwin vm_set_x2apic_state(vcpu, X2APIC_DISABLED); 374248e6799SNeel Natu vcpu->reqidle = 0; 375091d4532SNeel Natu vcpu->exitintinfo = 0; 3765fcf252fSNeel Natu vcpu->nmi_pending = 0; 3775fcf252fSNeel Natu vcpu->extint_pending = 0; 3785fcf252fSNeel Natu vcpu->exception_pending = 0; 379abb023fbSJohn Baldwin vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 38038f1b189SPeter Grehan fpu_save_area_reset(vcpu->guestfpu); 3815fcf252fSNeel Natu vmm_stat_init(vcpu->stats); 382366f6083SPeter Grehan } 383366f6083SPeter Grehan 384b0538143SNeel Natu int 38580cb5d84SJohn Baldwin vcpu_trace_exceptions(struct vcpu *vcpu) 386b0538143SNeel Natu { 387b0538143SNeel Natu 388b0538143SNeel Natu return (trace_guest_exceptions); 389b0538143SNeel Natu } 390b0538143SNeel Natu 3913ba952e1SCorvin Köhne int 39280cb5d84SJohn Baldwin vcpu_trap_wbinvd(struct vcpu *vcpu) 3933ba952e1SCorvin Köhne { 3943ba952e1SCorvin Köhne return (trap_wbinvd); 3953ba952e1SCorvin Köhne } 3963ba952e1SCorvin Köhne 39798ed632cSNeel Natu struct vm_exit * 39880cb5d84SJohn Baldwin vm_exitinfo(struct vcpu *vcpu) 39998ed632cSNeel Natu { 40098ed632cSNeel Natu return (&vcpu->exitinfo); 40198ed632cSNeel Natu } 40298ed632cSNeel Natu 403366f6083SPeter Grehan static int 404366f6083SPeter Grehan vmm_init(void) 405366f6083SPeter Grehan { 406366f6083SPeter Grehan int error; 407366f6083SPeter Grehan 40815add60dSPeter Grehan if (!vmm_is_hw_supported()) 40915add60dSPeter Grehan return (ENXIO); 41015add60dSPeter Grehan 411ee98f99dSJohn Baldwin vm_maxcpu = mp_ncpus; 412ee98f99dSJohn Baldwin TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 413ee98f99dSJohn Baldwin 414ee98f99dSJohn Baldwin if (vm_maxcpu > VM_MAXCPU) { 415ee98f99dSJohn Baldwin printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 416ee98f99dSJohn Baldwin vm_maxcpu = VM_MAXCPU; 417ee98f99dSJohn Baldwin } 418ee98f99dSJohn Baldwin if (vm_maxcpu == 0) 419ee98f99dSJohn Baldwin vm_maxcpu = 1; 420ee98f99dSJohn Baldwin 421b01c2033SNeel Natu vmm_host_state_init(); 422add611fdSNeel Natu 423bd50262fSKonstantin Belousov vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : 424bd50262fSKonstantin Belousov &IDTVEC(justreturn)); 42518a2b08eSNeel Natu if (vmm_ipinum < 0) 426add611fdSNeel Natu vmm_ipinum = IPI_AST; 427366f6083SPeter Grehan 428366f6083SPeter Grehan error = vmm_mem_init(); 429366f6083SPeter Grehan if (error) 430366f6083SPeter Grehan return (error); 431366f6083SPeter Grehan 43215add60dSPeter Grehan vmm_resume_p = vmmops_modresume; 433366f6083SPeter Grehan 43415add60dSPeter Grehan return (vmmops_modinit(vmm_ipinum)); 435366f6083SPeter Grehan } 436366f6083SPeter Grehan 437366f6083SPeter Grehan static int 438366f6083SPeter Grehan vmm_handler(module_t mod, int what, void *arg) 439366f6083SPeter Grehan { 440366f6083SPeter Grehan int error; 441366f6083SPeter Grehan 442366f6083SPeter Grehan switch (what) { 443366f6083SPeter Grehan case MOD_LOAD: 44415add60dSPeter Grehan if (vmm_is_hw_supported()) { 445366f6083SPeter Grehan vmmdev_init(); 446366f6083SPeter Grehan error = vmm_init(); 447d5408b1dSNeel Natu if (error == 0) 448d5408b1dSNeel Natu vmm_initialized = 1; 44915add60dSPeter Grehan } else { 45015add60dSPeter Grehan error = ENXIO; 45115add60dSPeter Grehan } 452366f6083SPeter Grehan break; 453366f6083SPeter Grehan case MOD_UNLOAD: 45415add60dSPeter Grehan if (vmm_is_hw_supported()) { 455cdc5b9e7SNeel Natu error = vmmdev_cleanup(); 456cdc5b9e7SNeel Natu if (error == 0) { 45763e62d39SJohn Baldwin vmm_resume_p = NULL; 458366f6083SPeter Grehan iommu_cleanup(); 459add611fdSNeel Natu if (vmm_ipinum != IPI_AST) 46018a2b08eSNeel Natu lapic_ipi_free(vmm_ipinum); 46115add60dSPeter Grehan error = vmmops_modcleanup(); 46281ef6611SPeter Grehan /* 46381ef6611SPeter Grehan * Something bad happened - prevent new 46481ef6611SPeter Grehan * VMs from being created 46581ef6611SPeter Grehan */ 46681ef6611SPeter Grehan if (error) 467d5408b1dSNeel Natu vmm_initialized = 0; 46881ef6611SPeter Grehan } 46915add60dSPeter Grehan } else { 47015add60dSPeter Grehan error = 0; 47115add60dSPeter Grehan } 472366f6083SPeter Grehan break; 473366f6083SPeter Grehan default: 474366f6083SPeter Grehan error = 0; 475366f6083SPeter Grehan break; 476366f6083SPeter Grehan } 477366f6083SPeter Grehan return (error); 478366f6083SPeter Grehan } 479366f6083SPeter Grehan 480366f6083SPeter Grehan static moduledata_t vmm_kmod = { 481366f6083SPeter Grehan "vmm", 482366f6083SPeter Grehan vmm_handler, 483366f6083SPeter Grehan NULL 484366f6083SPeter Grehan }; 485366f6083SPeter Grehan 486366f6083SPeter Grehan /* 487e3f0800bSNeel Natu * vmm initialization has the following dependencies: 488e3f0800bSNeel Natu * 489e3f0800bSNeel Natu * - VT-x initialization requires smp_rendezvous() and therefore must happen 490e3f0800bSNeel Natu * after SMP is fully functional (after SI_SUB_SMP). 491366f6083SPeter Grehan */ 492e3f0800bSNeel Natu DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 493366f6083SPeter Grehan MODULE_VERSION(vmm, 1); 494366f6083SPeter Grehan 4955fcf252fSNeel Natu static void 4965fcf252fSNeel Natu vm_init(struct vm *vm, bool create) 4975fcf252fSNeel Natu { 49815add60dSPeter Grehan vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 4995fcf252fSNeel Natu vm->iommu = NULL; 5005fcf252fSNeel Natu vm->vioapic = vioapic_init(vm); 5015fcf252fSNeel Natu vm->vhpet = vhpet_init(vm); 5025fcf252fSNeel Natu vm->vatpic = vatpic_init(vm); 5035fcf252fSNeel Natu vm->vatpit = vatpit_init(vm); 504160ef77aSNeel Natu vm->vpmtmr = vpmtmr_init(vm); 5050dafa5cdSNeel Natu if (create) 5060dafa5cdSNeel Natu vm->vrtc = vrtc_init(vm); 5075fcf252fSNeel Natu 5085fcf252fSNeel Natu CPU_ZERO(&vm->active_cpus); 509fc276d92SJohn Baldwin CPU_ZERO(&vm->debug_cpus); 510c0f35dbfSJohn Baldwin CPU_ZERO(&vm->startup_cpus); 5115fcf252fSNeel Natu 5125fcf252fSNeel Natu vm->suspend = 0; 5135fcf252fSNeel Natu CPU_ZERO(&vm->suspended_cpus); 5145fcf252fSNeel Natu 51598568a00SJohn Baldwin if (!create) { 51698568a00SJohn Baldwin for (int i = 0; i < vm->maxcpus; i++) { 51798568a00SJohn Baldwin if (vm->vcpu[i] != NULL) 51898568a00SJohn Baldwin vcpu_init(vm->vcpu[i]); 51998568a00SJohn Baldwin } 52098568a00SJohn Baldwin } 52198568a00SJohn Baldwin } 52298568a00SJohn Baldwin 52398568a00SJohn Baldwin void 52498568a00SJohn Baldwin vm_disable_vcpu_creation(struct vm *vm) 52598568a00SJohn Baldwin { 52698568a00SJohn Baldwin sx_xlock(&vm->vcpus_init_lock); 52798568a00SJohn Baldwin vm->dying = true; 52898568a00SJohn Baldwin sx_xunlock(&vm->vcpus_init_lock); 52998568a00SJohn Baldwin } 53098568a00SJohn Baldwin 53198568a00SJohn Baldwin struct vcpu * 53298568a00SJohn Baldwin vm_alloc_vcpu(struct vm *vm, int vcpuid) 53398568a00SJohn Baldwin { 53498568a00SJohn Baldwin struct vcpu *vcpu; 53598568a00SJohn Baldwin 53698568a00SJohn Baldwin if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 53798568a00SJohn Baldwin return (NULL); 53898568a00SJohn Baldwin 53998568a00SJohn Baldwin vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); 54098568a00SJohn Baldwin if (__predict_true(vcpu != NULL)) 54198568a00SJohn Baldwin return (vcpu); 54298568a00SJohn Baldwin 54398568a00SJohn Baldwin sx_xlock(&vm->vcpus_init_lock); 54498568a00SJohn Baldwin vcpu = vm->vcpu[vcpuid]; 54598568a00SJohn Baldwin if (vcpu == NULL && !vm->dying) { 54698568a00SJohn Baldwin vcpu = vcpu_alloc(vm, vcpuid); 54798568a00SJohn Baldwin vcpu_init(vcpu); 54898568a00SJohn Baldwin 54998568a00SJohn Baldwin /* 55098568a00SJohn Baldwin * Ensure vCPU is fully created before updating pointer 55198568a00SJohn Baldwin * to permit unlocked reads above. 55298568a00SJohn Baldwin */ 55398568a00SJohn Baldwin atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 55498568a00SJohn Baldwin (uintptr_t)vcpu); 55598568a00SJohn Baldwin } 55698568a00SJohn Baldwin sx_xunlock(&vm->vcpus_init_lock); 55798568a00SJohn Baldwin return (vcpu); 55898568a00SJohn Baldwin } 55998568a00SJohn Baldwin 56098568a00SJohn Baldwin void 56198568a00SJohn Baldwin vm_slock_vcpus(struct vm *vm) 56298568a00SJohn Baldwin { 56398568a00SJohn Baldwin sx_slock(&vm->vcpus_init_lock); 56498568a00SJohn Baldwin } 56598568a00SJohn Baldwin 56698568a00SJohn Baldwin void 56798568a00SJohn Baldwin vm_unlock_vcpus(struct vm *vm) 56898568a00SJohn Baldwin { 56998568a00SJohn Baldwin sx_unlock(&vm->vcpus_init_lock); 5705fcf252fSNeel Natu } 5715fcf252fSNeel Natu 57201d822d3SRodney W. Grimes /* 57301d822d3SRodney W. Grimes * The default CPU topology is a single thread per package. 57401d822d3SRodney W. Grimes */ 57501d822d3SRodney W. Grimes u_int cores_per_package = 1; 57601d822d3SRodney W. Grimes u_int threads_per_core = 1; 57701d822d3SRodney W. Grimes 578d5408b1dSNeel Natu int 579d5408b1dSNeel Natu vm_create(const char *name, struct vm **retvm) 580366f6083SPeter Grehan { 581366f6083SPeter Grehan struct vm *vm; 582318224bbSNeel Natu struct vmspace *vmspace; 583366f6083SPeter Grehan 584d5408b1dSNeel Natu /* 585d5408b1dSNeel Natu * If vmm.ko could not be successfully initialized then don't attempt 586d5408b1dSNeel Natu * to create the virtual machine. 587d5408b1dSNeel Natu */ 588d5408b1dSNeel Natu if (!vmm_initialized) 589d5408b1dSNeel Natu return (ENXIO); 590d5408b1dSNeel Natu 591df95cc76SKa Ho Ng if (name == NULL || strnlen(name, VM_MAX_NAMELEN + 1) == 592df95cc76SKa Ho Ng VM_MAX_NAMELEN + 1) 593d5408b1dSNeel Natu return (EINVAL); 594366f6083SPeter Grehan 5953c48106aSKonstantin Belousov vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48); 596318224bbSNeel Natu if (vmspace == NULL) 597318224bbSNeel Natu return (ENOMEM); 598318224bbSNeel Natu 599366f6083SPeter Grehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 600366f6083SPeter Grehan strcpy(vm->name, name); 60188c4b8d1SNeel Natu vm->vmspace = vmspace; 6025b8a8cd1SNeel Natu mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 60367b69e76SJohn Baldwin sx_init(&vm->mem_segs_lock, "vm mem_segs"); 60498568a00SJohn Baldwin sx_init(&vm->vcpus_init_lock, "vm vcpus"); 605ee98f99dSJohn Baldwin vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK | 606ee98f99dSJohn Baldwin M_ZERO); 607366f6083SPeter Grehan 60801d822d3SRodney W. Grimes vm->sockets = 1; 60901d822d3SRodney W. Grimes vm->cores = cores_per_package; /* XXX backwards compatibility */ 61001d822d3SRodney W. Grimes vm->threads = threads_per_core; /* XXX backwards compatibility */ 611ee98f99dSJohn Baldwin vm->maxcpus = vm_maxcpu; 61201d822d3SRodney W. Grimes 6135fcf252fSNeel Natu vm_init(vm, true); 614366f6083SPeter Grehan 615d5408b1dSNeel Natu *retvm = vm; 616d5408b1dSNeel Natu return (0); 617366f6083SPeter Grehan } 618366f6083SPeter Grehan 61901d822d3SRodney W. Grimes void 62001d822d3SRodney W. Grimes vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 62101d822d3SRodney W. Grimes uint16_t *threads, uint16_t *maxcpus) 62201d822d3SRodney W. Grimes { 62301d822d3SRodney W. Grimes *sockets = vm->sockets; 62401d822d3SRodney W. Grimes *cores = vm->cores; 62501d822d3SRodney W. Grimes *threads = vm->threads; 62601d822d3SRodney W. Grimes *maxcpus = vm->maxcpus; 62701d822d3SRodney W. Grimes } 62801d822d3SRodney W. Grimes 629a488c9c9SRodney W. Grimes uint16_t 630a488c9c9SRodney W. Grimes vm_get_maxcpus(struct vm *vm) 631a488c9c9SRodney W. Grimes { 632a488c9c9SRodney W. Grimes return (vm->maxcpus); 633a488c9c9SRodney W. Grimes } 634a488c9c9SRodney W. Grimes 63501d822d3SRodney W. Grimes int 63601d822d3SRodney W. Grimes vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 63798568a00SJohn Baldwin uint16_t threads, uint16_t maxcpus __unused) 63801d822d3SRodney W. Grimes { 63998568a00SJohn Baldwin /* Ignore maxcpus. */ 640a488c9c9SRodney W. Grimes if ((sockets * cores * threads) > vm->maxcpus) 64101d822d3SRodney W. Grimes return (EINVAL); 64201d822d3SRodney W. Grimes vm->sockets = sockets; 64301d822d3SRodney W. Grimes vm->cores = cores; 64401d822d3SRodney W. Grimes vm->threads = threads; 64501d822d3SRodney W. Grimes return(0); 64601d822d3SRodney W. Grimes } 64701d822d3SRodney W. Grimes 648f7d51510SNeel Natu static void 6495fcf252fSNeel Natu vm_cleanup(struct vm *vm, bool destroy) 650366f6083SPeter Grehan { 6519b1aa8d6SNeel Natu struct mem_map *mm; 652366f6083SPeter Grehan int i; 653366f6083SPeter Grehan 654c668e817SRobert Wing if (destroy) 655c668e817SRobert Wing vm_xlock_memsegs(vm); 656c668e817SRobert Wing 657366f6083SPeter Grehan ppt_unassign_all(vm); 658366f6083SPeter Grehan 659318224bbSNeel Natu if (vm->iommu != NULL) 660318224bbSNeel Natu iommu_destroy_domain(vm->iommu); 661318224bbSNeel Natu 6620dafa5cdSNeel Natu if (destroy) 6630dafa5cdSNeel Natu vrtc_cleanup(vm->vrtc); 6640dafa5cdSNeel Natu else 6650dafa5cdSNeel Natu vrtc_reset(vm->vrtc); 666160ef77aSNeel Natu vpmtmr_cleanup(vm->vpmtmr); 667e883c9bbSTycho Nightingale vatpit_cleanup(vm->vatpit); 66808e3ff32SNeel Natu vhpet_cleanup(vm->vhpet); 669762fd208STycho Nightingale vatpic_cleanup(vm->vatpic); 67008e3ff32SNeel Natu vioapic_cleanup(vm->vioapic); 67108e3ff32SNeel Natu 67298568a00SJohn Baldwin for (i = 0; i < vm->maxcpus; i++) { 67398568a00SJohn Baldwin if (vm->vcpu[i] != NULL) 67498568a00SJohn Baldwin vcpu_cleanup(vm->vcpu[i], destroy); 67598568a00SJohn Baldwin } 6765fcf252fSNeel Natu 67715add60dSPeter Grehan vmmops_cleanup(vm->cookie); 6785fcf252fSNeel Natu 6799b1aa8d6SNeel Natu /* 6809b1aa8d6SNeel Natu * System memory is removed from the guest address space only when 6819b1aa8d6SNeel Natu * the VM is destroyed. This is because the mapping remains the same 6829b1aa8d6SNeel Natu * across VM reset. 6839b1aa8d6SNeel Natu * 6849b1aa8d6SNeel Natu * Device memory can be relocated by the guest (e.g. using PCI BARs) 6859b1aa8d6SNeel Natu * so those mappings are removed on a VM reset. 6869b1aa8d6SNeel Natu */ 6879b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 6889b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 6899b1aa8d6SNeel Natu if (destroy || !sysmem_mapping(vm, mm)) 6909b1aa8d6SNeel Natu vm_free_memmap(vm, i); 6919b1aa8d6SNeel Natu } 692f7d51510SNeel Natu 6939b1aa8d6SNeel Natu if (destroy) { 6949b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMSEGS; i++) 6959b1aa8d6SNeel Natu vm_free_memseg(vm, i); 696c668e817SRobert Wing vm_unlock_memsegs(vm); 697366f6083SPeter Grehan 69815add60dSPeter Grehan vmmops_vmspace_free(vm->vmspace); 6995fcf252fSNeel Natu vm->vmspace = NULL; 70008ebb360SJohn Baldwin 701ee98f99dSJohn Baldwin free(vm->vcpu, M_VM); 70298568a00SJohn Baldwin sx_destroy(&vm->vcpus_init_lock); 70367b69e76SJohn Baldwin sx_destroy(&vm->mem_segs_lock); 70408ebb360SJohn Baldwin mtx_destroy(&vm->rendezvous_mtx); 7055fcf252fSNeel Natu } 7065fcf252fSNeel Natu } 707366f6083SPeter Grehan 7085fcf252fSNeel Natu void 7095fcf252fSNeel Natu vm_destroy(struct vm *vm) 7105fcf252fSNeel Natu { 7115fcf252fSNeel Natu vm_cleanup(vm, true); 712366f6083SPeter Grehan free(vm, M_VM); 713366f6083SPeter Grehan } 714366f6083SPeter Grehan 7155fcf252fSNeel Natu int 7165fcf252fSNeel Natu vm_reinit(struct vm *vm) 7175fcf252fSNeel Natu { 7185fcf252fSNeel Natu int error; 7195fcf252fSNeel Natu 7205fcf252fSNeel Natu /* 7215fcf252fSNeel Natu * A virtual machine can be reset only if all vcpus are suspended. 7225fcf252fSNeel Natu */ 7235fcf252fSNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 7245fcf252fSNeel Natu vm_cleanup(vm, false); 7255fcf252fSNeel Natu vm_init(vm, false); 7265fcf252fSNeel Natu error = 0; 7275fcf252fSNeel Natu } else { 7285fcf252fSNeel Natu error = EBUSY; 7295fcf252fSNeel Natu } 7305fcf252fSNeel Natu 7315fcf252fSNeel Natu return (error); 7325fcf252fSNeel Natu } 7335fcf252fSNeel Natu 734366f6083SPeter Grehan const char * 735366f6083SPeter Grehan vm_name(struct vm *vm) 736366f6083SPeter Grehan { 737366f6083SPeter Grehan return (vm->name); 738366f6083SPeter Grehan } 739366f6083SPeter Grehan 74067b69e76SJohn Baldwin void 74167b69e76SJohn Baldwin vm_slock_memsegs(struct vm *vm) 74267b69e76SJohn Baldwin { 74367b69e76SJohn Baldwin sx_slock(&vm->mem_segs_lock); 74467b69e76SJohn Baldwin } 74567b69e76SJohn Baldwin 74667b69e76SJohn Baldwin void 74767b69e76SJohn Baldwin vm_xlock_memsegs(struct vm *vm) 74867b69e76SJohn Baldwin { 74967b69e76SJohn Baldwin sx_xlock(&vm->mem_segs_lock); 75067b69e76SJohn Baldwin } 75167b69e76SJohn Baldwin 75267b69e76SJohn Baldwin void 75367b69e76SJohn Baldwin vm_unlock_memsegs(struct vm *vm) 75467b69e76SJohn Baldwin { 75567b69e76SJohn Baldwin sx_unlock(&vm->mem_segs_lock); 75667b69e76SJohn Baldwin } 75767b69e76SJohn Baldwin 758366f6083SPeter Grehan int 759366f6083SPeter Grehan vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 760366f6083SPeter Grehan { 761318224bbSNeel Natu vm_object_t obj; 762366f6083SPeter Grehan 763318224bbSNeel Natu if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 764318224bbSNeel Natu return (ENOMEM); 765318224bbSNeel Natu else 766318224bbSNeel Natu return (0); 767366f6083SPeter Grehan } 768366f6083SPeter Grehan 769366f6083SPeter Grehan int 770366f6083SPeter Grehan vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 771366f6083SPeter Grehan { 772366f6083SPeter Grehan 773318224bbSNeel Natu vmm_mmio_free(vm->vmspace, gpa, len); 774318224bbSNeel Natu return (0); 775366f6083SPeter Grehan } 776366f6083SPeter Grehan 7779b1aa8d6SNeel Natu /* 7789b1aa8d6SNeel Natu * Return 'true' if 'gpa' is allocated in the guest address space. 7799b1aa8d6SNeel Natu * 7809b1aa8d6SNeel Natu * This function is called in the context of a running vcpu which acts as 7819b1aa8d6SNeel Natu * an implicit lock on 'vm->mem_maps[]'. 7829b1aa8d6SNeel Natu */ 7839b1aa8d6SNeel Natu bool 78480cb5d84SJohn Baldwin vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) 785366f6083SPeter Grehan { 78680cb5d84SJohn Baldwin struct vm *vm = vcpu->vm; 7879b1aa8d6SNeel Natu struct mem_map *mm; 788341f19c9SNeel Natu int i; 789341f19c9SNeel Natu 7909b1aa8d6SNeel Natu #ifdef INVARIANTS 7919b1aa8d6SNeel Natu int hostcpu, state; 79280cb5d84SJohn Baldwin state = vcpu_get_state(vcpu, &hostcpu); 7939b1aa8d6SNeel Natu KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 7949b1aa8d6SNeel Natu ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 7959b1aa8d6SNeel Natu #endif 7969b1aa8d6SNeel Natu 7979b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 7989b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 7999b1aa8d6SNeel Natu if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 8009b1aa8d6SNeel Natu return (true); /* 'gpa' is sysmem or devmem */ 801341f19c9SNeel Natu } 802341f19c9SNeel Natu 803318224bbSNeel Natu if (ppt_is_mmio(vm, gpa)) 8049b1aa8d6SNeel Natu return (true); /* 'gpa' is pci passthru mmio */ 805318224bbSNeel Natu 8069b1aa8d6SNeel Natu return (false); 807341f19c9SNeel Natu } 808341f19c9SNeel Natu 809341f19c9SNeel Natu int 8109b1aa8d6SNeel Natu vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 811341f19c9SNeel Natu { 812318224bbSNeel Natu struct mem_seg *seg; 8139b1aa8d6SNeel Natu vm_object_t obj; 814366f6083SPeter Grehan 81567b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_XLOCKED); 81667b69e76SJohn Baldwin 8179b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 818341f19c9SNeel Natu return (EINVAL); 819341f19c9SNeel Natu 8209b1aa8d6SNeel Natu if (len == 0 || (len & PAGE_MASK)) 8219b1aa8d6SNeel Natu return (EINVAL); 822341f19c9SNeel Natu 8239b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 8249b1aa8d6SNeel Natu if (seg->object != NULL) { 8259b1aa8d6SNeel Natu if (seg->len == len && seg->sysmem == sysmem) 8269b1aa8d6SNeel Natu return (EEXIST); 8279b1aa8d6SNeel Natu else 8289b1aa8d6SNeel Natu return (EINVAL); 829341f19c9SNeel Natu } 830341f19c9SNeel Natu 8316b389740SMark Johnston obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT); 8329b1aa8d6SNeel Natu if (obj == NULL) 833318224bbSNeel Natu return (ENOMEM); 834318224bbSNeel Natu 835318224bbSNeel Natu seg->len = len; 8369b1aa8d6SNeel Natu seg->object = obj; 8379b1aa8d6SNeel Natu seg->sysmem = sysmem; 838366f6083SPeter Grehan return (0); 839366f6083SPeter Grehan } 840366f6083SPeter Grehan 8419b1aa8d6SNeel Natu int 8429b1aa8d6SNeel Natu vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 8439b1aa8d6SNeel Natu vm_object_t *objptr) 844477867a0SNeel Natu { 8459b1aa8d6SNeel Natu struct mem_seg *seg; 846477867a0SNeel Natu 84767b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_LOCKED); 84867b69e76SJohn Baldwin 8499b1aa8d6SNeel Natu if (ident < 0 || ident >= VM_MAX_MEMSEGS) 8509b1aa8d6SNeel Natu return (EINVAL); 8519b1aa8d6SNeel Natu 8529b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 8539b1aa8d6SNeel Natu if (len) 8549b1aa8d6SNeel Natu *len = seg->len; 8559b1aa8d6SNeel Natu if (sysmem) 8569b1aa8d6SNeel Natu *sysmem = seg->sysmem; 8579b1aa8d6SNeel Natu if (objptr) 8589b1aa8d6SNeel Natu *objptr = seg->object; 8599b1aa8d6SNeel Natu return (0); 860477867a0SNeel Natu } 8619b1aa8d6SNeel Natu 8629b1aa8d6SNeel Natu void 8639b1aa8d6SNeel Natu vm_free_memseg(struct vm *vm, int ident) 8649b1aa8d6SNeel Natu { 8659b1aa8d6SNeel Natu struct mem_seg *seg; 8669b1aa8d6SNeel Natu 8679b1aa8d6SNeel Natu KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 8689b1aa8d6SNeel Natu ("%s: invalid memseg ident %d", __func__, ident)); 8699b1aa8d6SNeel Natu 8709b1aa8d6SNeel Natu seg = &vm->mem_segs[ident]; 8719b1aa8d6SNeel Natu if (seg->object != NULL) { 8729b1aa8d6SNeel Natu vm_object_deallocate(seg->object); 8739b1aa8d6SNeel Natu bzero(seg, sizeof(struct mem_seg)); 8749b1aa8d6SNeel Natu } 8759b1aa8d6SNeel Natu } 8769b1aa8d6SNeel Natu 8779b1aa8d6SNeel Natu int 8789b1aa8d6SNeel Natu vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 8799b1aa8d6SNeel Natu size_t len, int prot, int flags) 8809b1aa8d6SNeel Natu { 8819b1aa8d6SNeel Natu struct mem_seg *seg; 8829b1aa8d6SNeel Natu struct mem_map *m, *map; 8839b1aa8d6SNeel Natu vm_ooffset_t last; 8849b1aa8d6SNeel Natu int i, error; 8859b1aa8d6SNeel Natu 8869b1aa8d6SNeel Natu if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 8879b1aa8d6SNeel Natu return (EINVAL); 8889b1aa8d6SNeel Natu 8899b1aa8d6SNeel Natu if (flags & ~VM_MEMMAP_F_WIRED) 8909b1aa8d6SNeel Natu return (EINVAL); 8919b1aa8d6SNeel Natu 8929b1aa8d6SNeel Natu if (segid < 0 || segid >= VM_MAX_MEMSEGS) 8939b1aa8d6SNeel Natu return (EINVAL); 8949b1aa8d6SNeel Natu 8959b1aa8d6SNeel Natu seg = &vm->mem_segs[segid]; 8969b1aa8d6SNeel Natu if (seg->object == NULL) 8979b1aa8d6SNeel Natu return (EINVAL); 8989b1aa8d6SNeel Natu 8999b1aa8d6SNeel Natu last = first + len; 9009b1aa8d6SNeel Natu if (first < 0 || first >= last || last > seg->len) 9019b1aa8d6SNeel Natu return (EINVAL); 9029b1aa8d6SNeel Natu 9039b1aa8d6SNeel Natu if ((gpa | first | last) & PAGE_MASK) 9049b1aa8d6SNeel Natu return (EINVAL); 9059b1aa8d6SNeel Natu 9069b1aa8d6SNeel Natu map = NULL; 9079b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9089b1aa8d6SNeel Natu m = &vm->mem_maps[i]; 9099b1aa8d6SNeel Natu if (m->len == 0) { 9109b1aa8d6SNeel Natu map = m; 9119b1aa8d6SNeel Natu break; 9129b1aa8d6SNeel Natu } 9139b1aa8d6SNeel Natu } 9149b1aa8d6SNeel Natu 9159b1aa8d6SNeel Natu if (map == NULL) 9169b1aa8d6SNeel Natu return (ENOSPC); 9179b1aa8d6SNeel Natu 9189b1aa8d6SNeel Natu error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 9199b1aa8d6SNeel Natu len, 0, VMFS_NO_SPACE, prot, prot, 0); 9209b1aa8d6SNeel Natu if (error != KERN_SUCCESS) 9219b1aa8d6SNeel Natu return (EFAULT); 9229b1aa8d6SNeel Natu 9239b1aa8d6SNeel Natu vm_object_reference(seg->object); 9249b1aa8d6SNeel Natu 9259b1aa8d6SNeel Natu if (flags & VM_MEMMAP_F_WIRED) { 9269b1aa8d6SNeel Natu error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 9279b1aa8d6SNeel Natu VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 9289b1aa8d6SNeel Natu if (error != KERN_SUCCESS) { 9299b1aa8d6SNeel Natu vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 93054a3a114SMark Johnston return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : 93154a3a114SMark Johnston EFAULT); 9329b1aa8d6SNeel Natu } 9339b1aa8d6SNeel Natu } 9349b1aa8d6SNeel Natu 9359b1aa8d6SNeel Natu map->gpa = gpa; 9369b1aa8d6SNeel Natu map->len = len; 9379b1aa8d6SNeel Natu map->segoff = first; 9389b1aa8d6SNeel Natu map->segid = segid; 9399b1aa8d6SNeel Natu map->prot = prot; 9409b1aa8d6SNeel Natu map->flags = flags; 9419b1aa8d6SNeel Natu return (0); 9429b1aa8d6SNeel Natu } 9439b1aa8d6SNeel Natu 9449b1aa8d6SNeel Natu int 945f8a6ec2dSD Scott Phillips vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) 946f8a6ec2dSD Scott Phillips { 947f8a6ec2dSD Scott Phillips struct mem_map *m; 948f8a6ec2dSD Scott Phillips int i; 949f8a6ec2dSD Scott Phillips 950f8a6ec2dSD Scott Phillips for (i = 0; i < VM_MAX_MEMMAPS; i++) { 951f8a6ec2dSD Scott Phillips m = &vm->mem_maps[i]; 952f8a6ec2dSD Scott Phillips if (m->gpa == gpa && m->len == len && 953f8a6ec2dSD Scott Phillips (m->flags & VM_MEMMAP_F_IOMMU) == 0) { 954f8a6ec2dSD Scott Phillips vm_free_memmap(vm, i); 955f8a6ec2dSD Scott Phillips return (0); 956f8a6ec2dSD Scott Phillips } 957f8a6ec2dSD Scott Phillips } 958f8a6ec2dSD Scott Phillips 959f8a6ec2dSD Scott Phillips return (EINVAL); 960f8a6ec2dSD Scott Phillips } 961f8a6ec2dSD Scott Phillips 962f8a6ec2dSD Scott Phillips int 9639b1aa8d6SNeel Natu vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 9649b1aa8d6SNeel Natu vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 9659b1aa8d6SNeel Natu { 9669b1aa8d6SNeel Natu struct mem_map *mm, *mmnext; 9679b1aa8d6SNeel Natu int i; 9689b1aa8d6SNeel Natu 9699b1aa8d6SNeel Natu mmnext = NULL; 9709b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 9719b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 9729b1aa8d6SNeel Natu if (mm->len == 0 || mm->gpa < *gpa) 9739b1aa8d6SNeel Natu continue; 9749b1aa8d6SNeel Natu if (mmnext == NULL || mm->gpa < mmnext->gpa) 9759b1aa8d6SNeel Natu mmnext = mm; 9769b1aa8d6SNeel Natu } 9779b1aa8d6SNeel Natu 9789b1aa8d6SNeel Natu if (mmnext != NULL) { 9799b1aa8d6SNeel Natu *gpa = mmnext->gpa; 9809b1aa8d6SNeel Natu if (segid) 9819b1aa8d6SNeel Natu *segid = mmnext->segid; 9829b1aa8d6SNeel Natu if (segoff) 9839b1aa8d6SNeel Natu *segoff = mmnext->segoff; 9849b1aa8d6SNeel Natu if (len) 9859b1aa8d6SNeel Natu *len = mmnext->len; 9869b1aa8d6SNeel Natu if (prot) 9879b1aa8d6SNeel Natu *prot = mmnext->prot; 9889b1aa8d6SNeel Natu if (flags) 9899b1aa8d6SNeel Natu *flags = mmnext->flags; 9909b1aa8d6SNeel Natu return (0); 9919b1aa8d6SNeel Natu } else { 9929b1aa8d6SNeel Natu return (ENOENT); 9939b1aa8d6SNeel Natu } 994477867a0SNeel Natu } 995477867a0SNeel Natu 996318224bbSNeel Natu static void 9979b1aa8d6SNeel Natu vm_free_memmap(struct vm *vm, int ident) 998366f6083SPeter Grehan { 9999b1aa8d6SNeel Natu struct mem_map *mm; 100073505a10SRobert Wing int error __diagused; 10014db4fb2cSNeel Natu 10029b1aa8d6SNeel Natu mm = &vm->mem_maps[ident]; 10039b1aa8d6SNeel Natu if (mm->len) { 10049b1aa8d6SNeel Natu error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 10059b1aa8d6SNeel Natu mm->gpa + mm->len); 10069b1aa8d6SNeel Natu KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 10079b1aa8d6SNeel Natu __func__, error)); 10089b1aa8d6SNeel Natu bzero(mm, sizeof(struct mem_map)); 1009318224bbSNeel Natu } 1010318224bbSNeel Natu } 1011318224bbSNeel Natu 10129b1aa8d6SNeel Natu static __inline bool 10139b1aa8d6SNeel Natu sysmem_mapping(struct vm *vm, struct mem_map *mm) 1014318224bbSNeel Natu { 1015318224bbSNeel Natu 10169b1aa8d6SNeel Natu if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 10179b1aa8d6SNeel Natu return (true); 10189b1aa8d6SNeel Natu else 10199b1aa8d6SNeel Natu return (false); 1020318224bbSNeel Natu } 1021318224bbSNeel Natu 1022147d12a7SAntoine Brodin vm_paddr_t 1023147d12a7SAntoine Brodin vmm_sysmem_maxaddr(struct vm *vm) 10249b1aa8d6SNeel Natu { 10259b1aa8d6SNeel Natu struct mem_map *mm; 10269b1aa8d6SNeel Natu vm_paddr_t maxaddr; 10279b1aa8d6SNeel Natu int i; 1028318224bbSNeel Natu 10299b1aa8d6SNeel Natu maxaddr = 0; 10309b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 10319b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 10329b1aa8d6SNeel Natu if (sysmem_mapping(vm, mm)) { 10339b1aa8d6SNeel Natu if (maxaddr < mm->gpa + mm->len) 10349b1aa8d6SNeel Natu maxaddr = mm->gpa + mm->len; 10359b1aa8d6SNeel Natu } 10369b1aa8d6SNeel Natu } 10379b1aa8d6SNeel Natu return (maxaddr); 1038318224bbSNeel Natu } 1039318224bbSNeel Natu 1040318224bbSNeel Natu static void 1041490d56c5SEd Maste vm_iommu_modify(struct vm *vm, bool map) 1042318224bbSNeel Natu { 1043318224bbSNeel Natu int i, sz; 1044318224bbSNeel Natu vm_paddr_t gpa, hpa; 10459b1aa8d6SNeel Natu struct mem_map *mm; 1046318224bbSNeel Natu void *vp, *cookie, *host_domain; 1047318224bbSNeel Natu 1048318224bbSNeel Natu sz = PAGE_SIZE; 1049318224bbSNeel Natu host_domain = iommu_host_domain(); 1050318224bbSNeel Natu 10519b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 10529b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 10539b1aa8d6SNeel Natu if (!sysmem_mapping(vm, mm)) 10549b1aa8d6SNeel Natu continue; 1055318224bbSNeel Natu 10569b1aa8d6SNeel Natu if (map) { 10579b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, 10589b1aa8d6SNeel Natu ("iommu map found invalid memmap %#lx/%#lx/%#x", 10599b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 10609b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) 10619b1aa8d6SNeel Natu continue; 10629b1aa8d6SNeel Natu mm->flags |= VM_MEMMAP_F_IOMMU; 10639b1aa8d6SNeel Natu } else { 10649b1aa8d6SNeel Natu if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) 10659b1aa8d6SNeel Natu continue; 10669b1aa8d6SNeel Natu mm->flags &= ~VM_MEMMAP_F_IOMMU; 10679b1aa8d6SNeel Natu KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, 10689b1aa8d6SNeel Natu ("iommu unmap found invalid memmap %#lx/%#lx/%#x", 10699b1aa8d6SNeel Natu mm->gpa, mm->len, mm->flags)); 10709b1aa8d6SNeel Natu } 10719b1aa8d6SNeel Natu 10729b1aa8d6SNeel Natu gpa = mm->gpa; 10739b1aa8d6SNeel Natu while (gpa < mm->gpa + mm->len) { 107428b561adSJohn Baldwin vp = vm_gpa_hold_global(vm, gpa, PAGE_SIZE, 107528b561adSJohn Baldwin VM_PROT_WRITE, &cookie); 1076318224bbSNeel Natu KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 1077318224bbSNeel Natu vm_name(vm), gpa)); 1078318224bbSNeel Natu 1079318224bbSNeel Natu vm_gpa_release(cookie); 1080318224bbSNeel Natu 1081318224bbSNeel Natu hpa = DMAP_TO_PHYS((uintptr_t)vp); 1082318224bbSNeel Natu if (map) { 1083318224bbSNeel Natu iommu_create_mapping(vm->iommu, gpa, hpa, sz); 1084318224bbSNeel Natu } else { 1085318224bbSNeel Natu iommu_remove_mapping(vm->iommu, gpa, sz); 1086318224bbSNeel Natu } 1087318224bbSNeel Natu 1088318224bbSNeel Natu gpa += PAGE_SIZE; 1089318224bbSNeel Natu } 1090318224bbSNeel Natu } 1091318224bbSNeel Natu 1092318224bbSNeel Natu /* 1093318224bbSNeel Natu * Invalidate the cached translations associated with the domain 1094318224bbSNeel Natu * from which pages were removed. 1095318224bbSNeel Natu */ 1096318224bbSNeel Natu if (map) 1097318224bbSNeel Natu iommu_invalidate_tlb(host_domain); 1098318224bbSNeel Natu else 1099318224bbSNeel Natu iommu_invalidate_tlb(vm->iommu); 1100318224bbSNeel Natu } 1101318224bbSNeel Natu 1102490d56c5SEd Maste #define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) 1103490d56c5SEd Maste #define vm_iommu_map(vm) vm_iommu_modify((vm), true) 1104318224bbSNeel Natu 1105318224bbSNeel Natu int 1106318224bbSNeel Natu vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 1107318224bbSNeel Natu { 1108318224bbSNeel Natu int error; 1109318224bbSNeel Natu 1110318224bbSNeel Natu error = ppt_unassign_device(vm, bus, slot, func); 1111318224bbSNeel Natu if (error) 1112318224bbSNeel Natu return (error); 1113318224bbSNeel Natu 11149b1aa8d6SNeel Natu if (ppt_assigned_devices(vm) == 0) 1115318224bbSNeel Natu vm_iommu_unmap(vm); 11169b1aa8d6SNeel Natu 1117318224bbSNeel Natu return (0); 1118318224bbSNeel Natu } 1119318224bbSNeel Natu 1120318224bbSNeel Natu int 1121318224bbSNeel Natu vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 1122318224bbSNeel Natu { 1123318224bbSNeel Natu int error; 1124318224bbSNeel Natu vm_paddr_t maxaddr; 1125318224bbSNeel Natu 11269b1aa8d6SNeel Natu /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ 112751f45d01SNeel Natu if (ppt_assigned_devices(vm) == 0) { 1128318224bbSNeel Natu KASSERT(vm->iommu == NULL, 1129318224bbSNeel Natu ("vm_assign_pptdev: iommu must be NULL")); 1130147d12a7SAntoine Brodin maxaddr = vmm_sysmem_maxaddr(vm); 1131318224bbSNeel Natu vm->iommu = iommu_create_domain(maxaddr); 1132ffe1b10dSJohn Baldwin if (vm->iommu == NULL) 1133ffe1b10dSJohn Baldwin return (ENXIO); 1134318224bbSNeel Natu vm_iommu_map(vm); 1135318224bbSNeel Natu } 1136318224bbSNeel Natu 1137318224bbSNeel Natu error = ppt_assign_device(vm, bus, slot, func); 1138318224bbSNeel Natu return (error); 1139318224bbSNeel Natu } 1140318224bbSNeel Natu 114128b561adSJohn Baldwin static void * 114228b561adSJohn Baldwin _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1143318224bbSNeel Natu void **cookie) 1144318224bbSNeel Natu { 11459b1aa8d6SNeel Natu int i, count, pageoff; 11469b1aa8d6SNeel Natu struct mem_map *mm; 1147318224bbSNeel Natu vm_page_t m; 114828b561adSJohn Baldwin 1149318224bbSNeel Natu pageoff = gpa & PAGE_MASK; 1150318224bbSNeel Natu if (len > PAGE_SIZE - pageoff) 1151318224bbSNeel Natu panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1152318224bbSNeel Natu 11539b1aa8d6SNeel Natu count = 0; 11549b1aa8d6SNeel Natu for (i = 0; i < VM_MAX_MEMMAPS; i++) { 11559b1aa8d6SNeel Natu mm = &vm->mem_maps[i]; 115646567b4fSPeter Grehan if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) { 1157318224bbSNeel Natu count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1158318224bbSNeel Natu trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 11599b1aa8d6SNeel Natu break; 11609b1aa8d6SNeel Natu } 11619b1aa8d6SNeel Natu } 1162318224bbSNeel Natu 1163318224bbSNeel Natu if (count == 1) { 1164318224bbSNeel Natu *cookie = m; 1165318224bbSNeel Natu return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1166318224bbSNeel Natu } else { 1167318224bbSNeel Natu *cookie = NULL; 1168318224bbSNeel Natu return (NULL); 1169318224bbSNeel Natu } 1170318224bbSNeel Natu } 1171318224bbSNeel Natu 117228b561adSJohn Baldwin void * 1173d3956e46SJohn Baldwin vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 117428b561adSJohn Baldwin void **cookie) 117528b561adSJohn Baldwin { 117628b561adSJohn Baldwin #ifdef INVARIANTS 117728b561adSJohn Baldwin /* 117828b561adSJohn Baldwin * The current vcpu should be frozen to ensure 'vm_memmap[]' 117928b561adSJohn Baldwin * stability. 118028b561adSJohn Baldwin */ 1181d3956e46SJohn Baldwin int state = vcpu_get_state(vcpu, NULL); 118228b561adSJohn Baldwin KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 118328b561adSJohn Baldwin __func__, state)); 118428b561adSJohn Baldwin #endif 1185d3956e46SJohn Baldwin return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 118628b561adSJohn Baldwin } 118728b561adSJohn Baldwin 118828b561adSJohn Baldwin void * 118928b561adSJohn Baldwin vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 119028b561adSJohn Baldwin void **cookie) 119128b561adSJohn Baldwin { 119267b69e76SJohn Baldwin sx_assert(&vm->mem_segs_lock, SX_LOCKED); 119328b561adSJohn Baldwin return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 119428b561adSJohn Baldwin } 119528b561adSJohn Baldwin 1196318224bbSNeel Natu void 1197318224bbSNeel Natu vm_gpa_release(void *cookie) 1198318224bbSNeel Natu { 1199318224bbSNeel Natu vm_page_t m = cookie; 1200318224bbSNeel Natu 1201eeacb3b0SMark Johnston vm_page_unwire(m, PQ_ACTIVE); 1202366f6083SPeter Grehan } 1203366f6083SPeter Grehan 1204366f6083SPeter Grehan int 1205d3956e46SJohn Baldwin vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1206366f6083SPeter Grehan { 1207366f6083SPeter Grehan 1208366f6083SPeter Grehan if (reg >= VM_REG_LAST) 1209366f6083SPeter Grehan return (EINVAL); 1210366f6083SPeter Grehan 1211d3956e46SJohn Baldwin return (vmmops_getreg(vcpu->cookie, reg, retval)); 1212366f6083SPeter Grehan } 1213366f6083SPeter Grehan 1214366f6083SPeter Grehan int 1215d3956e46SJohn Baldwin vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1216366f6083SPeter Grehan { 1217d087a399SNeel Natu int error; 1218366f6083SPeter Grehan 1219366f6083SPeter Grehan if (reg >= VM_REG_LAST) 1220366f6083SPeter Grehan return (EINVAL); 1221366f6083SPeter Grehan 1222869c8d19SJohn Baldwin error = vmmops_setreg(vcpu->cookie, reg, val); 1223d087a399SNeel Natu if (error || reg != VM_REG_GUEST_RIP) 1224d087a399SNeel Natu return (error); 1225d087a399SNeel Natu 1226d087a399SNeel Natu /* Set 'nextrip' to match the value of %rip */ 1227d3956e46SJohn Baldwin VMM_CTR1(vcpu, "Setting nextrip to %#lx", val); 1228d087a399SNeel Natu vcpu->nextrip = val; 1229d087a399SNeel Natu return (0); 1230366f6083SPeter Grehan } 1231366f6083SPeter Grehan 1232490d56c5SEd Maste static bool 1233366f6083SPeter Grehan is_descriptor_table(int reg) 1234366f6083SPeter Grehan { 1235366f6083SPeter Grehan 1236366f6083SPeter Grehan switch (reg) { 1237366f6083SPeter Grehan case VM_REG_GUEST_IDTR: 1238366f6083SPeter Grehan case VM_REG_GUEST_GDTR: 1239490d56c5SEd Maste return (true); 1240366f6083SPeter Grehan default: 1241490d56c5SEd Maste return (false); 1242366f6083SPeter Grehan } 1243366f6083SPeter Grehan } 1244366f6083SPeter Grehan 1245490d56c5SEd Maste static bool 1246366f6083SPeter Grehan is_segment_register(int reg) 1247366f6083SPeter Grehan { 1248366f6083SPeter Grehan 1249366f6083SPeter Grehan switch (reg) { 1250366f6083SPeter Grehan case VM_REG_GUEST_ES: 1251366f6083SPeter Grehan case VM_REG_GUEST_CS: 1252366f6083SPeter Grehan case VM_REG_GUEST_SS: 1253366f6083SPeter Grehan case VM_REG_GUEST_DS: 1254366f6083SPeter Grehan case VM_REG_GUEST_FS: 1255366f6083SPeter Grehan case VM_REG_GUEST_GS: 1256366f6083SPeter Grehan case VM_REG_GUEST_TR: 1257366f6083SPeter Grehan case VM_REG_GUEST_LDTR: 1258490d56c5SEd Maste return (true); 1259366f6083SPeter Grehan default: 1260490d56c5SEd Maste return (false); 1261366f6083SPeter Grehan } 1262366f6083SPeter Grehan } 1263366f6083SPeter Grehan 1264366f6083SPeter Grehan int 1265d3956e46SJohn Baldwin vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) 1266366f6083SPeter Grehan { 1267366f6083SPeter Grehan 1268366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1269366f6083SPeter Grehan return (EINVAL); 1270366f6083SPeter Grehan 1271d3956e46SJohn Baldwin return (vmmops_getdesc(vcpu->cookie, reg, desc)); 1272366f6083SPeter Grehan } 1273366f6083SPeter Grehan 1274366f6083SPeter Grehan int 12753f0f4b15SJohn Baldwin vm_set_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) 1276366f6083SPeter Grehan { 1277366f6083SPeter Grehan 1278366f6083SPeter Grehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 1279366f6083SPeter Grehan return (EINVAL); 1280366f6083SPeter Grehan 12813f0f4b15SJohn Baldwin return (vmmops_setdesc(vcpu->cookie, reg, desc)); 1282366f6083SPeter Grehan } 1283366f6083SPeter Grehan 1284366f6083SPeter Grehan static void 1285366f6083SPeter Grehan restore_guest_fpustate(struct vcpu *vcpu) 1286366f6083SPeter Grehan { 1287366f6083SPeter Grehan 128838f1b189SPeter Grehan /* flush host state to the pcb */ 128938f1b189SPeter Grehan fpuexit(curthread); 1290bd8572e0SNeel Natu 1291bd8572e0SNeel Natu /* restore guest FPU state */ 1292366f6083SPeter Grehan fpu_stop_emulating(); 129338f1b189SPeter Grehan fpurestore(vcpu->guestfpu); 1294bd8572e0SNeel Natu 1295abb023fbSJohn Baldwin /* restore guest XCR0 if XSAVE is enabled in the host */ 1296abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) 1297abb023fbSJohn Baldwin load_xcr(0, vcpu->guest_xcr0); 1298abb023fbSJohn Baldwin 1299bd8572e0SNeel Natu /* 1300bd8572e0SNeel Natu * The FPU is now "dirty" with the guest's state so turn on emulation 1301bd8572e0SNeel Natu * to trap any access to the FPU by the host. 1302bd8572e0SNeel Natu */ 1303bd8572e0SNeel Natu fpu_start_emulating(); 1304366f6083SPeter Grehan } 1305366f6083SPeter Grehan 1306366f6083SPeter Grehan static void 1307366f6083SPeter Grehan save_guest_fpustate(struct vcpu *vcpu) 1308366f6083SPeter Grehan { 1309366f6083SPeter Grehan 1310bd8572e0SNeel Natu if ((rcr0() & CR0_TS) == 0) 1311bd8572e0SNeel Natu panic("fpu emulation not enabled in host!"); 1312bd8572e0SNeel Natu 1313abb023fbSJohn Baldwin /* save guest XCR0 and restore host XCR0 */ 1314abb023fbSJohn Baldwin if (rcr4() & CR4_XSAVE) { 1315abb023fbSJohn Baldwin vcpu->guest_xcr0 = rxcr(0); 1316abb023fbSJohn Baldwin load_xcr(0, vmm_get_host_xcr0()); 1317abb023fbSJohn Baldwin } 1318abb023fbSJohn Baldwin 1319bd8572e0SNeel Natu /* save guest FPU state */ 1320bd8572e0SNeel Natu fpu_stop_emulating(); 132138f1b189SPeter Grehan fpusave(vcpu->guestfpu); 1322366f6083SPeter Grehan fpu_start_emulating(); 1323366f6083SPeter Grehan } 1324366f6083SPeter Grehan 132561592433SNeel Natu static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 1326f76fc5d4SNeel Natu 1327318224bbSNeel Natu static int 13283f0f4b15SJohn Baldwin vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1329f80330a8SNeel Natu bool from_idle) 1330366f6083SPeter Grehan { 1331318224bbSNeel Natu int error; 1332366f6083SPeter Grehan 1333318224bbSNeel Natu vcpu_assert_locked(vcpu); 1334366f6083SPeter Grehan 1335f76fc5d4SNeel Natu /* 1336f80330a8SNeel Natu * State transitions from the vmmdev_ioctl() must always begin from 1337f80330a8SNeel Natu * the VCPU_IDLE state. This guarantees that there is only a single 1338f80330a8SNeel Natu * ioctl() operating on a vcpu at any point. 1339f80330a8SNeel Natu */ 1340f80330a8SNeel Natu if (from_idle) { 1341248e6799SNeel Natu while (vcpu->state != VCPU_IDLE) { 1342248e6799SNeel Natu vcpu->reqidle = 1; 1343248e6799SNeel Natu vcpu_notify_event_locked(vcpu, false); 13443f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "vcpu state change from %s to " 1345248e6799SNeel Natu "idle requested", vcpu_state2str(vcpu->state)); 1346f80330a8SNeel Natu msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1347248e6799SNeel Natu } 1348f80330a8SNeel Natu } else { 1349f80330a8SNeel Natu KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1350f80330a8SNeel Natu "vcpu idle state")); 1351f80330a8SNeel Natu } 1352f80330a8SNeel Natu 1353ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 1354ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1355ef39d7e9SNeel Natu "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1356ef39d7e9SNeel Natu } else { 1357ef39d7e9SNeel Natu KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1358ef39d7e9SNeel Natu "vcpu that is not running", vcpu->hostcpu)); 1359ef39d7e9SNeel Natu } 1360ef39d7e9SNeel Natu 1361f80330a8SNeel Natu /* 1362318224bbSNeel Natu * The following state transitions are allowed: 1363318224bbSNeel Natu * IDLE -> FROZEN -> IDLE 1364318224bbSNeel Natu * FROZEN -> RUNNING -> FROZEN 1365318224bbSNeel Natu * FROZEN -> SLEEPING -> FROZEN 1366f76fc5d4SNeel Natu */ 1367318224bbSNeel Natu switch (vcpu->state) { 1368318224bbSNeel Natu case VCPU_IDLE: 1369318224bbSNeel Natu case VCPU_RUNNING: 1370318224bbSNeel Natu case VCPU_SLEEPING: 1371318224bbSNeel Natu error = (newstate != VCPU_FROZEN); 1372318224bbSNeel Natu break; 1373318224bbSNeel Natu case VCPU_FROZEN: 1374318224bbSNeel Natu error = (newstate == VCPU_FROZEN); 1375318224bbSNeel Natu break; 1376318224bbSNeel Natu default: 1377318224bbSNeel Natu error = 1; 1378318224bbSNeel Natu break; 1379318224bbSNeel Natu } 1380318224bbSNeel Natu 1381f80330a8SNeel Natu if (error) 1382f80330a8SNeel Natu return (EBUSY); 1383318224bbSNeel Natu 13843f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "vcpu state changed from %s to %s", 1385248e6799SNeel Natu vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); 1386248e6799SNeel Natu 1387f80330a8SNeel Natu vcpu->state = newstate; 1388ef39d7e9SNeel Natu if (newstate == VCPU_RUNNING) 1389ef39d7e9SNeel Natu vcpu->hostcpu = curcpu; 1390ef39d7e9SNeel Natu else 1391ef39d7e9SNeel Natu vcpu->hostcpu = NOCPU; 1392ef39d7e9SNeel Natu 1393f80330a8SNeel Natu if (newstate == VCPU_IDLE) 1394f80330a8SNeel Natu wakeup(&vcpu->state); 1395f80330a8SNeel Natu 1396f80330a8SNeel Natu return (0); 1397318224bbSNeel Natu } 1398318224bbSNeel Natu 1399318224bbSNeel Natu static void 14003f0f4b15SJohn Baldwin vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1401318224bbSNeel Natu { 1402318224bbSNeel Natu int error; 1403318224bbSNeel Natu 14043f0f4b15SJohn Baldwin if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1405318224bbSNeel Natu panic("Error %d setting state to %d\n", error, newstate); 1406318224bbSNeel Natu } 1407318224bbSNeel Natu 1408318224bbSNeel Natu static void 14093f0f4b15SJohn Baldwin vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1410318224bbSNeel Natu { 1411318224bbSNeel Natu int error; 1412318224bbSNeel Natu 14133f0f4b15SJohn Baldwin if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1414318224bbSNeel Natu panic("Error %d setting state to %d", error, newstate); 1415318224bbSNeel Natu } 1416318224bbSNeel Natu 1417b837daddSKonstantin Belousov static int 1418d8be3d52SJohn Baldwin vm_handle_rendezvous(struct vcpu *vcpu) 14195b8a8cd1SNeel Natu { 1420d8be3d52SJohn Baldwin struct vm *vm = vcpu->vm; 1421b837daddSKonstantin Belousov struct thread *td; 1422d8be3d52SJohn Baldwin int error, vcpuid; 14235b8a8cd1SNeel Natu 1424b837daddSKonstantin Belousov error = 0; 1425d8be3d52SJohn Baldwin vcpuid = vcpu->vcpuid; 1426b837daddSKonstantin Belousov td = curthread; 14275b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 14285b8a8cd1SNeel Natu while (vm->rendezvous_func != NULL) { 142922d822c6SNeel Natu /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 1430e2650af1SStefan Eßer CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); 143122d822c6SNeel Natu 1432949f0f47SJohn Baldwin if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 143322d822c6SNeel Natu !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 1434d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Calling rendezvous func"); 1435d8be3d52SJohn Baldwin (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); 14365b8a8cd1SNeel Natu CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 14375b8a8cd1SNeel Natu } 14385b8a8cd1SNeel Natu if (CPU_CMP(&vm->rendezvous_req_cpus, 14395b8a8cd1SNeel Natu &vm->rendezvous_done_cpus) == 0) { 1440d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous completed"); 1441*892feec2SCorvin Köhne CPU_ZERO(&vm->rendezvous_req_cpus); 1442869dbab7SAndriy Gapon vm->rendezvous_func = NULL; 14435b8a8cd1SNeel Natu wakeup(&vm->rendezvous_func); 14445b8a8cd1SNeel Natu break; 14455b8a8cd1SNeel Natu } 1446d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Wait for rendezvous completion"); 14475b8a8cd1SNeel Natu mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 1448b837daddSKonstantin Belousov "vmrndv", hz); 1449c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1450b837daddSKonstantin Belousov mtx_unlock(&vm->rendezvous_mtx); 1451b837daddSKonstantin Belousov error = thread_check_susp(td, true); 1452b837daddSKonstantin Belousov if (error != 0) 1453b837daddSKonstantin Belousov return (error); 1454b837daddSKonstantin Belousov mtx_lock(&vm->rendezvous_mtx); 1455b837daddSKonstantin Belousov } 14565b8a8cd1SNeel Natu } 14575b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 1458b837daddSKonstantin Belousov return (0); 14595b8a8cd1SNeel Natu } 14605b8a8cd1SNeel Natu 1461318224bbSNeel Natu /* 1462318224bbSNeel Natu * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 1463318224bbSNeel Natu */ 1464318224bbSNeel Natu static int 14653f0f4b15SJohn Baldwin vm_handle_hlt(struct vcpu *vcpu, bool intr_disabled, bool *retu) 1466318224bbSNeel Natu { 14673f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1468c6a0cc2eSNeel Natu const char *wmesg; 1469b837daddSKonstantin Belousov struct thread *td; 14703f0f4b15SJohn Baldwin int error, t, vcpuid, vcpu_halted, vm_halted; 1471e50ce2aaSNeel Natu 14723f0f4b15SJohn Baldwin vcpuid = vcpu->vcpuid; 1473e50ce2aaSNeel Natu vcpu_halted = 0; 1474e50ce2aaSNeel Natu vm_halted = 0; 1475b837daddSKonstantin Belousov error = 0; 1476b837daddSKonstantin Belousov td = curthread; 1477318224bbSNeel Natu 14783f0f4b15SJohn Baldwin KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 14793f0f4b15SJohn Baldwin 1480f76fc5d4SNeel Natu vcpu_lock(vcpu); 1481c6a0cc2eSNeel Natu while (1) { 1482f76fc5d4SNeel Natu /* 1483f76fc5d4SNeel Natu * Do a final check for pending NMI or interrupts before 1484c6a0cc2eSNeel Natu * really putting this thread to sleep. Also check for 1485c6a0cc2eSNeel Natu * software events that would cause this vcpu to wakeup. 1486f76fc5d4SNeel Natu * 1487c6a0cc2eSNeel Natu * These interrupts/events could have happened after the 148815add60dSPeter Grehan * vcpu returned from vmmops_run() and before it acquired the 1489c6a0cc2eSNeel Natu * vcpu lock above. 1490f76fc5d4SNeel Natu */ 1491248e6799SNeel Natu if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) 1492c6a0cc2eSNeel Natu break; 149380cb5d84SJohn Baldwin if (vm_nmi_pending(vcpu)) 1494c6a0cc2eSNeel Natu break; 1495c6a0cc2eSNeel Natu if (!intr_disabled) { 149680cb5d84SJohn Baldwin if (vm_extint_pending(vcpu) || 1497c6a0cc2eSNeel Natu vlapic_pending_intr(vcpu->vlapic, NULL)) { 1498c6a0cc2eSNeel Natu break; 1499c6a0cc2eSNeel Natu } 1500c6a0cc2eSNeel Natu } 1501c6a0cc2eSNeel Natu 1502f008d157SNeel Natu /* Don't go to sleep if the vcpu thread needs to yield */ 150380cb5d84SJohn Baldwin if (vcpu_should_yield(vcpu)) 1504f008d157SNeel Natu break; 1505f008d157SNeel Natu 150680cb5d84SJohn Baldwin if (vcpu_debugged(vcpu)) 1507fc276d92SJohn Baldwin break; 1508fc276d92SJohn Baldwin 1509e50ce2aaSNeel Natu /* 1510e50ce2aaSNeel Natu * Some Linux guests implement "halt" by having all vcpus 1511e50ce2aaSNeel Natu * execute HLT with interrupts disabled. 'halted_cpus' keeps 1512e50ce2aaSNeel Natu * track of the vcpus that have entered this state. When all 1513e50ce2aaSNeel Natu * vcpus enter the halted state the virtual machine is halted. 1514e50ce2aaSNeel Natu */ 1515e50ce2aaSNeel Natu if (intr_disabled) { 1516c6a0cc2eSNeel Natu wmesg = "vmhalt"; 15173f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Halted"); 1518055fc2cbSNeel Natu if (!vcpu_halted && halt_detection_enabled) { 1519e50ce2aaSNeel Natu vcpu_halted = 1; 1520e50ce2aaSNeel Natu CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1521e50ce2aaSNeel Natu } 1522e50ce2aaSNeel Natu if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1523e50ce2aaSNeel Natu vm_halted = 1; 1524e50ce2aaSNeel Natu break; 1525e50ce2aaSNeel Natu } 1526e50ce2aaSNeel Natu } else { 1527e50ce2aaSNeel Natu wmesg = "vmidle"; 1528e50ce2aaSNeel Natu } 1529c6a0cc2eSNeel Natu 1530f76fc5d4SNeel Natu t = ticks; 15313f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1532f008d157SNeel Natu /* 1533f008d157SNeel Natu * XXX msleep_spin() cannot be interrupted by signals so 1534f008d157SNeel Natu * wake up periodically to check pending signals. 1535f008d157SNeel Natu */ 1536f008d157SNeel Natu msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 15373f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_FROZEN); 15383dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_IDLE_TICKS, ticks - t); 1539c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1540b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1541b837daddSKonstantin Belousov error = thread_check_susp(td, false); 15424d447b30SKonstantin Belousov if (error != 0) { 15434d447b30SKonstantin Belousov if (vcpu_halted) { 15444d447b30SKonstantin Belousov CPU_CLR_ATOMIC(vcpuid, 15454d447b30SKonstantin Belousov &vm->halted_cpus); 15464d447b30SKonstantin Belousov } 1547b837daddSKonstantin Belousov return (error); 15484d447b30SKonstantin Belousov } 1549b837daddSKonstantin Belousov vcpu_lock(vcpu); 1550b837daddSKonstantin Belousov } 1551f76fc5d4SNeel Natu } 1552e50ce2aaSNeel Natu 1553e50ce2aaSNeel Natu if (vcpu_halted) 1554e50ce2aaSNeel Natu CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1555e50ce2aaSNeel Natu 1556f76fc5d4SNeel Natu vcpu_unlock(vcpu); 1557f76fc5d4SNeel Natu 1558e50ce2aaSNeel Natu if (vm_halted) 1559e50ce2aaSNeel Natu vm_suspend(vm, VM_SUSPEND_HALT); 1560e50ce2aaSNeel Natu 1561318224bbSNeel Natu return (0); 1562318224bbSNeel Natu } 1563318224bbSNeel Natu 1564318224bbSNeel Natu static int 15653f0f4b15SJohn Baldwin vm_handle_paging(struct vcpu *vcpu, bool *retu) 1566318224bbSNeel Natu { 15673f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1568318224bbSNeel Natu int rv, ftype; 1569318224bbSNeel Natu struct vm_map *map; 1570318224bbSNeel Natu struct vm_exit *vme; 1571318224bbSNeel Natu 1572318224bbSNeel Natu vme = &vcpu->exitinfo; 1573318224bbSNeel Natu 1574d087a399SNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 1575d087a399SNeel Natu __func__, vme->inst_length)); 1576d087a399SNeel Natu 1577318224bbSNeel Natu ftype = vme->u.paging.fault_type; 1578318224bbSNeel Natu KASSERT(ftype == VM_PROT_READ || 1579318224bbSNeel Natu ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1580318224bbSNeel Natu ("vm_handle_paging: invalid fault_type %d", ftype)); 1581318224bbSNeel Natu 1582318224bbSNeel Natu if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1583318224bbSNeel Natu rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1584318224bbSNeel Natu vme->u.paging.gpa, ftype); 15859d8d8e3eSNeel Natu if (rv == 0) { 15863f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx", 15879d8d8e3eSNeel Natu ftype == VM_PROT_READ ? "accessed" : "dirty", 15889d8d8e3eSNeel Natu vme->u.paging.gpa); 1589318224bbSNeel Natu goto done; 1590318224bbSNeel Natu } 15919d8d8e3eSNeel Natu } 1592318224bbSNeel Natu 1593318224bbSNeel Natu map = &vm->vmspace->vm_map; 1594df08823dSKonstantin Belousov rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1595318224bbSNeel Natu 15963f0f4b15SJohn Baldwin VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, " 1597513c8d33SNeel Natu "ftype = %d", rv, vme->u.paging.gpa, ftype); 1598318224bbSNeel Natu 1599318224bbSNeel Natu if (rv != KERN_SUCCESS) 1600318224bbSNeel Natu return (EFAULT); 1601318224bbSNeel Natu done: 1602318224bbSNeel Natu return (0); 1603318224bbSNeel Natu } 1604318224bbSNeel Natu 1605318224bbSNeel Natu static int 16063f0f4b15SJohn Baldwin vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 1607318224bbSNeel Natu { 1608318224bbSNeel Natu struct vie *vie; 1609318224bbSNeel Natu struct vm_exit *vme; 1610e4f605eeSTycho Nightingale uint64_t gla, gpa, cs_base; 1611e813a873SNeel Natu struct vm_guest_paging *paging; 1612565bbb86SNeel Natu mem_region_read_t mread; 1613565bbb86SNeel Natu mem_region_write_t mwrite; 1614f7a9f178SNeel Natu enum vm_cpu_mode cpu_mode; 16151c73ea3eSNeel Natu int cs_d, error, fault; 1616318224bbSNeel Natu 1617318224bbSNeel Natu vme = &vcpu->exitinfo; 1618318224bbSNeel Natu 16191c73ea3eSNeel Natu KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", 16201c73ea3eSNeel Natu __func__, vme->inst_length)); 16211c73ea3eSNeel Natu 1622318224bbSNeel Natu gla = vme->u.inst_emul.gla; 1623318224bbSNeel Natu gpa = vme->u.inst_emul.gpa; 1624e4f605eeSTycho Nightingale cs_base = vme->u.inst_emul.cs_base; 1625f7a9f178SNeel Natu cs_d = vme->u.inst_emul.cs_d; 1626318224bbSNeel Natu vie = &vme->u.inst_emul.vie; 1627e813a873SNeel Natu paging = &vme->u.inst_emul.paging; 1628f7a9f178SNeel Natu cpu_mode = paging->cpu_mode; 1629318224bbSNeel Natu 16303f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "inst_emul fault accessing gpa %#lx", gpa); 16319d8d8e3eSNeel Natu 1632318224bbSNeel Natu /* Fetch, decode and emulate the faulting instruction */ 1633c2a875f9SNeel Natu if (vie->num_valid == 0) { 1634d3956e46SJohn Baldwin error = vmm_fetch_instruction(vcpu, paging, vme->rip + cs_base, 1635d3956e46SJohn Baldwin VIE_INST_SIZE, vie, &fault); 1636c2a875f9SNeel Natu } else { 1637c2a875f9SNeel Natu /* 1638c2a875f9SNeel Natu * The instruction bytes have already been copied into 'vie' 1639c2a875f9SNeel Natu */ 16409c4d5478SNeel Natu error = fault = 0; 1641c2a875f9SNeel Natu } 16429c4d5478SNeel Natu if (error || fault) 16439c4d5478SNeel Natu return (error); 1644318224bbSNeel Natu 1645d3956e46SJohn Baldwin if (vmm_decode_instruction(vcpu, gla, cpu_mode, cs_d, vie) != 0) { 16463f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "Error decoding instruction at %#lx", 1647c07a0648SNeel Natu vme->rip + cs_base); 1648c07a0648SNeel Natu *retu = true; /* dump instruction bytes in userspace */ 1649c07a0648SNeel Natu return (0); 1650c07a0648SNeel Natu } 1651318224bbSNeel Natu 1652a0b78f09SPeter Grehan /* 16531c73ea3eSNeel Natu * Update 'nextrip' based on the length of the emulated instruction. 1654a0b78f09SPeter Grehan */ 1655a0b78f09SPeter Grehan vme->inst_length = vie->num_processed; 1656d087a399SNeel Natu vcpu->nextrip += vie->num_processed; 16573f0f4b15SJohn Baldwin VMM_CTR1(vcpu, "nextrip updated to %#lx after instruction decoding", 16583f0f4b15SJohn Baldwin vcpu->nextrip); 1659a0b78f09SPeter Grehan 166008e3ff32SNeel Natu /* return to userland unless this is an in-kernel emulated device */ 1661565bbb86SNeel Natu if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1662565bbb86SNeel Natu mread = lapic_mmio_read; 1663565bbb86SNeel Natu mwrite = lapic_mmio_write; 1664565bbb86SNeel Natu } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1665565bbb86SNeel Natu mread = vioapic_mmio_read; 1666565bbb86SNeel Natu mwrite = vioapic_mmio_write; 166708e3ff32SNeel Natu } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 166808e3ff32SNeel Natu mread = vhpet_mmio_read; 166908e3ff32SNeel Natu mwrite = vhpet_mmio_write; 1670565bbb86SNeel Natu } else { 1671becd9849SNeel Natu *retu = true; 1672318224bbSNeel Natu return (0); 1673318224bbSNeel Natu } 1674318224bbSNeel Natu 1675d3956e46SJohn Baldwin error = vmm_emulate_instruction(vcpu, gpa, vie, paging, mread, mwrite, 1676d3956e46SJohn Baldwin retu); 1677318224bbSNeel Natu 1678318224bbSNeel Natu return (error); 1679318224bbSNeel Natu } 1680318224bbSNeel Natu 1681b15a09c0SNeel Natu static int 16823f0f4b15SJohn Baldwin vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1683b15a09c0SNeel Natu { 16843f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1685b837daddSKonstantin Belousov int error, i; 1686b837daddSKonstantin Belousov struct thread *td; 1687b15a09c0SNeel Natu 1688b837daddSKonstantin Belousov error = 0; 1689b837daddSKonstantin Belousov td = curthread; 1690b15a09c0SNeel Natu 16913f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1692b15a09c0SNeel Natu 1693b15a09c0SNeel Natu /* 1694b15a09c0SNeel Natu * Wait until all 'active_cpus' have suspended themselves. 1695b15a09c0SNeel Natu * 1696b15a09c0SNeel Natu * Since a VM may be suspended at any time including when one or 1697b15a09c0SNeel Natu * more vcpus are doing a rendezvous we need to call the rendezvous 1698b15a09c0SNeel Natu * handler while we are waiting to prevent a deadlock. 1699b15a09c0SNeel Natu */ 1700b15a09c0SNeel Natu vcpu_lock(vcpu); 1701b837daddSKonstantin Belousov while (error == 0) { 1702b15a09c0SNeel Natu if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 17033f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "All vcpus suspended"); 1704b15a09c0SNeel Natu break; 1705b15a09c0SNeel Natu } 1706b15a09c0SNeel Natu 1707b15a09c0SNeel Natu if (vm->rendezvous_func == NULL) { 17083f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Sleeping during suspend"); 17093f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1710b15a09c0SNeel Natu msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 17113f0f4b15SJohn Baldwin vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1712c6d31b83SKonstantin Belousov if (td_ast_pending(td, TDA_SUSPEND)) { 1713b837daddSKonstantin Belousov vcpu_unlock(vcpu); 1714b837daddSKonstantin Belousov error = thread_check_susp(td, false); 1715b837daddSKonstantin Belousov vcpu_lock(vcpu); 1716b837daddSKonstantin Belousov } 1717b15a09c0SNeel Natu } else { 17183f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous during suspend"); 1719b15a09c0SNeel Natu vcpu_unlock(vcpu); 1720d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 1721b15a09c0SNeel Natu vcpu_lock(vcpu); 1722b15a09c0SNeel Natu } 1723b15a09c0SNeel Natu } 1724b15a09c0SNeel Natu vcpu_unlock(vcpu); 1725b15a09c0SNeel Natu 1726b15a09c0SNeel Natu /* 1727b15a09c0SNeel Natu * Wakeup the other sleeping vcpus and return to userspace. 1728b15a09c0SNeel Natu */ 1729a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) { 1730b15a09c0SNeel Natu if (CPU_ISSET(i, &vm->suspended_cpus)) { 17313f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 1732b15a09c0SNeel Natu } 1733b15a09c0SNeel Natu } 1734b15a09c0SNeel Natu 1735b15a09c0SNeel Natu *retu = true; 1736b837daddSKonstantin Belousov return (error); 1737b15a09c0SNeel Natu } 1738b15a09c0SNeel Natu 1739248e6799SNeel Natu static int 17403f0f4b15SJohn Baldwin vm_handle_reqidle(struct vcpu *vcpu, bool *retu) 1741248e6799SNeel Natu { 1742248e6799SNeel Natu vcpu_lock(vcpu); 1743248e6799SNeel Natu KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); 1744248e6799SNeel Natu vcpu->reqidle = 0; 1745248e6799SNeel Natu vcpu_unlock(vcpu); 1746248e6799SNeel Natu *retu = true; 1747248e6799SNeel Natu return (0); 1748248e6799SNeel Natu } 1749248e6799SNeel Natu 1750b15a09c0SNeel Natu int 1751f0fdcfe2SNeel Natu vm_suspend(struct vm *vm, enum vm_suspend_how how) 1752b15a09c0SNeel Natu { 1753f0fdcfe2SNeel Natu int i; 1754b15a09c0SNeel Natu 1755f0fdcfe2SNeel Natu if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1756f0fdcfe2SNeel Natu return (EINVAL); 1757f0fdcfe2SNeel Natu 1758f0fdcfe2SNeel Natu if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1759f0fdcfe2SNeel Natu VM_CTR2(vm, "virtual machine already suspended %d/%d", 1760f0fdcfe2SNeel Natu vm->suspend, how); 1761b15a09c0SNeel Natu return (EALREADY); 1762b15a09c0SNeel Natu } 1763f0fdcfe2SNeel Natu 1764f0fdcfe2SNeel Natu VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1765f0fdcfe2SNeel Natu 1766f0fdcfe2SNeel Natu /* 1767f0fdcfe2SNeel Natu * Notify all active vcpus that they are now suspended. 1768f0fdcfe2SNeel Natu */ 1769a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) { 1770f0fdcfe2SNeel Natu if (CPU_ISSET(i, &vm->active_cpus)) 17713f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 1772f0fdcfe2SNeel Natu } 1773f0fdcfe2SNeel Natu 1774f0fdcfe2SNeel Natu return (0); 1775f0fdcfe2SNeel Natu } 1776f0fdcfe2SNeel Natu 1777f0fdcfe2SNeel Natu void 177880cb5d84SJohn Baldwin vm_exit_suspended(struct vcpu *vcpu, uint64_t rip) 1779f0fdcfe2SNeel Natu { 178080cb5d84SJohn Baldwin struct vm *vm = vcpu->vm; 1781f0fdcfe2SNeel Natu struct vm_exit *vmexit; 1782f0fdcfe2SNeel Natu 1783f0fdcfe2SNeel Natu KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1784f0fdcfe2SNeel Natu ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1785f0fdcfe2SNeel Natu 178680cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1787f0fdcfe2SNeel Natu vmexit->rip = rip; 1788f0fdcfe2SNeel Natu vmexit->inst_length = 0; 1789f0fdcfe2SNeel Natu vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1790f0fdcfe2SNeel Natu vmexit->u.suspended.how = vm->suspend; 1791b15a09c0SNeel Natu } 1792b15a09c0SNeel Natu 179340487465SNeel Natu void 179480cb5d84SJohn Baldwin vm_exit_debug(struct vcpu *vcpu, uint64_t rip) 1795fc276d92SJohn Baldwin { 1796fc276d92SJohn Baldwin struct vm_exit *vmexit; 1797fc276d92SJohn Baldwin 179880cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1799fc276d92SJohn Baldwin vmexit->rip = rip; 1800fc276d92SJohn Baldwin vmexit->inst_length = 0; 1801fc276d92SJohn Baldwin vmexit->exitcode = VM_EXITCODE_DEBUG; 1802fc276d92SJohn Baldwin } 1803fc276d92SJohn Baldwin 1804fc276d92SJohn Baldwin void 180580cb5d84SJohn Baldwin vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip) 180640487465SNeel Natu { 180740487465SNeel Natu struct vm_exit *vmexit; 180840487465SNeel Natu 180980cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 181040487465SNeel Natu vmexit->rip = rip; 181140487465SNeel Natu vmexit->inst_length = 0; 181240487465SNeel Natu vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 181380cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_RENDEZVOUS, 1); 181440487465SNeel Natu } 181540487465SNeel Natu 181640487465SNeel Natu void 181780cb5d84SJohn Baldwin vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip) 1818248e6799SNeel Natu { 1819248e6799SNeel Natu struct vm_exit *vmexit; 1820248e6799SNeel Natu 182180cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 1822248e6799SNeel Natu vmexit->rip = rip; 1823248e6799SNeel Natu vmexit->inst_length = 0; 1824248e6799SNeel Natu vmexit->exitcode = VM_EXITCODE_REQIDLE; 182580cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_REQIDLE, 1); 1826248e6799SNeel Natu } 1827248e6799SNeel Natu 1828248e6799SNeel Natu void 182980cb5d84SJohn Baldwin vm_exit_astpending(struct vcpu *vcpu, uint64_t rip) 183040487465SNeel Natu { 183140487465SNeel Natu struct vm_exit *vmexit; 183240487465SNeel Natu 183380cb5d84SJohn Baldwin vmexit = vm_exitinfo(vcpu); 183440487465SNeel Natu vmexit->rip = rip; 183540487465SNeel Natu vmexit->inst_length = 0; 183640487465SNeel Natu vmexit->exitcode = VM_EXITCODE_BOGUS; 183780cb5d84SJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_ASTPENDING, 1); 183840487465SNeel Natu } 183940487465SNeel Natu 1840318224bbSNeel Natu int 18413f0f4b15SJohn Baldwin vm_run(struct vcpu *vcpu, struct vm_exit *vme_user) 1842318224bbSNeel Natu { 18433f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 1844248e6799SNeel Natu struct vm_eventinfo evinfo; 1845318224bbSNeel Natu int error, vcpuid; 1846318224bbSNeel Natu struct pcb *pcb; 1847d087a399SNeel Natu uint64_t tscval; 1848318224bbSNeel Natu struct vm_exit *vme; 1849becd9849SNeel Natu bool retu, intr_disabled; 1850318224bbSNeel Natu pmap_t pmap; 1851318224bbSNeel Natu 18523f0f4b15SJohn Baldwin vcpuid = vcpu->vcpuid; 1853318224bbSNeel Natu 185495ebc360SNeel Natu if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 185595ebc360SNeel Natu return (EINVAL); 185695ebc360SNeel Natu 185795ebc360SNeel Natu if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 185895ebc360SNeel Natu return (EINVAL); 185995ebc360SNeel Natu 1860318224bbSNeel Natu pmap = vmspace_pmap(vm->vmspace); 1861318224bbSNeel Natu vme = &vcpu->exitinfo; 1862*892feec2SCorvin Köhne evinfo.rptr = &vm->rendezvous_req_cpus; 1863248e6799SNeel Natu evinfo.sptr = &vm->suspend; 1864248e6799SNeel Natu evinfo.iptr = &vcpu->reqidle; 1865318224bbSNeel Natu restart: 1866318224bbSNeel Natu critical_enter(); 1867318224bbSNeel Natu 1868318224bbSNeel Natu KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1869318224bbSNeel Natu ("vm_run: absurd pm_active")); 1870318224bbSNeel Natu 1871318224bbSNeel Natu tscval = rdtsc(); 1872318224bbSNeel Natu 1873318224bbSNeel Natu pcb = PCPU_GET(curpcb); 1874318224bbSNeel Natu set_pcb_flags(pcb, PCB_FULL_IRET); 1875318224bbSNeel Natu 1876318224bbSNeel Natu restore_guest_fpustate(vcpu); 1877318224bbSNeel Natu 18783f0f4b15SJohn Baldwin vcpu_require_state(vcpu, VCPU_RUNNING); 1879869c8d19SJohn Baldwin error = vmmops_run(vcpu->cookie, vcpu->nextrip, pmap, &evinfo); 18803f0f4b15SJohn Baldwin vcpu_require_state(vcpu, VCPU_FROZEN); 1881318224bbSNeel Natu 1882318224bbSNeel Natu save_guest_fpustate(vcpu); 1883318224bbSNeel Natu 18843dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1885318224bbSNeel Natu 1886318224bbSNeel Natu critical_exit(); 1887318224bbSNeel Natu 1888318224bbSNeel Natu if (error == 0) { 1889becd9849SNeel Natu retu = false; 1890d087a399SNeel Natu vcpu->nextrip = vme->rip + vme->inst_length; 1891318224bbSNeel Natu switch (vme->exitcode) { 1892248e6799SNeel Natu case VM_EXITCODE_REQIDLE: 18933f0f4b15SJohn Baldwin error = vm_handle_reqidle(vcpu, &retu); 1894248e6799SNeel Natu break; 1895b15a09c0SNeel Natu case VM_EXITCODE_SUSPENDED: 18963f0f4b15SJohn Baldwin error = vm_handle_suspend(vcpu, &retu); 1897b15a09c0SNeel Natu break; 189830b94db8SNeel Natu case VM_EXITCODE_IOAPIC_EOI: 1899e42c24d5SJohn Baldwin vioapic_process_eoi(vm, vme->u.ioapic_eoi.vector); 190030b94db8SNeel Natu break; 19015b8a8cd1SNeel Natu case VM_EXITCODE_RENDEZVOUS: 1902d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 19035b8a8cd1SNeel Natu break; 1904318224bbSNeel Natu case VM_EXITCODE_HLT: 1905becd9849SNeel Natu intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 19063f0f4b15SJohn Baldwin error = vm_handle_hlt(vcpu, intr_disabled, &retu); 1907318224bbSNeel Natu break; 1908318224bbSNeel Natu case VM_EXITCODE_PAGING: 19093f0f4b15SJohn Baldwin error = vm_handle_paging(vcpu, &retu); 1910318224bbSNeel Natu break; 1911318224bbSNeel Natu case VM_EXITCODE_INST_EMUL: 19123f0f4b15SJohn Baldwin error = vm_handle_inst_emul(vcpu, &retu); 1913318224bbSNeel Natu break; 1914d17b5104SNeel Natu case VM_EXITCODE_INOUT: 1915d17b5104SNeel Natu case VM_EXITCODE_INOUT_STR: 19163f0f4b15SJohn Baldwin error = vm_handle_inout(vcpu, vme, &retu); 1917d17b5104SNeel Natu break; 191865145c7fSNeel Natu case VM_EXITCODE_MONITOR: 191965145c7fSNeel Natu case VM_EXITCODE_MWAIT: 192027d26457SAndrew Turner case VM_EXITCODE_VMINSN: 1921d3956e46SJohn Baldwin vm_inject_ud(vcpu); 192265145c7fSNeel Natu break; 1923318224bbSNeel Natu default: 1924becd9849SNeel Natu retu = true; /* handled in userland */ 1925318224bbSNeel Natu break; 1926318224bbSNeel Natu } 1927318224bbSNeel Natu } 1928318224bbSNeel Natu 19290bda8d3eSCorvin Köhne /* 19300bda8d3eSCorvin Köhne * VM_EXITCODE_INST_EMUL could access the apic which could transform the 19310bda8d3eSCorvin Köhne * exit code into VM_EXITCODE_IPI. 19320bda8d3eSCorvin Köhne */ 19330bda8d3eSCorvin Köhne if (error == 0 && vme->exitcode == VM_EXITCODE_IPI) { 19340bda8d3eSCorvin Köhne retu = false; 1935d8be3d52SJohn Baldwin error = vm_handle_ipi(vcpu, vme, &retu); 19360bda8d3eSCorvin Köhne } 19370bda8d3eSCorvin Köhne 1938d087a399SNeel Natu if (error == 0 && retu == false) 1939f76fc5d4SNeel Natu goto restart; 1940f76fc5d4SNeel Natu 19413dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VMEXIT_USERSPACE, 1); 19423f0f4b15SJohn Baldwin VMM_CTR2(vcpu, "retu %d/%d", error, vme->exitcode); 1943248e6799SNeel Natu 1944318224bbSNeel Natu /* copy the exit information */ 19453f0f4b15SJohn Baldwin *vme_user = *vme; 1946366f6083SPeter Grehan return (error); 1947366f6083SPeter Grehan } 1948366f6083SPeter Grehan 1949366f6083SPeter Grehan int 1950d3956e46SJohn Baldwin vm_restart_instruction(struct vcpu *vcpu) 1951c9c75df4SNeel Natu { 1952d087a399SNeel Natu enum vcpu_state state; 1953d087a399SNeel Natu uint64_t rip; 195473505a10SRobert Wing int error __diagused; 1955c9c75df4SNeel Natu 1956d3956e46SJohn Baldwin state = vcpu_get_state(vcpu, NULL); 1957d087a399SNeel Natu if (state == VCPU_RUNNING) { 1958d087a399SNeel Natu /* 1959d087a399SNeel Natu * When a vcpu is "running" the next instruction is determined 1960d087a399SNeel Natu * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. 1961d087a399SNeel Natu * Thus setting 'inst_length' to zero will cause the current 1962d087a399SNeel Natu * instruction to be restarted. 1963d087a399SNeel Natu */ 1964c9c75df4SNeel Natu vcpu->exitinfo.inst_length = 0; 1965d3956e46SJohn Baldwin VMM_CTR1(vcpu, "restarting instruction at %#lx by " 1966d087a399SNeel Natu "setting inst_length to zero", vcpu->exitinfo.rip); 1967d087a399SNeel Natu } else if (state == VCPU_FROZEN) { 1968d087a399SNeel Natu /* 1969d087a399SNeel Natu * When a vcpu is "frozen" it is outside the critical section 197015add60dSPeter Grehan * around vmmops_run() and 'nextrip' points to the next 197115add60dSPeter Grehan * instruction. Thus instruction restart is achieved by setting 197215add60dSPeter Grehan * 'nextrip' to the vcpu's %rip. 1973d087a399SNeel Natu */ 1974d3956e46SJohn Baldwin error = vm_get_register(vcpu, VM_REG_GUEST_RIP, &rip); 1975d087a399SNeel Natu KASSERT(!error, ("%s: error %d getting rip", __func__, error)); 1976d3956e46SJohn Baldwin VMM_CTR2(vcpu, "restarting instruction by updating " 1977d087a399SNeel Natu "nextrip from %#lx to %#lx", vcpu->nextrip, rip); 1978d087a399SNeel Natu vcpu->nextrip = rip; 1979d087a399SNeel Natu } else { 1980d087a399SNeel Natu panic("%s: invalid state %d", __func__, state); 1981d087a399SNeel Natu } 1982c9c75df4SNeel Natu return (0); 1983c9c75df4SNeel Natu } 1984c9c75df4SNeel Natu 1985c9c75df4SNeel Natu int 198680cb5d84SJohn Baldwin vm_exit_intinfo(struct vcpu *vcpu, uint64_t info) 1987091d4532SNeel Natu { 1988091d4532SNeel Natu int type, vector; 1989091d4532SNeel Natu 1990091d4532SNeel Natu if (info & VM_INTINFO_VALID) { 1991091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 1992091d4532SNeel Natu vector = info & 0xff; 1993091d4532SNeel Natu if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1994091d4532SNeel Natu return (EINVAL); 1995091d4532SNeel Natu if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1996091d4532SNeel Natu return (EINVAL); 1997091d4532SNeel Natu if (info & VM_INTINFO_RSVD) 1998091d4532SNeel Natu return (EINVAL); 1999091d4532SNeel Natu } else { 2000091d4532SNeel Natu info = 0; 2001091d4532SNeel Natu } 200280cb5d84SJohn Baldwin VMM_CTR2(vcpu, "%s: info1(%#lx)", __func__, info); 2003091d4532SNeel Natu vcpu->exitintinfo = info; 2004091d4532SNeel Natu return (0); 2005091d4532SNeel Natu } 2006091d4532SNeel Natu 2007091d4532SNeel Natu enum exc_class { 2008091d4532SNeel Natu EXC_BENIGN, 2009091d4532SNeel Natu EXC_CONTRIBUTORY, 2010091d4532SNeel Natu EXC_PAGEFAULT 2011091d4532SNeel Natu }; 2012091d4532SNeel Natu 2013091d4532SNeel Natu #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 2014091d4532SNeel Natu 2015091d4532SNeel Natu static enum exc_class 2016091d4532SNeel Natu exception_class(uint64_t info) 2017091d4532SNeel Natu { 2018091d4532SNeel Natu int type, vector; 2019091d4532SNeel Natu 2020091d4532SNeel Natu KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 2021091d4532SNeel Natu type = info & VM_INTINFO_TYPE; 2022091d4532SNeel Natu vector = info & 0xff; 2023091d4532SNeel Natu 2024091d4532SNeel Natu /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 2025091d4532SNeel Natu switch (type) { 2026091d4532SNeel Natu case VM_INTINFO_HWINTR: 2027091d4532SNeel Natu case VM_INTINFO_SWINTR: 2028091d4532SNeel Natu case VM_INTINFO_NMI: 2029091d4532SNeel Natu return (EXC_BENIGN); 2030091d4532SNeel Natu default: 2031091d4532SNeel Natu /* 2032091d4532SNeel Natu * Hardware exception. 2033091d4532SNeel Natu * 2034091d4532SNeel Natu * SVM and VT-x use identical type values to represent NMI, 2035091d4532SNeel Natu * hardware interrupt and software interrupt. 2036091d4532SNeel Natu * 2037091d4532SNeel Natu * SVM uses type '3' for all exceptions. VT-x uses type '3' 2038091d4532SNeel Natu * for exceptions except #BP and #OF. #BP and #OF use a type 2039091d4532SNeel Natu * value of '5' or '6'. Therefore we don't check for explicit 2040091d4532SNeel Natu * values of 'type' to classify 'intinfo' into a hardware 2041091d4532SNeel Natu * exception. 2042091d4532SNeel Natu */ 2043091d4532SNeel Natu break; 2044091d4532SNeel Natu } 2045091d4532SNeel Natu 2046091d4532SNeel Natu switch (vector) { 2047091d4532SNeel Natu case IDT_PF: 2048091d4532SNeel Natu case IDT_VE: 2049091d4532SNeel Natu return (EXC_PAGEFAULT); 2050091d4532SNeel Natu case IDT_DE: 2051091d4532SNeel Natu case IDT_TS: 2052091d4532SNeel Natu case IDT_NP: 2053091d4532SNeel Natu case IDT_SS: 2054091d4532SNeel Natu case IDT_GP: 2055091d4532SNeel Natu return (EXC_CONTRIBUTORY); 2056091d4532SNeel Natu default: 2057091d4532SNeel Natu return (EXC_BENIGN); 2058091d4532SNeel Natu } 2059091d4532SNeel Natu } 2060091d4532SNeel Natu 2061091d4532SNeel Natu static int 206280cb5d84SJohn Baldwin nested_fault(struct vcpu *vcpu, uint64_t info1, uint64_t info2, 2063091d4532SNeel Natu uint64_t *retinfo) 2064091d4532SNeel Natu { 2065091d4532SNeel Natu enum exc_class exc1, exc2; 2066091d4532SNeel Natu int type1, vector1; 2067091d4532SNeel Natu 2068091d4532SNeel Natu KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 2069091d4532SNeel Natu KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 2070091d4532SNeel Natu 2071091d4532SNeel Natu /* 2072091d4532SNeel Natu * If an exception occurs while attempting to call the double-fault 2073091d4532SNeel Natu * handler the processor enters shutdown mode (aka triple fault). 2074091d4532SNeel Natu */ 2075091d4532SNeel Natu type1 = info1 & VM_INTINFO_TYPE; 2076091d4532SNeel Natu vector1 = info1 & 0xff; 2077091d4532SNeel Natu if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 207880cb5d84SJohn Baldwin VMM_CTR2(vcpu, "triple fault: info1(%#lx), info2(%#lx)", 2079091d4532SNeel Natu info1, info2); 208080cb5d84SJohn Baldwin vm_suspend(vcpu->vm, VM_SUSPEND_TRIPLEFAULT); 2081091d4532SNeel Natu *retinfo = 0; 2082091d4532SNeel Natu return (0); 2083091d4532SNeel Natu } 2084091d4532SNeel Natu 2085091d4532SNeel Natu /* 2086091d4532SNeel Natu * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 2087091d4532SNeel Natu */ 2088091d4532SNeel Natu exc1 = exception_class(info1); 2089091d4532SNeel Natu exc2 = exception_class(info2); 2090091d4532SNeel Natu if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 2091091d4532SNeel Natu (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 2092091d4532SNeel Natu /* Convert nested fault into a double fault. */ 2093091d4532SNeel Natu *retinfo = IDT_DF; 2094091d4532SNeel Natu *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 2095091d4532SNeel Natu *retinfo |= VM_INTINFO_DEL_ERRCODE; 2096091d4532SNeel Natu } else { 2097091d4532SNeel Natu /* Handle exceptions serially */ 2098091d4532SNeel Natu *retinfo = info2; 2099091d4532SNeel Natu } 2100091d4532SNeel Natu return (1); 2101091d4532SNeel Natu } 2102091d4532SNeel Natu 2103091d4532SNeel Natu static uint64_t 2104091d4532SNeel Natu vcpu_exception_intinfo(struct vcpu *vcpu) 2105091d4532SNeel Natu { 2106091d4532SNeel Natu uint64_t info = 0; 2107091d4532SNeel Natu 2108091d4532SNeel Natu if (vcpu->exception_pending) { 2109c9c75df4SNeel Natu info = vcpu->exc_vector & 0xff; 2110091d4532SNeel Natu info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 2111c9c75df4SNeel Natu if (vcpu->exc_errcode_valid) { 2112091d4532SNeel Natu info |= VM_INTINFO_DEL_ERRCODE; 2113c9c75df4SNeel Natu info |= (uint64_t)vcpu->exc_errcode << 32; 2114091d4532SNeel Natu } 2115091d4532SNeel Natu } 2116091d4532SNeel Natu return (info); 2117091d4532SNeel Natu } 2118091d4532SNeel Natu 2119091d4532SNeel Natu int 212080cb5d84SJohn Baldwin vm_entry_intinfo(struct vcpu *vcpu, uint64_t *retinfo) 2121091d4532SNeel Natu { 2122091d4532SNeel Natu uint64_t info1, info2; 2123091d4532SNeel Natu int valid; 2124091d4532SNeel Natu 2125091d4532SNeel Natu info1 = vcpu->exitintinfo; 2126091d4532SNeel Natu vcpu->exitintinfo = 0; 2127091d4532SNeel Natu 2128091d4532SNeel Natu info2 = 0; 2129091d4532SNeel Natu if (vcpu->exception_pending) { 2130091d4532SNeel Natu info2 = vcpu_exception_intinfo(vcpu); 2131091d4532SNeel Natu vcpu->exception_pending = 0; 213280cb5d84SJohn Baldwin VMM_CTR2(vcpu, "Exception %d delivered: %#lx", 2133c9c75df4SNeel Natu vcpu->exc_vector, info2); 2134091d4532SNeel Natu } 2135091d4532SNeel Natu 2136091d4532SNeel Natu if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 213780cb5d84SJohn Baldwin valid = nested_fault(vcpu, info1, info2, retinfo); 2138091d4532SNeel Natu } else if (info1 & VM_INTINFO_VALID) { 2139091d4532SNeel Natu *retinfo = info1; 2140091d4532SNeel Natu valid = 1; 2141091d4532SNeel Natu } else if (info2 & VM_INTINFO_VALID) { 2142091d4532SNeel Natu *retinfo = info2; 2143091d4532SNeel Natu valid = 1; 2144091d4532SNeel Natu } else { 2145091d4532SNeel Natu valid = 0; 2146091d4532SNeel Natu } 2147091d4532SNeel Natu 2148091d4532SNeel Natu if (valid) { 2149d3956e46SJohn Baldwin VMM_CTR4(vcpu, "%s: info1(%#lx), info2(%#lx), " 2150091d4532SNeel Natu "retinfo(%#lx)", __func__, info1, info2, *retinfo); 2151091d4532SNeel Natu } 2152091d4532SNeel Natu 2153091d4532SNeel Natu return (valid); 2154091d4532SNeel Natu } 2155091d4532SNeel Natu 2156091d4532SNeel Natu int 21573f0f4b15SJohn Baldwin vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) 2158091d4532SNeel Natu { 2159091d4532SNeel Natu *info1 = vcpu->exitintinfo; 2160091d4532SNeel Natu *info2 = vcpu_exception_intinfo(vcpu); 2161091d4532SNeel Natu return (0); 2162091d4532SNeel Natu } 2163091d4532SNeel Natu 2164091d4532SNeel Natu int 2165d3956e46SJohn Baldwin vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid, 2166c9c75df4SNeel Natu uint32_t errcode, int restart_instruction) 2167366f6083SPeter Grehan { 216847b9935dSNeel Natu uint64_t regval; 216973505a10SRobert Wing int error __diagused; 2170dc506506SNeel Natu 2171c9c75df4SNeel Natu if (vector < 0 || vector >= 32) 2172366f6083SPeter Grehan return (EINVAL); 2173366f6083SPeter Grehan 2174091d4532SNeel Natu /* 2175091d4532SNeel Natu * A double fault exception should never be injected directly into 2176091d4532SNeel Natu * the guest. It is a derived exception that results from specific 2177091d4532SNeel Natu * combinations of nested faults. 2178091d4532SNeel Natu */ 2179c9c75df4SNeel Natu if (vector == IDT_DF) 2180091d4532SNeel Natu return (EINVAL); 2181091d4532SNeel Natu 2182dc506506SNeel Natu if (vcpu->exception_pending) { 2183d3956e46SJohn Baldwin VMM_CTR2(vcpu, "Unable to inject exception %d due to " 2184c9c75df4SNeel Natu "pending exception %d", vector, vcpu->exc_vector); 2185dc506506SNeel Natu return (EBUSY); 2186dc506506SNeel Natu } 2187dc506506SNeel Natu 218847b9935dSNeel Natu if (errcode_valid) { 218947b9935dSNeel Natu /* 219047b9935dSNeel Natu * Exceptions don't deliver an error code in real mode. 219147b9935dSNeel Natu */ 2192d3956e46SJohn Baldwin error = vm_get_register(vcpu, VM_REG_GUEST_CR0, ®val); 219347b9935dSNeel Natu KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); 219447b9935dSNeel Natu if (!(regval & CR0_PE)) 219547b9935dSNeel Natu errcode_valid = 0; 219647b9935dSNeel Natu } 219747b9935dSNeel Natu 21982ce12423SNeel Natu /* 21992ce12423SNeel Natu * From section 26.6.1 "Interruptibility State" in Intel SDM: 22002ce12423SNeel Natu * 22012ce12423SNeel Natu * Event blocking by "STI" or "MOV SS" is cleared after guest executes 22022ce12423SNeel Natu * one instruction or incurs an exception. 22032ce12423SNeel Natu */ 2204d3956e46SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW, 0); 22052ce12423SNeel Natu KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 22062ce12423SNeel Natu __func__, error)); 22072ce12423SNeel Natu 2208c9c75df4SNeel Natu if (restart_instruction) 2209d3956e46SJohn Baldwin vm_restart_instruction(vcpu); 2210c9c75df4SNeel Natu 2211dc506506SNeel Natu vcpu->exception_pending = 1; 2212c9c75df4SNeel Natu vcpu->exc_vector = vector; 2213c9c75df4SNeel Natu vcpu->exc_errcode = errcode; 2214c9c75df4SNeel Natu vcpu->exc_errcode_valid = errcode_valid; 2215d3956e46SJohn Baldwin VMM_CTR1(vcpu, "Exception %d pending", vector); 2216dc506506SNeel Natu return (0); 2217dc506506SNeel Natu } 2218dc506506SNeel Natu 2219d37f2adbSNeel Natu void 2220d3956e46SJohn Baldwin vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, int errcode) 2221dc506506SNeel Natu { 222273505a10SRobert Wing int error __diagused, restart_instruction; 2223dc506506SNeel Natu 2224c9c75df4SNeel Natu restart_instruction = 1; 2225d37f2adbSNeel Natu 2226d3956e46SJohn Baldwin error = vm_inject_exception(vcpu, vector, errcode_valid, 2227c9c75df4SNeel Natu errcode, restart_instruction); 2228dc506506SNeel Natu KASSERT(error == 0, ("vm_inject_exception error %d", error)); 2229dc506506SNeel Natu } 2230dc506506SNeel Natu 2231dc506506SNeel Natu void 2232d3956e46SJohn Baldwin vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2) 2233fd949af6SNeel Natu { 223473505a10SRobert Wing int error __diagused; 223537a723a5SNeel Natu 2236d3956e46SJohn Baldwin VMM_CTR2(vcpu, "Injecting page fault: error_code %#x, cr2 %#lx", 223737a723a5SNeel Natu error_code, cr2); 223837a723a5SNeel Natu 2239d3956e46SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_CR2, cr2); 224037a723a5SNeel Natu KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 2241fd949af6SNeel Natu 2242d3956e46SJohn Baldwin vm_inject_fault(vcpu, IDT_PF, 1, error_code); 2243366f6083SPeter Grehan } 2244366f6083SPeter Grehan 224561592433SNeel Natu static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 2246366f6083SPeter Grehan 2247f352ff0cSNeel Natu int 22483f0f4b15SJohn Baldwin vm_inject_nmi(struct vcpu *vcpu) 2249f352ff0cSNeel Natu { 2250f352ff0cSNeel Natu 2251f352ff0cSNeel Natu vcpu->nmi_pending = 1; 22523f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 2253f352ff0cSNeel Natu return (0); 2254f352ff0cSNeel Natu } 2255f352ff0cSNeel Natu 2256f352ff0cSNeel Natu int 225780cb5d84SJohn Baldwin vm_nmi_pending(struct vcpu *vcpu) 2258f352ff0cSNeel Natu { 2259f352ff0cSNeel Natu return (vcpu->nmi_pending); 2260f352ff0cSNeel Natu } 2261f352ff0cSNeel Natu 2262f352ff0cSNeel Natu void 226380cb5d84SJohn Baldwin vm_nmi_clear(struct vcpu *vcpu) 2264f352ff0cSNeel Natu { 2265f352ff0cSNeel Natu if (vcpu->nmi_pending == 0) 2266f352ff0cSNeel Natu panic("vm_nmi_clear: inconsistent nmi_pending state"); 2267f352ff0cSNeel Natu 2268f352ff0cSNeel Natu vcpu->nmi_pending = 0; 22693dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_NMI_COUNT, 1); 2270366f6083SPeter Grehan } 2271366f6083SPeter Grehan 22720775fbb4STycho Nightingale static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 22730775fbb4STycho Nightingale 22740775fbb4STycho Nightingale int 22753f0f4b15SJohn Baldwin vm_inject_extint(struct vcpu *vcpu) 22760775fbb4STycho Nightingale { 22770775fbb4STycho Nightingale 22780775fbb4STycho Nightingale vcpu->extint_pending = 1; 22793f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 22800775fbb4STycho Nightingale return (0); 22810775fbb4STycho Nightingale } 22820775fbb4STycho Nightingale 22830775fbb4STycho Nightingale int 228480cb5d84SJohn Baldwin vm_extint_pending(struct vcpu *vcpu) 22850775fbb4STycho Nightingale { 22860775fbb4STycho Nightingale return (vcpu->extint_pending); 22870775fbb4STycho Nightingale } 22880775fbb4STycho Nightingale 22890775fbb4STycho Nightingale void 229080cb5d84SJohn Baldwin vm_extint_clear(struct vcpu *vcpu) 22910775fbb4STycho Nightingale { 22920775fbb4STycho Nightingale if (vcpu->extint_pending == 0) 22930775fbb4STycho Nightingale panic("vm_extint_clear: inconsistent extint_pending state"); 22940775fbb4STycho Nightingale 22950775fbb4STycho Nightingale vcpu->extint_pending = 0; 22963dc3d32aSJohn Baldwin vmm_stat_incr(vcpu, VCPU_EXTINT_COUNT, 1); 22970775fbb4STycho Nightingale } 22980775fbb4STycho Nightingale 2299366f6083SPeter Grehan int 23003f0f4b15SJohn Baldwin vm_get_capability(struct vcpu *vcpu, int type, int *retval) 2301366f6083SPeter Grehan { 2302366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2303366f6083SPeter Grehan return (EINVAL); 2304366f6083SPeter Grehan 23053f0f4b15SJohn Baldwin return (vmmops_getcap(vcpu->cookie, type, retval)); 2306366f6083SPeter Grehan } 2307366f6083SPeter Grehan 2308366f6083SPeter Grehan int 23093f0f4b15SJohn Baldwin vm_set_capability(struct vcpu *vcpu, int type, int val) 2310366f6083SPeter Grehan { 2311366f6083SPeter Grehan if (type < 0 || type >= VM_CAP_MAX) 2312366f6083SPeter Grehan return (EINVAL); 2313366f6083SPeter Grehan 23143f0f4b15SJohn Baldwin return (vmmops_setcap(vcpu->cookie, type, val)); 2315366f6083SPeter Grehan } 2316366f6083SPeter Grehan 2317950af9ffSJohn Baldwin struct vm * 2318950af9ffSJohn Baldwin vcpu_vm(struct vcpu *vcpu) 2319950af9ffSJohn Baldwin { 2320950af9ffSJohn Baldwin return (vcpu->vm); 2321950af9ffSJohn Baldwin } 2322950af9ffSJohn Baldwin 2323950af9ffSJohn Baldwin int 2324950af9ffSJohn Baldwin vcpu_vcpuid(struct vcpu *vcpu) 2325950af9ffSJohn Baldwin { 2326950af9ffSJohn Baldwin return (vcpu->vcpuid); 2327950af9ffSJohn Baldwin } 2328950af9ffSJohn Baldwin 2329950af9ffSJohn Baldwin struct vcpu * 2330950af9ffSJohn Baldwin vm_vcpu(struct vm *vm, int vcpuid) 2331950af9ffSJohn Baldwin { 233298568a00SJohn Baldwin return (vm->vcpu[vcpuid]); 2333950af9ffSJohn Baldwin } 2334950af9ffSJohn Baldwin 2335366f6083SPeter Grehan struct vlapic * 2336d3956e46SJohn Baldwin vm_lapic(struct vcpu *vcpu) 2337366f6083SPeter Grehan { 2338d3956e46SJohn Baldwin return (vcpu->vlapic); 2339366f6083SPeter Grehan } 2340366f6083SPeter Grehan 2341565bbb86SNeel Natu struct vioapic * 2342565bbb86SNeel Natu vm_ioapic(struct vm *vm) 2343565bbb86SNeel Natu { 2344565bbb86SNeel Natu 2345565bbb86SNeel Natu return (vm->vioapic); 2346565bbb86SNeel Natu } 2347565bbb86SNeel Natu 234808e3ff32SNeel Natu struct vhpet * 234908e3ff32SNeel Natu vm_hpet(struct vm *vm) 235008e3ff32SNeel Natu { 235108e3ff32SNeel Natu 235208e3ff32SNeel Natu return (vm->vhpet); 235308e3ff32SNeel Natu } 235408e3ff32SNeel Natu 2355490d56c5SEd Maste bool 2356366f6083SPeter Grehan vmm_is_pptdev(int bus, int slot, int func) 2357366f6083SPeter Grehan { 2358490d56c5SEd Maste int b, f, i, n, s; 2359366f6083SPeter Grehan char *val, *cp, *cp2; 2360490d56c5SEd Maste bool found; 2361366f6083SPeter Grehan 2362366f6083SPeter Grehan /* 236307044a96SNeel Natu * XXX 236407044a96SNeel Natu * The length of an environment variable is limited to 128 bytes which 236507044a96SNeel Natu * puts an upper limit on the number of passthru devices that may be 236607044a96SNeel Natu * specified using a single environment variable. 236707044a96SNeel Natu * 236807044a96SNeel Natu * Work around this by scanning multiple environment variable 236907044a96SNeel Natu * names instead of a single one - yuck! 2370366f6083SPeter Grehan */ 237107044a96SNeel Natu const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 237207044a96SNeel Natu 237307044a96SNeel Natu /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2374490d56c5SEd Maste found = false; 237507044a96SNeel Natu for (i = 0; names[i] != NULL && !found; i++) { 23762be111bfSDavide Italiano cp = val = kern_getenv(names[i]); 2377366f6083SPeter Grehan while (cp != NULL && *cp != '\0') { 2378366f6083SPeter Grehan if ((cp2 = strchr(cp, ' ')) != NULL) 2379366f6083SPeter Grehan *cp2 = '\0'; 2380366f6083SPeter Grehan 2381366f6083SPeter Grehan n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2382366f6083SPeter Grehan if (n == 3 && bus == b && slot == s && func == f) { 2383490d56c5SEd Maste found = true; 2384366f6083SPeter Grehan break; 2385366f6083SPeter Grehan } 2386366f6083SPeter Grehan 2387366f6083SPeter Grehan if (cp2 != NULL) 2388366f6083SPeter Grehan *cp2++ = ' '; 2389366f6083SPeter Grehan 2390366f6083SPeter Grehan cp = cp2; 2391366f6083SPeter Grehan } 2392366f6083SPeter Grehan freeenv(val); 239307044a96SNeel Natu } 2394366f6083SPeter Grehan return (found); 2395366f6083SPeter Grehan } 2396366f6083SPeter Grehan 2397366f6083SPeter Grehan void * 2398366f6083SPeter Grehan vm_iommu_domain(struct vm *vm) 2399366f6083SPeter Grehan { 2400366f6083SPeter Grehan 2401366f6083SPeter Grehan return (vm->iommu); 2402366f6083SPeter Grehan } 2403366f6083SPeter Grehan 240475dd3366SNeel Natu int 24053f0f4b15SJohn Baldwin vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 2406366f6083SPeter Grehan { 240775dd3366SNeel Natu int error; 2408366f6083SPeter Grehan 240975dd3366SNeel Natu vcpu_lock(vcpu); 24103f0f4b15SJohn Baldwin error = vcpu_set_state_locked(vcpu, newstate, from_idle); 241175dd3366SNeel Natu vcpu_unlock(vcpu); 241275dd3366SNeel Natu 241375dd3366SNeel Natu return (error); 241475dd3366SNeel Natu } 241575dd3366SNeel Natu 241675dd3366SNeel Natu enum vcpu_state 2417d3956e46SJohn Baldwin vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 2418366f6083SPeter Grehan { 241975dd3366SNeel Natu enum vcpu_state state; 2420366f6083SPeter Grehan 242175dd3366SNeel Natu vcpu_lock(vcpu); 242275dd3366SNeel Natu state = vcpu->state; 2423d3c11f40SPeter Grehan if (hostcpu != NULL) 2424d3c11f40SPeter Grehan *hostcpu = vcpu->hostcpu; 242575dd3366SNeel Natu vcpu_unlock(vcpu); 2426366f6083SPeter Grehan 242775dd3366SNeel Natu return (state); 2428366f6083SPeter Grehan } 2429366f6083SPeter Grehan 243095ebc360SNeel Natu int 24313f0f4b15SJohn Baldwin vm_activate_cpu(struct vcpu *vcpu) 2432366f6083SPeter Grehan { 24333f0f4b15SJohn Baldwin struct vm *vm = vcpu->vm; 2434366f6083SPeter Grehan 24353f0f4b15SJohn Baldwin if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 243695ebc360SNeel Natu return (EBUSY); 243722d822c6SNeel Natu 24383f0f4b15SJohn Baldwin VMM_CTR0(vcpu, "activated"); 24393f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 244095ebc360SNeel Natu return (0); 2441366f6083SPeter Grehan } 2442366f6083SPeter Grehan 2443fc276d92SJohn Baldwin int 24443f0f4b15SJohn Baldwin vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 2445fc276d92SJohn Baldwin { 24463f0f4b15SJohn Baldwin if (vcpu == NULL) { 2447fc276d92SJohn Baldwin vm->debug_cpus = vm->active_cpus; 24483f0f4b15SJohn Baldwin for (int i = 0; i < vm->maxcpus; i++) { 2449fc276d92SJohn Baldwin if (CPU_ISSET(i, &vm->active_cpus)) 24503f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 2451fc276d92SJohn Baldwin } 2452fc276d92SJohn Baldwin } else { 24533f0f4b15SJohn Baldwin if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 2454fc276d92SJohn Baldwin return (EINVAL); 2455fc276d92SJohn Baldwin 24563f0f4b15SJohn Baldwin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 24573f0f4b15SJohn Baldwin vcpu_notify_event(vcpu, false); 2458fc276d92SJohn Baldwin } 2459fc276d92SJohn Baldwin return (0); 2460fc276d92SJohn Baldwin } 2461fc276d92SJohn Baldwin 2462fc276d92SJohn Baldwin int 24633f0f4b15SJohn Baldwin vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 2464fc276d92SJohn Baldwin { 2465fc276d92SJohn Baldwin 24663f0f4b15SJohn Baldwin if (vcpu == NULL) { 2467fc276d92SJohn Baldwin CPU_ZERO(&vm->debug_cpus); 2468fc276d92SJohn Baldwin } else { 24693f0f4b15SJohn Baldwin if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 2470fc276d92SJohn Baldwin return (EINVAL); 2471fc276d92SJohn Baldwin 24723f0f4b15SJohn Baldwin CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 2473fc276d92SJohn Baldwin } 2474fc276d92SJohn Baldwin return (0); 2475fc276d92SJohn Baldwin } 2476fc276d92SJohn Baldwin 2477fc276d92SJohn Baldwin int 247880cb5d84SJohn Baldwin vcpu_debugged(struct vcpu *vcpu) 2479fc276d92SJohn Baldwin { 2480fc276d92SJohn Baldwin 248180cb5d84SJohn Baldwin return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 2482fc276d92SJohn Baldwin } 2483fc276d92SJohn Baldwin 2484a5615c90SPeter Grehan cpuset_t 2485366f6083SPeter Grehan vm_active_cpus(struct vm *vm) 2486366f6083SPeter Grehan { 2487366f6083SPeter Grehan 2488366f6083SPeter Grehan return (vm->active_cpus); 2489366f6083SPeter Grehan } 2490366f6083SPeter Grehan 249195ebc360SNeel Natu cpuset_t 2492fc276d92SJohn Baldwin vm_debug_cpus(struct vm *vm) 2493fc276d92SJohn Baldwin { 2494fc276d92SJohn Baldwin 2495fc276d92SJohn Baldwin return (vm->debug_cpus); 2496fc276d92SJohn Baldwin } 2497fc276d92SJohn Baldwin 2498fc276d92SJohn Baldwin cpuset_t 249995ebc360SNeel Natu vm_suspended_cpus(struct vm *vm) 250095ebc360SNeel Natu { 250195ebc360SNeel Natu 250295ebc360SNeel Natu return (vm->suspended_cpus); 250395ebc360SNeel Natu } 250495ebc360SNeel Natu 2505c0f35dbfSJohn Baldwin /* 2506c0f35dbfSJohn Baldwin * Returns the subset of vCPUs in tostart that are awaiting startup. 2507c0f35dbfSJohn Baldwin * These vCPUs are also marked as no longer awaiting startup. 2508c0f35dbfSJohn Baldwin */ 2509c0f35dbfSJohn Baldwin cpuset_t 2510c0f35dbfSJohn Baldwin vm_start_cpus(struct vm *vm, const cpuset_t *tostart) 2511c0f35dbfSJohn Baldwin { 2512c0f35dbfSJohn Baldwin cpuset_t set; 2513c0f35dbfSJohn Baldwin 2514c0f35dbfSJohn Baldwin mtx_lock(&vm->rendezvous_mtx); 2515c0f35dbfSJohn Baldwin CPU_AND(&set, &vm->startup_cpus, tostart); 2516c0f35dbfSJohn Baldwin CPU_ANDNOT(&vm->startup_cpus, &vm->startup_cpus, &set); 2517c0f35dbfSJohn Baldwin mtx_unlock(&vm->rendezvous_mtx); 2518c0f35dbfSJohn Baldwin return (set); 2519c0f35dbfSJohn Baldwin } 2520c0f35dbfSJohn Baldwin 2521c0f35dbfSJohn Baldwin void 2522c0f35dbfSJohn Baldwin vm_await_start(struct vm *vm, const cpuset_t *waiting) 2523c0f35dbfSJohn Baldwin { 2524c0f35dbfSJohn Baldwin mtx_lock(&vm->rendezvous_mtx); 2525c0f35dbfSJohn Baldwin CPU_OR(&vm->startup_cpus, &vm->startup_cpus, waiting); 2526c0f35dbfSJohn Baldwin mtx_unlock(&vm->rendezvous_mtx); 2527c0f35dbfSJohn Baldwin } 2528c0f35dbfSJohn Baldwin 2529366f6083SPeter Grehan void * 25303dc3d32aSJohn Baldwin vcpu_stats(struct vcpu *vcpu) 2531366f6083SPeter Grehan { 2532366f6083SPeter Grehan 25333dc3d32aSJohn Baldwin return (vcpu->stats); 2534366f6083SPeter Grehan } 2535e9027382SNeel Natu 2536e9027382SNeel Natu int 25373f0f4b15SJohn Baldwin vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state) 2538e9027382SNeel Natu { 25393f0f4b15SJohn Baldwin *state = vcpu->x2apic_state; 2540e9027382SNeel Natu 2541e9027382SNeel Natu return (0); 2542e9027382SNeel Natu } 2543e9027382SNeel Natu 2544e9027382SNeel Natu int 25453f0f4b15SJohn Baldwin vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) 2546e9027382SNeel Natu { 25473f23d3caSNeel Natu if (state >= X2APIC_STATE_LAST) 2548e9027382SNeel Natu return (EINVAL); 2549e9027382SNeel Natu 2550d3956e46SJohn Baldwin vcpu->x2apic_state = state; 2551e9027382SNeel Natu 2552d3956e46SJohn Baldwin vlapic_set_x2apic_state(vcpu, state); 255373820fb0SNeel Natu 2554e9027382SNeel Natu return (0); 2555e9027382SNeel Natu } 255675dd3366SNeel Natu 255722821874SNeel Natu /* 255822821874SNeel Natu * This function is called to ensure that a vcpu "sees" a pending event 255922821874SNeel Natu * as soon as possible: 256022821874SNeel Natu * - If the vcpu thread is sleeping then it is woken up. 256122821874SNeel Natu * - If the vcpu is running on a different host_cpu then an IPI will be directed 256222821874SNeel Natu * to the host_cpu to cause the vcpu to trap into the hypervisor. 256322821874SNeel Natu */ 2564248e6799SNeel Natu static void 2565248e6799SNeel Natu vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) 256675dd3366SNeel Natu { 256775dd3366SNeel Natu int hostcpu; 256875dd3366SNeel Natu 256975dd3366SNeel Natu hostcpu = vcpu->hostcpu; 2570ef39d7e9SNeel Natu if (vcpu->state == VCPU_RUNNING) { 2571ef39d7e9SNeel Natu KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2572de5ea6b6SNeel Natu if (hostcpu != curcpu) { 2573ef39d7e9SNeel Natu if (lapic_intr) { 2574add611fdSNeel Natu vlapic_post_intr(vcpu->vlapic, hostcpu, 2575add611fdSNeel Natu vmm_ipinum); 2576ef39d7e9SNeel Natu } else { 257775dd3366SNeel Natu ipi_cpu(hostcpu, vmm_ipinum); 257875dd3366SNeel Natu } 2579ef39d7e9SNeel Natu } else { 2580ef39d7e9SNeel Natu /* 2581ef39d7e9SNeel Natu * If the 'vcpu' is running on 'curcpu' then it must 2582ef39d7e9SNeel Natu * be sending a notification to itself (e.g. SELF_IPI). 2583ef39d7e9SNeel Natu * The pending event will be picked up when the vcpu 2584ef39d7e9SNeel Natu * transitions back to guest context. 2585ef39d7e9SNeel Natu */ 2586ef39d7e9SNeel Natu } 2587ef39d7e9SNeel Natu } else { 2588ef39d7e9SNeel Natu KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2589ef39d7e9SNeel Natu "with hostcpu %d", vcpu->state, hostcpu)); 2590366f6083SPeter Grehan if (vcpu->state == VCPU_SLEEPING) 2591366f6083SPeter Grehan wakeup_one(vcpu); 2592366f6083SPeter Grehan } 2593248e6799SNeel Natu } 2594248e6799SNeel Natu 2595248e6799SNeel Natu void 25963f0f4b15SJohn Baldwin vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) 2597248e6799SNeel Natu { 2598248e6799SNeel Natu vcpu_lock(vcpu); 2599248e6799SNeel Natu vcpu_notify_event_locked(vcpu, lapic_intr); 2600f76fc5d4SNeel Natu vcpu_unlock(vcpu); 2601f76fc5d4SNeel Natu } 2602318224bbSNeel Natu 2603318224bbSNeel Natu struct vmspace * 2604318224bbSNeel Natu vm_get_vmspace(struct vm *vm) 2605318224bbSNeel Natu { 2606318224bbSNeel Natu 2607318224bbSNeel Natu return (vm->vmspace); 2608318224bbSNeel Natu } 2609565bbb86SNeel Natu 2610565bbb86SNeel Natu int 2611565bbb86SNeel Natu vm_apicid2vcpuid(struct vm *vm, int apicid) 2612565bbb86SNeel Natu { 2613565bbb86SNeel Natu /* 2614565bbb86SNeel Natu * XXX apic id is assumed to be numerically identical to vcpu id 2615565bbb86SNeel Natu */ 2616565bbb86SNeel Natu return (apicid); 2617565bbb86SNeel Natu } 26185b8a8cd1SNeel Natu 2619b837daddSKonstantin Belousov int 2620d8be3d52SJohn Baldwin vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest, 26215b8a8cd1SNeel Natu vm_rendezvous_func_t func, void *arg) 26225b8a8cd1SNeel Natu { 2623d8be3d52SJohn Baldwin struct vm *vm = vcpu->vm; 2624b837daddSKonstantin Belousov int error, i; 2625970955e4SNeel Natu 26265b8a8cd1SNeel Natu /* 26275b8a8cd1SNeel Natu * Enforce that this function is called without any locks 26285b8a8cd1SNeel Natu */ 26295b8a8cd1SNeel Natu WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 26305b8a8cd1SNeel Natu 26315b8a8cd1SNeel Natu restart: 26325b8a8cd1SNeel Natu mtx_lock(&vm->rendezvous_mtx); 26335b8a8cd1SNeel Natu if (vm->rendezvous_func != NULL) { 26345b8a8cd1SNeel Natu /* 26355b8a8cd1SNeel Natu * If a rendezvous is already in progress then we need to 26363f0f4b15SJohn Baldwin * call the rendezvous handler in case this 'vcpu' is one 26375b8a8cd1SNeel Natu * of the targets of the rendezvous. 26385b8a8cd1SNeel Natu */ 2639d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Rendezvous already in progress"); 26405b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 2641d8be3d52SJohn Baldwin error = vm_handle_rendezvous(vcpu); 2642b837daddSKonstantin Belousov if (error != 0) 2643b837daddSKonstantin Belousov return (error); 26445b8a8cd1SNeel Natu goto restart; 26455b8a8cd1SNeel Natu } 26465b8a8cd1SNeel Natu KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 26475b8a8cd1SNeel Natu "rendezvous is still in progress")); 26485b8a8cd1SNeel Natu 2649d8be3d52SJohn Baldwin VMM_CTR0(vcpu, "Initiating rendezvous"); 26505b8a8cd1SNeel Natu vm->rendezvous_req_cpus = dest; 26515b8a8cd1SNeel Natu CPU_ZERO(&vm->rendezvous_done_cpus); 26525b8a8cd1SNeel Natu vm->rendezvous_arg = arg; 2653869dbab7SAndriy Gapon vm->rendezvous_func = func; 26545b8a8cd1SNeel Natu mtx_unlock(&vm->rendezvous_mtx); 26555b8a8cd1SNeel Natu 2656970955e4SNeel Natu /* 2657970955e4SNeel Natu * Wake up any sleeping vcpus and trigger a VM-exit in any running 2658970955e4SNeel Natu * vcpus so they handle the rendezvous as soon as possible. 2659970955e4SNeel Natu */ 2660a488c9c9SRodney W. Grimes for (i = 0; i < vm->maxcpus; i++) { 2661970955e4SNeel Natu if (CPU_ISSET(i, &dest)) 26623f0f4b15SJohn Baldwin vcpu_notify_event(vm_vcpu(vm, i), false); 2663970955e4SNeel Natu } 2664970955e4SNeel Natu 2665d8be3d52SJohn Baldwin return (vm_handle_rendezvous(vcpu)); 26665b8a8cd1SNeel Natu } 2667762fd208STycho Nightingale 2668762fd208STycho Nightingale struct vatpic * 2669762fd208STycho Nightingale vm_atpic(struct vm *vm) 2670762fd208STycho Nightingale { 2671762fd208STycho Nightingale return (vm->vatpic); 2672762fd208STycho Nightingale } 2673e883c9bbSTycho Nightingale 2674e883c9bbSTycho Nightingale struct vatpit * 2675e883c9bbSTycho Nightingale vm_atpit(struct vm *vm) 2676e883c9bbSTycho Nightingale { 2677e883c9bbSTycho Nightingale return (vm->vatpit); 2678e883c9bbSTycho Nightingale } 2679d17b5104SNeel Natu 2680160ef77aSNeel Natu struct vpmtmr * 2681160ef77aSNeel Natu vm_pmtmr(struct vm *vm) 2682160ef77aSNeel Natu { 2683160ef77aSNeel Natu 2684160ef77aSNeel Natu return (vm->vpmtmr); 2685160ef77aSNeel Natu } 2686160ef77aSNeel Natu 26870dafa5cdSNeel Natu struct vrtc * 26880dafa5cdSNeel Natu vm_rtc(struct vm *vm) 26890dafa5cdSNeel Natu { 26900dafa5cdSNeel Natu 26910dafa5cdSNeel Natu return (vm->vrtc); 26920dafa5cdSNeel Natu } 26930dafa5cdSNeel Natu 2694d17b5104SNeel Natu enum vm_reg_name 2695d17b5104SNeel Natu vm_segment_name(int seg) 2696d17b5104SNeel Natu { 2697d17b5104SNeel Natu static enum vm_reg_name seg_names[] = { 2698d17b5104SNeel Natu VM_REG_GUEST_ES, 2699d17b5104SNeel Natu VM_REG_GUEST_CS, 2700d17b5104SNeel Natu VM_REG_GUEST_SS, 2701d17b5104SNeel Natu VM_REG_GUEST_DS, 2702d17b5104SNeel Natu VM_REG_GUEST_FS, 2703d17b5104SNeel Natu VM_REG_GUEST_GS 2704d17b5104SNeel Natu }; 2705d17b5104SNeel Natu 2706d17b5104SNeel Natu KASSERT(seg >= 0 && seg < nitems(seg_names), 2707d17b5104SNeel Natu ("%s: invalid segment encoding %d", __func__, seg)); 2708d17b5104SNeel Natu return (seg_names[seg]); 2709d17b5104SNeel Natu } 2710cf1d80d8SPeter Grehan 2711d665d229SNeel Natu void 27122b4fe856SJohn Baldwin vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo) 2713d665d229SNeel Natu { 2714d665d229SNeel Natu int idx; 2715d665d229SNeel Natu 2716d665d229SNeel Natu for (idx = 0; idx < num_copyinfo; idx++) { 2717d665d229SNeel Natu if (copyinfo[idx].cookie != NULL) 2718d665d229SNeel Natu vm_gpa_release(copyinfo[idx].cookie); 2719d665d229SNeel Natu } 2720d665d229SNeel Natu bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2721d665d229SNeel Natu } 2722d665d229SNeel Natu 2723d665d229SNeel Natu int 2724d3956e46SJohn Baldwin vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, 2725d665d229SNeel Natu uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 27269c4d5478SNeel Natu int num_copyinfo, int *fault) 2727d665d229SNeel Natu { 2728d665d229SNeel Natu int error, idx, nused; 2729d665d229SNeel Natu size_t n, off, remaining; 2730d665d229SNeel Natu void *hva, *cookie; 2731d665d229SNeel Natu uint64_t gpa; 2732d665d229SNeel Natu 2733d665d229SNeel Natu bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2734d665d229SNeel Natu 2735d665d229SNeel Natu nused = 0; 2736d665d229SNeel Natu remaining = len; 2737d665d229SNeel Natu while (remaining > 0) { 2738d665d229SNeel Natu KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 2739d3956e46SJohn Baldwin error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault); 27409c4d5478SNeel Natu if (error || *fault) 2741d665d229SNeel Natu return (error); 2742d665d229SNeel Natu off = gpa & PAGE_MASK; 2743d665d229SNeel Natu n = min(remaining, PAGE_SIZE - off); 2744d665d229SNeel Natu copyinfo[nused].gpa = gpa; 2745d665d229SNeel Natu copyinfo[nused].len = n; 2746d665d229SNeel Natu remaining -= n; 2747d665d229SNeel Natu gla += n; 2748d665d229SNeel Natu nused++; 2749d665d229SNeel Natu } 2750d665d229SNeel Natu 2751d665d229SNeel Natu for (idx = 0; idx < nused; idx++) { 2752d3956e46SJohn Baldwin hva = vm_gpa_hold(vcpu, copyinfo[idx].gpa, 27539b1aa8d6SNeel Natu copyinfo[idx].len, prot, &cookie); 2754d665d229SNeel Natu if (hva == NULL) 2755d665d229SNeel Natu break; 2756d665d229SNeel Natu copyinfo[idx].hva = hva; 2757d665d229SNeel Natu copyinfo[idx].cookie = cookie; 2758d665d229SNeel Natu } 2759d665d229SNeel Natu 2760d665d229SNeel Natu if (idx != nused) { 27612b4fe856SJohn Baldwin vm_copy_teardown(copyinfo, num_copyinfo); 27629c4d5478SNeel Natu return (EFAULT); 2763d665d229SNeel Natu } else { 27649c4d5478SNeel Natu *fault = 0; 2765d665d229SNeel Natu return (0); 2766d665d229SNeel Natu } 2767d665d229SNeel Natu } 2768d665d229SNeel Natu 2769d665d229SNeel Natu void 27702b4fe856SJohn Baldwin vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len) 2771d665d229SNeel Natu { 2772d665d229SNeel Natu char *dst; 2773d665d229SNeel Natu int idx; 2774d665d229SNeel Natu 2775d665d229SNeel Natu dst = kaddr; 2776d665d229SNeel Natu idx = 0; 2777d665d229SNeel Natu while (len > 0) { 2778d665d229SNeel Natu bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2779d665d229SNeel Natu len -= copyinfo[idx].len; 2780d665d229SNeel Natu dst += copyinfo[idx].len; 2781d665d229SNeel Natu idx++; 2782d665d229SNeel Natu } 2783d665d229SNeel Natu } 2784d665d229SNeel Natu 2785d665d229SNeel Natu void 27862b4fe856SJohn Baldwin vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len) 2787d665d229SNeel Natu { 2788d665d229SNeel Natu const char *src; 2789d665d229SNeel Natu int idx; 2790d665d229SNeel Natu 2791d665d229SNeel Natu src = kaddr; 2792d665d229SNeel Natu idx = 0; 2793d665d229SNeel Natu while (len > 0) { 2794d665d229SNeel Natu bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2795d665d229SNeel Natu len -= copyinfo[idx].len; 2796d665d229SNeel Natu src += copyinfo[idx].len; 2797d665d229SNeel Natu idx++; 2798d665d229SNeel Natu } 2799d665d229SNeel Natu } 2800cf1d80d8SPeter Grehan 2801cf1d80d8SPeter Grehan /* 2802cf1d80d8SPeter Grehan * Return the amount of in-use and wired memory for the VM. Since 2803cf1d80d8SPeter Grehan * these are global stats, only return the values with for vCPU 0 2804cf1d80d8SPeter Grehan */ 2805cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2806cf1d80d8SPeter Grehan VMM_STAT_DECLARE(VMM_MEM_WIRED); 2807cf1d80d8SPeter Grehan 2808cf1d80d8SPeter Grehan static void 28093f0f4b15SJohn Baldwin vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat) 2810cf1d80d8SPeter Grehan { 2811cf1d80d8SPeter Grehan 28123f0f4b15SJohn Baldwin if (vcpu->vcpuid == 0) { 28133f0f4b15SJohn Baldwin vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * 28143f0f4b15SJohn Baldwin vmspace_resident_count(vcpu->vm->vmspace)); 2815cf1d80d8SPeter Grehan } 2816cf1d80d8SPeter Grehan } 2817cf1d80d8SPeter Grehan 2818cf1d80d8SPeter Grehan static void 28193f0f4b15SJohn Baldwin vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat) 2820cf1d80d8SPeter Grehan { 2821cf1d80d8SPeter Grehan 28223f0f4b15SJohn Baldwin if (vcpu->vcpuid == 0) { 28233f0f4b15SJohn Baldwin vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE * 28243f0f4b15SJohn Baldwin pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace))); 2825cf1d80d8SPeter Grehan } 2826cf1d80d8SPeter Grehan } 2827cf1d80d8SPeter Grehan 2828cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2829cf1d80d8SPeter Grehan VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2830483d953aSJohn Baldwin 2831483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 2832483d953aSJohn Baldwin static int 2833483d953aSJohn Baldwin vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) 2834483d953aSJohn Baldwin { 2835a7db532eSJohn Baldwin uint64_t tsc, now; 2836483d953aSJohn Baldwin int ret; 2837483d953aSJohn Baldwin struct vcpu *vcpu; 283835abc6c2SJohn Baldwin uint16_t i, maxcpus; 2839483d953aSJohn Baldwin 2840a7db532eSJohn Baldwin now = rdtsc(); 284135abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 284235abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 284398568a00SJohn Baldwin vcpu = vm->vcpu[i]; 284498568a00SJohn Baldwin if (vcpu == NULL) 284598568a00SJohn Baldwin continue; 2846483d953aSJohn Baldwin 2847483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); 2848483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); 2849483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done); 2850483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done); 2851483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done); 2852483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done); 2853483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done); 2854483d953aSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); 2855a7db532eSJohn Baldwin 2856a7db532eSJohn Baldwin /* 2857a7db532eSJohn Baldwin * Save the absolute TSC value by adding now to tsc_offset. 2858483d953aSJohn Baldwin * 2859483d953aSJohn Baldwin * It will be turned turned back into an actual offset when the 2860483d953aSJohn Baldwin * TSC restore function is called 2861483d953aSJohn Baldwin */ 2862a7db532eSJohn Baldwin tsc = now + vcpu->tsc_offset; 2863a7db532eSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(tsc, meta, ret, done); 2864483d953aSJohn Baldwin } 2865483d953aSJohn Baldwin 2866483d953aSJohn Baldwin done: 2867483d953aSJohn Baldwin return (ret); 2868483d953aSJohn Baldwin } 2869483d953aSJohn Baldwin 2870483d953aSJohn Baldwin static int 2871483d953aSJohn Baldwin vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta) 2872483d953aSJohn Baldwin { 2873483d953aSJohn Baldwin int ret; 2874483d953aSJohn Baldwin 2875483d953aSJohn Baldwin ret = vm_snapshot_vcpus(vm, meta); 2876a7db532eSJohn Baldwin if (ret != 0) 2877483d953aSJohn Baldwin goto done; 2878483d953aSJohn Baldwin 2879c0f35dbfSJohn Baldwin SNAPSHOT_VAR_OR_LEAVE(vm->startup_cpus, meta, ret, done); 2880483d953aSJohn Baldwin done: 2881483d953aSJohn Baldwin return (ret); 2882483d953aSJohn Baldwin } 2883483d953aSJohn Baldwin 2884483d953aSJohn Baldwin static int 28851aa51504SJohn Baldwin vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta) 2886483d953aSJohn Baldwin { 288735abc6c2SJohn Baldwin int error; 28881aa51504SJohn Baldwin struct vcpu *vcpu; 288935abc6c2SJohn Baldwin uint16_t i, maxcpus; 2890483d953aSJohn Baldwin 2891483d953aSJohn Baldwin error = 0; 2892483d953aSJohn Baldwin 289335abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 289435abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 289598568a00SJohn Baldwin vcpu = vm->vcpu[i]; 289698568a00SJohn Baldwin if (vcpu == NULL) 289798568a00SJohn Baldwin continue; 28981aa51504SJohn Baldwin 2899869c8d19SJohn Baldwin error = vmmops_vcpu_snapshot(vcpu->cookie, meta); 2900483d953aSJohn Baldwin if (error != 0) { 2901483d953aSJohn Baldwin printf("%s: failed to snapshot vmcs/vmcb data for " 2902483d953aSJohn Baldwin "vCPU: %d; error: %d\n", __func__, i, error); 2903483d953aSJohn Baldwin goto done; 2904483d953aSJohn Baldwin } 2905483d953aSJohn Baldwin } 2906483d953aSJohn Baldwin 2907483d953aSJohn Baldwin done: 2908483d953aSJohn Baldwin return (error); 2909483d953aSJohn Baldwin } 2910483d953aSJohn Baldwin 2911483d953aSJohn Baldwin /* 2912483d953aSJohn Baldwin * Save kernel-side structures to user-space for snapshotting. 2913483d953aSJohn Baldwin */ 2914483d953aSJohn Baldwin int 2915483d953aSJohn Baldwin vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta) 2916483d953aSJohn Baldwin { 2917483d953aSJohn Baldwin int ret = 0; 2918483d953aSJohn Baldwin 2919483d953aSJohn Baldwin switch (meta->dev_req) { 2920483d953aSJohn Baldwin case STRUCT_VMX: 292115add60dSPeter Grehan ret = vmmops_snapshot(vm->cookie, meta); 2922483d953aSJohn Baldwin break; 2923483d953aSJohn Baldwin case STRUCT_VMCX: 29241aa51504SJohn Baldwin ret = vm_snapshot_vcpu(vm, meta); 2925483d953aSJohn Baldwin break; 2926483d953aSJohn Baldwin case STRUCT_VM: 2927483d953aSJohn Baldwin ret = vm_snapshot_vm(vm, meta); 2928483d953aSJohn Baldwin break; 2929483d953aSJohn Baldwin case STRUCT_VIOAPIC: 2930483d953aSJohn Baldwin ret = vioapic_snapshot(vm_ioapic(vm), meta); 2931483d953aSJohn Baldwin break; 2932483d953aSJohn Baldwin case STRUCT_VLAPIC: 2933483d953aSJohn Baldwin ret = vlapic_snapshot(vm, meta); 2934483d953aSJohn Baldwin break; 2935483d953aSJohn Baldwin case STRUCT_VHPET: 2936483d953aSJohn Baldwin ret = vhpet_snapshot(vm_hpet(vm), meta); 2937483d953aSJohn Baldwin break; 2938483d953aSJohn Baldwin case STRUCT_VATPIC: 2939483d953aSJohn Baldwin ret = vatpic_snapshot(vm_atpic(vm), meta); 2940483d953aSJohn Baldwin break; 2941483d953aSJohn Baldwin case STRUCT_VATPIT: 2942483d953aSJohn Baldwin ret = vatpit_snapshot(vm_atpit(vm), meta); 2943483d953aSJohn Baldwin break; 2944483d953aSJohn Baldwin case STRUCT_VPMTMR: 2945483d953aSJohn Baldwin ret = vpmtmr_snapshot(vm_pmtmr(vm), meta); 2946483d953aSJohn Baldwin break; 2947483d953aSJohn Baldwin case STRUCT_VRTC: 2948483d953aSJohn Baldwin ret = vrtc_snapshot(vm_rtc(vm), meta); 2949483d953aSJohn Baldwin break; 2950483d953aSJohn Baldwin default: 2951483d953aSJohn Baldwin printf("%s: failed to find the requested type %#x\n", 2952483d953aSJohn Baldwin __func__, meta->dev_req); 2953483d953aSJohn Baldwin ret = (EINVAL); 2954483d953aSJohn Baldwin } 2955483d953aSJohn Baldwin return (ret); 2956483d953aSJohn Baldwin } 2957483d953aSJohn Baldwin 295880cb5d84SJohn Baldwin void 295980cb5d84SJohn Baldwin vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset) 2960483d953aSJohn Baldwin { 2961483d953aSJohn Baldwin vcpu->tsc_offset = offset; 2962483d953aSJohn Baldwin } 2963483d953aSJohn Baldwin 2964483d953aSJohn Baldwin int 2965483d953aSJohn Baldwin vm_restore_time(struct vm *vm) 2966483d953aSJohn Baldwin { 296735abc6c2SJohn Baldwin int error; 2968483d953aSJohn Baldwin uint64_t now; 2969483d953aSJohn Baldwin struct vcpu *vcpu; 297035abc6c2SJohn Baldwin uint16_t i, maxcpus; 2971483d953aSJohn Baldwin 2972483d953aSJohn Baldwin now = rdtsc(); 2973483d953aSJohn Baldwin 2974483d953aSJohn Baldwin error = vhpet_restore_time(vm_hpet(vm)); 2975483d953aSJohn Baldwin if (error) 2976483d953aSJohn Baldwin return (error); 2977483d953aSJohn Baldwin 297835abc6c2SJohn Baldwin maxcpus = vm_get_maxcpus(vm); 297935abc6c2SJohn Baldwin for (i = 0; i < maxcpus; i++) { 298098568a00SJohn Baldwin vcpu = vm->vcpu[i]; 298198568a00SJohn Baldwin if (vcpu == NULL) 298298568a00SJohn Baldwin continue; 2983483d953aSJohn Baldwin 2984869c8d19SJohn Baldwin error = vmmops_restore_tsc(vcpu->cookie, 29851aa51504SJohn Baldwin vcpu->tsc_offset - now); 2986483d953aSJohn Baldwin if (error) 2987483d953aSJohn Baldwin return (error); 2988483d953aSJohn Baldwin } 2989483d953aSJohn Baldwin 2990483d953aSJohn Baldwin return (0); 2991483d953aSJohn Baldwin } 2992483d953aSJohn Baldwin #endif 2993