1d3916eacSRuslan Bukin /*- 2d3916eacSRuslan Bukin * SPDX-License-Identifier: BSD-2-Clause 3d3916eacSRuslan Bukin * 4d3916eacSRuslan Bukin * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5d3916eacSRuslan Bukin * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com> 6d3916eacSRuslan Bukin * 7d3916eacSRuslan Bukin * This software was developed by the University of Cambridge Computer 8d3916eacSRuslan Bukin * Laboratory (Department of Computer Science and Technology) under Innovate 9d3916eacSRuslan Bukin * UK project 105694, "Digital Security by Design (DSbD) Technology Platform 10d3916eacSRuslan Bukin * Prototype". 11d3916eacSRuslan Bukin * 12d3916eacSRuslan Bukin * Redistribution and use in source and binary forms, with or without 13d3916eacSRuslan Bukin * modification, are permitted provided that the following conditions 14d3916eacSRuslan Bukin * are met: 15d3916eacSRuslan Bukin * 1. Redistributions of source code must retain the above copyright 16d3916eacSRuslan Bukin * notice, this list of conditions and the following disclaimer. 17d3916eacSRuslan Bukin * 2. Redistributions in binary form must reproduce the above copyright 18d3916eacSRuslan Bukin * notice, this list of conditions and the following disclaimer in the 19d3916eacSRuslan Bukin * documentation and/or other materials provided with the distribution. 20d3916eacSRuslan Bukin * 21d3916eacSRuslan Bukin * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22d3916eacSRuslan Bukin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23d3916eacSRuslan Bukin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24d3916eacSRuslan Bukin * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 25d3916eacSRuslan Bukin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26d3916eacSRuslan Bukin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27d3916eacSRuslan Bukin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28d3916eacSRuslan Bukin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29d3916eacSRuslan Bukin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30d3916eacSRuslan Bukin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31d3916eacSRuslan Bukin * SUCH DAMAGE. 32d3916eacSRuslan Bukin */ 33d3916eacSRuslan Bukin 34d3916eacSRuslan Bukin #include <sys/param.h> 35d3916eacSRuslan Bukin #include <sys/systm.h> 36d3916eacSRuslan Bukin #include <sys/cpuset.h> 37d3916eacSRuslan Bukin #include <sys/kernel.h> 38d3916eacSRuslan Bukin #include <sys/linker.h> 39d3916eacSRuslan Bukin #include <sys/lock.h> 40d3916eacSRuslan Bukin #include <sys/malloc.h> 41d3916eacSRuslan Bukin #include <sys/module.h> 42d3916eacSRuslan Bukin #include <sys/mutex.h> 43d3916eacSRuslan Bukin #include <sys/pcpu.h> 44d3916eacSRuslan Bukin #include <sys/proc.h> 45d3916eacSRuslan Bukin #include <sys/queue.h> 46d3916eacSRuslan Bukin #include <sys/rwlock.h> 47d3916eacSRuslan Bukin #include <sys/sched.h> 48d3916eacSRuslan Bukin #include <sys/smp.h> 49d3916eacSRuslan Bukin #include <sys/sysctl.h> 50d3916eacSRuslan Bukin 51d3916eacSRuslan Bukin #include <vm/vm.h> 52d3916eacSRuslan Bukin #include <vm/vm_object.h> 53d3916eacSRuslan Bukin #include <vm/vm_page.h> 54d3916eacSRuslan Bukin #include <vm/pmap.h> 55d3916eacSRuslan Bukin #include <vm/vm_map.h> 56d3916eacSRuslan Bukin #include <vm/vm_extern.h> 57d3916eacSRuslan Bukin #include <vm/vm_param.h> 58d3916eacSRuslan Bukin 59d3916eacSRuslan Bukin #include <machine/riscvreg.h> 60d3916eacSRuslan Bukin #include <machine/cpu.h> 61d3916eacSRuslan Bukin 
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

/*
 * Per-vCPU state.  Field annotations follow the initialization key
 * documented above struct vm below: (o) once, (i) on create/reinit,
 * (x) before use.
 */
struct vcpu {
	int		flags;
	enum vcpu_state	state;		/* protected by the vcpu spin mtx */
	struct mtx	mtx;		/* spin lock (see macros below) */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;		/* guest-visible vcpu index */
	void		*stats;		/* vmm_stat counter block */
	struct vm_exit	exitinfo;	/* details of the last VM exit */
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) owning virtual machine */
	void		*cookie;	/* (i) cpu-specific data */
	struct fpreg	*guestfpu;	/* (a,i) guest fpu state */
};

/*
 * Wrappers around the per-vcpu spin mutex.  A spin lock is used because
 * the lock may be taken from interrupt/critical context when notifying
 * a vcpu of a pending event.
 */
#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

/* A contiguous chunk of guest memory backed by a single VM object. */
struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;		/* pages wired into physical memory */
	bool		sysmem;		/* system memory (vs. device memory) */
	vm_object_t	object;		/* backing VM object */
};
#define	VM_MAX_MEMSEGS	3

/* A mapping of (part of) a mem_seg into the guest physical address space. */
struct mem_map {
	vm_paddr_t	gpa;		/* guest physical base address */
	size_t		len;		/* 0 means the slot is free */
	vm_ooffset_t	segoff;		/* offset into the backing segment */
	int		segid;		/* index into vm->mem_segs[] */
	int		prot;		/* VM_PROT_* protection bits */
	int		flags;		/* VM_MEMMAP_F_* flags */
};
#define	VM_MAX_MEMMAPS	4

/* A guest MMIO range with its emulation read/write handlers. */
struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug*/
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) protects mem_segs[] */
	struct sx	vcpus_init_lock;	/* (o) protects vcpu creation */
};

/* Set once vmm_init() succeeds; gates VM creation (see vm_create()). */
static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

/*
 * Release backend state for a vcpu.  When 'destroy' is true the vcpu is
 * being torn down for good and its stats, FPU save area and lock are
 * freed as well; otherwise only the backend cookie is dropped (reinit).
 */
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

/*
 * Allocate and minimally initialize a vcpu structure.  The backend
 * cookie is set up later by vcpu_init().  M_WAITOK: never returns NULL.
 */
static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

/*
 * (Re)initialize the per-vcpu backend state: fresh backend cookie,
 * reset FPU save area and cleared stats.  Called on creation and on
 * VM reinit.
 */
static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

/* Return a pointer to the vcpu's embedded VM-exit descriptor. */
struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

/*
 * Module-load initialization: size vm_maxcpu (tunable hw.vmm.maxcpu,
 * clamped to [1, VM_MAXCPU]) and initialize the hardware backend.
 */
static int
vmm_init(void)
{

	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

/* Kernel module event handler for vmm.ko load/unload. */
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: check if has_hyp here? */
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: check if has_hyp here? */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			/*
			 * NOTE(review): the flag is cleared only when
			 * modcleanup FAILS, presumably to stop new VMs
			 * from using a half-torn-down backend while the
			 * module stays loaded — confirm intent.
			 */
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

/*
 * Initialize VM-wide state.  'create' is true on first creation and
 * false on reinit; on reinit existing vcpus are reinitialized in place.
 */
static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

/*
 * Mark the VM as dying so vm_alloc_vcpu() refuses to create new vcpus.
 * Serialized against vcpu creation via vcpus_init_lock.
 */
void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

/*
 * Look up a vcpu by id, lazily creating it on first use.
 *
 * The fast path is a lock-free acquire-load of vm->vcpu[vcpuid]; the
 * slow path takes vcpus_init_lock and publishes the fully constructed
 * vcpu with a release-store.  Returns NULL for an out-of-range id, an
 * id beyond the interrupt controller's CPU limit, or a dying VM.
 */
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

/* Shared-lock the vcpu creation lock (blocks new vcpu creation). */
void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

/* Drop the vcpu creation lock taken by vm_slock_vcpus(). */
void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

/*
 * Create a new virtual machine named 'name'.
 *
 * Returns 0 with *retvm set on success, ENXIO if the vmm module failed
 * to initialize, EINVAL for a missing/too-long name, or ENOMEM if the
 * guest address space cannot be allocated.
 */
int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	/* 1ul << 39: guest physical address space size (Sv39-sized). */
	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	/* Array of vcpu pointers; entries are filled lazily. */
	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

/* Report the VM's CPU topology and maximum vcpu count. */
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

/* Return the maximum number of vcpus this VM can have. */
uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

/*
 * Set the VM's CPU topology.  The requested 'maxcpus' is ignored; the
 * product sockets*cores*threads must not exceed the fixed vm->maxcpus.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	/*
	 * NOTE(review): the three uint16_t operands promote to int, so
	 * the product can overflow for large inputs — verify callers
	 * bound these values.
	 */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return(0);
}

/*
 * Tear down VM state.  With destroy=false this is a reset: backend and
 * device-memory mappings are released but system memory and the vcpu
 * structures survive.  With destroy=true everything is freed.
 */
static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	int i;

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		/*
		 * NOTE(review): 'destroy' is always false in this branch,
		 * so the 'destroy ||' below is dead — the destroy case
		 * frees mappings via vm_free_memseg() instead.
		 */
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

/* Destroy a VM and free its top-level structure. */
void
vm_destroy(struct vm *vm)
{

	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

/*
 * Reset a VM in place.  Returns 0 on success or EBUSY unless every
 * active vcpu is suspended.
 */
int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

/* Return the VM's name (storage owned by the VM). */
const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

/* Shared-lock the memory segment list. */
void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

/* Exclusive-lock the memory segment list. */
void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

/* Drop the memory segment lock (shared or exclusive). */
void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	/* Caller must be the running vcpu itself (see comment above). */
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

/*
 * Allocate a guest memory segment of 'len' bytes backed by a new VM
 * object.  Requires the memseg lock held exclusively.
 *
 * Returns 0 on success, EINVAL for a bad ident or unaligned/zero
 * length, EEXIST if an identical segment already exists, or ENOMEM.
 */
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	/* Length must be a non-zero multiple of the page size. */
	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		/* Re-creating the same segment is idempotent (EEXIST). */
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

/*
 * Query a memory segment; any of the out-pointers may be NULL.
 * Requires the memseg lock held (shared or exclusive).
 */
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

/*
 * Release a memory segment's backing object and clear the slot.
 * Safe to call on an empty slot (no-op).
 */
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

/*
 * Map [first, first+len) of segment 'segid' at guest physical address
 * 'gpa' with protection 'prot'.  All of gpa/first/len must be
 * page-aligned and the range must lie within the segment.
 *
 * Returns 0 on success; EINVAL for bad arguments, ENOSPC when all
 * mem_maps slots are in use, ENOMEM/EFAULT on vm_map failures (the
 * mapping is rolled back if wiring fails).
 */
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	/* Find a free mem_maps slot (len == 0 marks a free entry). */
	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	/* VMFS_NO_SPACE: map at exactly 'gpa', do not search for space. */
	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			/* Roll back the mapping created above. */
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	/* Record the mapping only after everything has succeeded. */
	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

/*
 * Remove the guest mapping that exactly matches [gpa, gpa+len).
 * Returns 0 on success or EINVAL if no such mapping exists.
 */
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct
vm *vm, vm_paddr_t *gpa, int *segid, 732d3916eacSRuslan Bukin vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 733d3916eacSRuslan Bukin { 734d3916eacSRuslan Bukin struct mem_map *mm, *mmnext; 735d3916eacSRuslan Bukin int i; 736d3916eacSRuslan Bukin 737d3916eacSRuslan Bukin mmnext = NULL; 738d3916eacSRuslan Bukin for (i = 0; i < VM_MAX_MEMMAPS; i++) { 739d3916eacSRuslan Bukin mm = &vm->mem_maps[i]; 740d3916eacSRuslan Bukin if (mm->len == 0 || mm->gpa < *gpa) 741d3916eacSRuslan Bukin continue; 742d3916eacSRuslan Bukin if (mmnext == NULL || mm->gpa < mmnext->gpa) 743d3916eacSRuslan Bukin mmnext = mm; 744d3916eacSRuslan Bukin } 745d3916eacSRuslan Bukin 746d3916eacSRuslan Bukin if (mmnext != NULL) { 747d3916eacSRuslan Bukin *gpa = mmnext->gpa; 748d3916eacSRuslan Bukin if (segid) 749d3916eacSRuslan Bukin *segid = mmnext->segid; 750d3916eacSRuslan Bukin if (segoff) 751d3916eacSRuslan Bukin *segoff = mmnext->segoff; 752d3916eacSRuslan Bukin if (len) 753d3916eacSRuslan Bukin *len = mmnext->len; 754d3916eacSRuslan Bukin if (prot) 755d3916eacSRuslan Bukin *prot = mmnext->prot; 756d3916eacSRuslan Bukin if (flags) 757d3916eacSRuslan Bukin *flags = mmnext->flags; 758d3916eacSRuslan Bukin return (0); 759d3916eacSRuslan Bukin } else { 760d3916eacSRuslan Bukin return (ENOENT); 761d3916eacSRuslan Bukin } 762d3916eacSRuslan Bukin } 763d3916eacSRuslan Bukin 764d3916eacSRuslan Bukin static void 765d3916eacSRuslan Bukin vm_free_memmap(struct vm *vm, int ident) 766d3916eacSRuslan Bukin { 767d3916eacSRuslan Bukin struct mem_map *mm; 768d3916eacSRuslan Bukin int error __diagused; 769d3916eacSRuslan Bukin 770d3916eacSRuslan Bukin mm = &vm->mem_maps[ident]; 771d3916eacSRuslan Bukin if (mm->len) { 772d3916eacSRuslan Bukin error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 773d3916eacSRuslan Bukin mm->gpa + mm->len); 774d3916eacSRuslan Bukin KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 775d3916eacSRuslan Bukin __func__, error)); 776d3916eacSRuslan Bukin bzero(mm, 
sizeof(struct mem_map)); 777d3916eacSRuslan Bukin } 778d3916eacSRuslan Bukin } 779d3916eacSRuslan Bukin 780d3916eacSRuslan Bukin static __inline bool 781d3916eacSRuslan Bukin sysmem_mapping(struct vm *vm, struct mem_map *mm) 782d3916eacSRuslan Bukin { 783d3916eacSRuslan Bukin 784d3916eacSRuslan Bukin if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 785d3916eacSRuslan Bukin return (true); 786d3916eacSRuslan Bukin else 787d3916eacSRuslan Bukin return (false); 788d3916eacSRuslan Bukin } 789d3916eacSRuslan Bukin 790d3916eacSRuslan Bukin vm_paddr_t 791d3916eacSRuslan Bukin vmm_sysmem_maxaddr(struct vm *vm) 792d3916eacSRuslan Bukin { 793d3916eacSRuslan Bukin struct mem_map *mm; 794d3916eacSRuslan Bukin vm_paddr_t maxaddr; 795d3916eacSRuslan Bukin int i; 796d3916eacSRuslan Bukin 797d3916eacSRuslan Bukin maxaddr = 0; 798d3916eacSRuslan Bukin for (i = 0; i < VM_MAX_MEMMAPS; i++) { 799d3916eacSRuslan Bukin mm = &vm->mem_maps[i]; 800d3916eacSRuslan Bukin if (sysmem_mapping(vm, mm)) { 801d3916eacSRuslan Bukin if (maxaddr < mm->gpa + mm->len) 802d3916eacSRuslan Bukin maxaddr = mm->gpa + mm->len; 803d3916eacSRuslan Bukin } 804d3916eacSRuslan Bukin } 805d3916eacSRuslan Bukin return (maxaddr); 806d3916eacSRuslan Bukin } 807d3916eacSRuslan Bukin 808d3916eacSRuslan Bukin int 809d3916eacSRuslan Bukin vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 810d3916eacSRuslan Bukin uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 811d3916eacSRuslan Bukin { 812d3916eacSRuslan Bukin int error; 813d3916eacSRuslan Bukin 814d3916eacSRuslan Bukin error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); 815d3916eacSRuslan Bukin 816d3916eacSRuslan Bukin return (error); 817d3916eacSRuslan Bukin } 818d3916eacSRuslan Bukin 819d3916eacSRuslan Bukin void 820d3916eacSRuslan Bukin vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 821d3916eacSRuslan Bukin mem_region_read_t mmio_read, mem_region_write_t mmio_write) 822d3916eacSRuslan Bukin { 
823d3916eacSRuslan Bukin int i; 824d3916eacSRuslan Bukin 825d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 826d3916eacSRuslan Bukin if (vm->mmio_region[i].start == 0 && 827d3916eacSRuslan Bukin vm->mmio_region[i].end == 0) { 828d3916eacSRuslan Bukin vm->mmio_region[i].start = start; 829d3916eacSRuslan Bukin vm->mmio_region[i].end = start + size; 830d3916eacSRuslan Bukin vm->mmio_region[i].read = mmio_read; 831d3916eacSRuslan Bukin vm->mmio_region[i].write = mmio_write; 832d3916eacSRuslan Bukin return; 833d3916eacSRuslan Bukin } 834d3916eacSRuslan Bukin } 835d3916eacSRuslan Bukin 836d3916eacSRuslan Bukin panic("%s: No free MMIO region", __func__); 837d3916eacSRuslan Bukin } 838d3916eacSRuslan Bukin 839d3916eacSRuslan Bukin void 840d3916eacSRuslan Bukin vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 841d3916eacSRuslan Bukin { 842d3916eacSRuslan Bukin int i; 843d3916eacSRuslan Bukin 844d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 845d3916eacSRuslan Bukin if (vm->mmio_region[i].start == start && 846d3916eacSRuslan Bukin vm->mmio_region[i].end == start + size) { 847d3916eacSRuslan Bukin memset(&vm->mmio_region[i], 0, 848d3916eacSRuslan Bukin sizeof(vm->mmio_region[i])); 849d3916eacSRuslan Bukin return; 850d3916eacSRuslan Bukin } 851d3916eacSRuslan Bukin } 852d3916eacSRuslan Bukin 853d3916eacSRuslan Bukin panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 854d3916eacSRuslan Bukin start + size); 855d3916eacSRuslan Bukin } 856d3916eacSRuslan Bukin 857d3916eacSRuslan Bukin static int 858d3916eacSRuslan Bukin vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 859d3916eacSRuslan Bukin { 860d3916eacSRuslan Bukin struct vm *vm; 861d3916eacSRuslan Bukin struct vm_exit *vme; 862d3916eacSRuslan Bukin struct vie *vie; 863d3916eacSRuslan Bukin struct hyp *hyp; 864d3916eacSRuslan Bukin uint64_t fault_ipa; 865d3916eacSRuslan Bukin struct vm_guest_paging *paging; 866d3916eacSRuslan Bukin struct 
vmm_mmio_region *vmr; 867d3916eacSRuslan Bukin int error, i; 868d3916eacSRuslan Bukin 869d3916eacSRuslan Bukin vm = vcpu->vm; 870d3916eacSRuslan Bukin hyp = vm->cookie; 871d3916eacSRuslan Bukin if (!hyp->aplic_attached) 872d3916eacSRuslan Bukin goto out_user; 873d3916eacSRuslan Bukin 874d3916eacSRuslan Bukin vme = &vcpu->exitinfo; 875d3916eacSRuslan Bukin vie = &vme->u.inst_emul.vie; 876d3916eacSRuslan Bukin paging = &vme->u.inst_emul.paging; 877d3916eacSRuslan Bukin 878d3916eacSRuslan Bukin fault_ipa = vme->u.inst_emul.gpa; 879d3916eacSRuslan Bukin 880d3916eacSRuslan Bukin vmr = NULL; 881d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 882d3916eacSRuslan Bukin if (vm->mmio_region[i].start <= fault_ipa && 883d3916eacSRuslan Bukin vm->mmio_region[i].end > fault_ipa) { 884d3916eacSRuslan Bukin vmr = &vm->mmio_region[i]; 885d3916eacSRuslan Bukin break; 886d3916eacSRuslan Bukin } 887d3916eacSRuslan Bukin } 888d3916eacSRuslan Bukin if (vmr == NULL) 889d3916eacSRuslan Bukin goto out_user; 890d3916eacSRuslan Bukin 891d3916eacSRuslan Bukin error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 892d3916eacSRuslan Bukin vmr->read, vmr->write, retu); 893d3916eacSRuslan Bukin return (error); 894d3916eacSRuslan Bukin 895d3916eacSRuslan Bukin out_user: 896d3916eacSRuslan Bukin *retu = true; 897d3916eacSRuslan Bukin return (0); 898d3916eacSRuslan Bukin } 899d3916eacSRuslan Bukin 900d3916eacSRuslan Bukin int 901d3916eacSRuslan Bukin vm_suspend(struct vm *vm, enum vm_suspend_how how) 902d3916eacSRuslan Bukin { 903d3916eacSRuslan Bukin int i; 904d3916eacSRuslan Bukin 905d3916eacSRuslan Bukin if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 906d3916eacSRuslan Bukin return (EINVAL); 907d3916eacSRuslan Bukin 908d3916eacSRuslan Bukin if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 909d3916eacSRuslan Bukin VM_CTR2(vm, "virtual machine already suspended %d/%d", 910d3916eacSRuslan Bukin vm->suspend, how); 911d3916eacSRuslan Bukin return (EALREADY); 
912d3916eacSRuslan Bukin } 913d3916eacSRuslan Bukin 914d3916eacSRuslan Bukin VM_CTR1(vm, "virtual machine successfully suspended %d", how); 915d3916eacSRuslan Bukin 916d3916eacSRuslan Bukin /* 917d3916eacSRuslan Bukin * Notify all active vcpus that they are now suspended. 918d3916eacSRuslan Bukin */ 919d3916eacSRuslan Bukin for (i = 0; i < vm->maxcpus; i++) { 920d3916eacSRuslan Bukin if (CPU_ISSET(i, &vm->active_cpus)) 921d3916eacSRuslan Bukin vcpu_notify_event(vm_vcpu(vm, i)); 922d3916eacSRuslan Bukin } 923d3916eacSRuslan Bukin 924d3916eacSRuslan Bukin return (0); 925d3916eacSRuslan Bukin } 926d3916eacSRuslan Bukin 927d3916eacSRuslan Bukin void 928d3916eacSRuslan Bukin vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 929d3916eacSRuslan Bukin { 930d3916eacSRuslan Bukin struct vm *vm = vcpu->vm; 931d3916eacSRuslan Bukin struct vm_exit *vmexit; 932d3916eacSRuslan Bukin 933d3916eacSRuslan Bukin KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 934d3916eacSRuslan Bukin ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 935d3916eacSRuslan Bukin 936d3916eacSRuslan Bukin vmexit = vm_exitinfo(vcpu); 937d3916eacSRuslan Bukin vmexit->pc = pc; 938d3916eacSRuslan Bukin vmexit->inst_length = 4; 939d3916eacSRuslan Bukin vmexit->exitcode = VM_EXITCODE_SUSPENDED; 940d3916eacSRuslan Bukin vmexit->u.suspended.how = vm->suspend; 941d3916eacSRuslan Bukin } 942d3916eacSRuslan Bukin 943d3916eacSRuslan Bukin void 944d3916eacSRuslan Bukin vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 945d3916eacSRuslan Bukin { 946d3916eacSRuslan Bukin struct vm_exit *vmexit; 947d3916eacSRuslan Bukin 948d3916eacSRuslan Bukin vmexit = vm_exitinfo(vcpu); 949d3916eacSRuslan Bukin vmexit->pc = pc; 950d3916eacSRuslan Bukin vmexit->inst_length = 4; 951d3916eacSRuslan Bukin vmexit->exitcode = VM_EXITCODE_DEBUG; 952d3916eacSRuslan Bukin } 953d3916eacSRuslan Bukin 954d3916eacSRuslan Bukin int 955d3916eacSRuslan Bukin vm_activate_cpu(struct vcpu *vcpu) 956d3916eacSRuslan Bukin { 
957d3916eacSRuslan Bukin struct vm *vm = vcpu->vm; 958d3916eacSRuslan Bukin 959d3916eacSRuslan Bukin if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 960d3916eacSRuslan Bukin return (EBUSY); 961d3916eacSRuslan Bukin 962d3916eacSRuslan Bukin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 963d3916eacSRuslan Bukin return (0); 964d3916eacSRuslan Bukin 965d3916eacSRuslan Bukin } 966d3916eacSRuslan Bukin 967d3916eacSRuslan Bukin int 968d3916eacSRuslan Bukin vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 969d3916eacSRuslan Bukin { 970d3916eacSRuslan Bukin if (vcpu == NULL) { 971d3916eacSRuslan Bukin vm->debug_cpus = vm->active_cpus; 972d3916eacSRuslan Bukin for (int i = 0; i < vm->maxcpus; i++) { 973d3916eacSRuslan Bukin if (CPU_ISSET(i, &vm->active_cpus)) 974d3916eacSRuslan Bukin vcpu_notify_event(vm_vcpu(vm, i)); 975d3916eacSRuslan Bukin } 976d3916eacSRuslan Bukin } else { 977d3916eacSRuslan Bukin if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 978d3916eacSRuslan Bukin return (EINVAL); 979d3916eacSRuslan Bukin 980d3916eacSRuslan Bukin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 981d3916eacSRuslan Bukin vcpu_notify_event(vcpu); 982d3916eacSRuslan Bukin } 983d3916eacSRuslan Bukin return (0); 984d3916eacSRuslan Bukin } 985d3916eacSRuslan Bukin 986d3916eacSRuslan Bukin int 987d3916eacSRuslan Bukin vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 988d3916eacSRuslan Bukin { 989d3916eacSRuslan Bukin 990d3916eacSRuslan Bukin if (vcpu == NULL) { 991d3916eacSRuslan Bukin CPU_ZERO(&vm->debug_cpus); 992d3916eacSRuslan Bukin } else { 993d3916eacSRuslan Bukin if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 994d3916eacSRuslan Bukin return (EINVAL); 995d3916eacSRuslan Bukin 996d3916eacSRuslan Bukin CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 997d3916eacSRuslan Bukin } 998d3916eacSRuslan Bukin return (0); 999d3916eacSRuslan Bukin } 1000d3916eacSRuslan Bukin 1001d3916eacSRuslan Bukin int 1002d3916eacSRuslan Bukin vcpu_debugged(struct vcpu *vcpu) 1003d3916eacSRuslan Bukin { 
1004d3916eacSRuslan Bukin 1005d3916eacSRuslan Bukin return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1006d3916eacSRuslan Bukin } 1007d3916eacSRuslan Bukin 1008d3916eacSRuslan Bukin cpuset_t 1009d3916eacSRuslan Bukin vm_active_cpus(struct vm *vm) 1010d3916eacSRuslan Bukin { 1011d3916eacSRuslan Bukin 1012d3916eacSRuslan Bukin return (vm->active_cpus); 1013d3916eacSRuslan Bukin } 1014d3916eacSRuslan Bukin 1015d3916eacSRuslan Bukin cpuset_t 1016d3916eacSRuslan Bukin vm_debug_cpus(struct vm *vm) 1017d3916eacSRuslan Bukin { 1018d3916eacSRuslan Bukin 1019d3916eacSRuslan Bukin return (vm->debug_cpus); 1020d3916eacSRuslan Bukin } 1021d3916eacSRuslan Bukin 1022d3916eacSRuslan Bukin cpuset_t 1023d3916eacSRuslan Bukin vm_suspended_cpus(struct vm *vm) 1024d3916eacSRuslan Bukin { 1025d3916eacSRuslan Bukin 1026d3916eacSRuslan Bukin return (vm->suspended_cpus); 1027d3916eacSRuslan Bukin } 1028d3916eacSRuslan Bukin 1029d3916eacSRuslan Bukin 1030d3916eacSRuslan Bukin void * 1031d3916eacSRuslan Bukin vcpu_stats(struct vcpu *vcpu) 1032d3916eacSRuslan Bukin { 1033d3916eacSRuslan Bukin 1034d3916eacSRuslan Bukin return (vcpu->stats); 1035d3916eacSRuslan Bukin } 1036d3916eacSRuslan Bukin 1037d3916eacSRuslan Bukin /* 1038d3916eacSRuslan Bukin * This function is called to ensure that a vcpu "sees" a pending event 1039d3916eacSRuslan Bukin * as soon as possible: 1040d3916eacSRuslan Bukin * - If the vcpu thread is sleeping then it is woken up. 1041d3916eacSRuslan Bukin * - If the vcpu is running on a different host_cpu then an IPI will be directed 1042d3916eacSRuslan Bukin * to the host_cpu to cause the vcpu to trap into the hypervisor. 
1043d3916eacSRuslan Bukin */ 1044d3916eacSRuslan Bukin static void 1045d3916eacSRuslan Bukin vcpu_notify_event_locked(struct vcpu *vcpu) 1046d3916eacSRuslan Bukin { 1047d3916eacSRuslan Bukin int hostcpu; 1048d3916eacSRuslan Bukin 1049d3916eacSRuslan Bukin hostcpu = vcpu->hostcpu; 1050d3916eacSRuslan Bukin if (vcpu->state == VCPU_RUNNING) { 1051d3916eacSRuslan Bukin KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1052d3916eacSRuslan Bukin if (hostcpu != curcpu) { 1053d3916eacSRuslan Bukin ipi_cpu(hostcpu, vmm_ipinum); 1054d3916eacSRuslan Bukin } else { 1055d3916eacSRuslan Bukin /* 1056d3916eacSRuslan Bukin * If the 'vcpu' is running on 'curcpu' then it must 1057d3916eacSRuslan Bukin * be sending a notification to itself (e.g. SELF_IPI). 1058d3916eacSRuslan Bukin * The pending event will be picked up when the vcpu 1059d3916eacSRuslan Bukin * transitions back to guest context. 1060d3916eacSRuslan Bukin */ 1061d3916eacSRuslan Bukin } 1062d3916eacSRuslan Bukin } else { 1063d3916eacSRuslan Bukin KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1064d3916eacSRuslan Bukin "with hostcpu %d", vcpu->state, hostcpu)); 1065d3916eacSRuslan Bukin if (vcpu->state == VCPU_SLEEPING) 1066d3916eacSRuslan Bukin wakeup_one(vcpu); 1067d3916eacSRuslan Bukin } 1068d3916eacSRuslan Bukin } 1069d3916eacSRuslan Bukin 1070d3916eacSRuslan Bukin void 1071d3916eacSRuslan Bukin vcpu_notify_event(struct vcpu *vcpu) 1072d3916eacSRuslan Bukin { 1073d3916eacSRuslan Bukin vcpu_lock(vcpu); 1074d3916eacSRuslan Bukin vcpu_notify_event_locked(vcpu); 1075d3916eacSRuslan Bukin vcpu_unlock(vcpu); 1076d3916eacSRuslan Bukin } 1077d3916eacSRuslan Bukin 1078d3916eacSRuslan Bukin static void 1079d3916eacSRuslan Bukin restore_guest_fpustate(struct vcpu *vcpu) 1080d3916eacSRuslan Bukin { 1081d3916eacSRuslan Bukin 1082d3916eacSRuslan Bukin /* Flush host state to the pcb. 
*/ 1083d3916eacSRuslan Bukin fpe_state_save(curthread); 1084d3916eacSRuslan Bukin 1085d3916eacSRuslan Bukin /* Ensure the VFP state will be re-loaded when exiting the guest. */ 1086d3916eacSRuslan Bukin PCPU_SET(fpcurthread, NULL); 1087d3916eacSRuslan Bukin 1088d3916eacSRuslan Bukin /* restore guest FPU state */ 1089d3916eacSRuslan Bukin fpe_enable(); 1090d3916eacSRuslan Bukin fpe_restore(vcpu->guestfpu); 1091d3916eacSRuslan Bukin 1092d3916eacSRuslan Bukin /* 1093d3916eacSRuslan Bukin * The FPU is now "dirty" with the guest's state so turn on emulation 1094d3916eacSRuslan Bukin * to trap any access to the FPU by the host. 1095d3916eacSRuslan Bukin */ 1096d3916eacSRuslan Bukin fpe_disable(); 1097d3916eacSRuslan Bukin } 1098d3916eacSRuslan Bukin 1099d3916eacSRuslan Bukin static void 1100d3916eacSRuslan Bukin save_guest_fpustate(struct vcpu *vcpu) 1101d3916eacSRuslan Bukin { 1102d3916eacSRuslan Bukin 1103d3916eacSRuslan Bukin /* Save guest FPE state. */ 1104d3916eacSRuslan Bukin fpe_enable(); 1105d3916eacSRuslan Bukin fpe_store(vcpu->guestfpu); 1106d3916eacSRuslan Bukin fpe_disable(); 1107d3916eacSRuslan Bukin 1108d3916eacSRuslan Bukin KASSERT(PCPU_GET(fpcurthread) == NULL, 1109d3916eacSRuslan Bukin ("%s: fpcurthread set with guest registers", __func__)); 1110d3916eacSRuslan Bukin } 1111d3916eacSRuslan Bukin 1112d3916eacSRuslan Bukin static int 1113d3916eacSRuslan Bukin vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1114d3916eacSRuslan Bukin bool from_idle) 1115d3916eacSRuslan Bukin { 1116d3916eacSRuslan Bukin int error; 1117d3916eacSRuslan Bukin 1118d3916eacSRuslan Bukin vcpu_assert_locked(vcpu); 1119d3916eacSRuslan Bukin 1120d3916eacSRuslan Bukin /* 1121d3916eacSRuslan Bukin * State transitions from the vmmdev_ioctl() must always begin from 1122d3916eacSRuslan Bukin * the VCPU_IDLE state. This guarantees that there is only a single 1123d3916eacSRuslan Bukin * ioctl() operating on a vcpu at any point. 
1124d3916eacSRuslan Bukin */ 1125d3916eacSRuslan Bukin if (from_idle) { 1126d3916eacSRuslan Bukin while (vcpu->state != VCPU_IDLE) { 1127d3916eacSRuslan Bukin vcpu_notify_event_locked(vcpu); 1128d3916eacSRuslan Bukin msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", 1129d3916eacSRuslan Bukin hz / 1000); 1130d3916eacSRuslan Bukin } 1131d3916eacSRuslan Bukin } else { 1132d3916eacSRuslan Bukin KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1133d3916eacSRuslan Bukin "vcpu idle state")); 1134d3916eacSRuslan Bukin } 1135d3916eacSRuslan Bukin 1136d3916eacSRuslan Bukin if (vcpu->state == VCPU_RUNNING) { 1137d3916eacSRuslan Bukin KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1138d3916eacSRuslan Bukin "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1139d3916eacSRuslan Bukin } else { 1140d3916eacSRuslan Bukin KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1141d3916eacSRuslan Bukin "vcpu that is not running", vcpu->hostcpu)); 1142d3916eacSRuslan Bukin } 1143d3916eacSRuslan Bukin 1144d3916eacSRuslan Bukin /* 1145d3916eacSRuslan Bukin * The following state transitions are allowed: 1146d3916eacSRuslan Bukin * IDLE -> FROZEN -> IDLE 1147d3916eacSRuslan Bukin * FROZEN -> RUNNING -> FROZEN 1148d3916eacSRuslan Bukin * FROZEN -> SLEEPING -> FROZEN 1149d3916eacSRuslan Bukin */ 1150d3916eacSRuslan Bukin switch (vcpu->state) { 1151d3916eacSRuslan Bukin case VCPU_IDLE: 1152d3916eacSRuslan Bukin case VCPU_RUNNING: 1153d3916eacSRuslan Bukin case VCPU_SLEEPING: 1154d3916eacSRuslan Bukin error = (newstate != VCPU_FROZEN); 1155d3916eacSRuslan Bukin break; 1156d3916eacSRuslan Bukin case VCPU_FROZEN: 1157d3916eacSRuslan Bukin error = (newstate == VCPU_FROZEN); 1158d3916eacSRuslan Bukin break; 1159d3916eacSRuslan Bukin default: 1160d3916eacSRuslan Bukin error = 1; 1161d3916eacSRuslan Bukin break; 1162d3916eacSRuslan Bukin } 1163d3916eacSRuslan Bukin 1164d3916eacSRuslan Bukin if (error) 1165d3916eacSRuslan Bukin return (EBUSY); 1166d3916eacSRuslan 
Bukin 1167d3916eacSRuslan Bukin vcpu->state = newstate; 1168d3916eacSRuslan Bukin if (newstate == VCPU_RUNNING) 1169d3916eacSRuslan Bukin vcpu->hostcpu = curcpu; 1170d3916eacSRuslan Bukin else 1171d3916eacSRuslan Bukin vcpu->hostcpu = NOCPU; 1172d3916eacSRuslan Bukin 1173d3916eacSRuslan Bukin if (newstate == VCPU_IDLE) 1174d3916eacSRuslan Bukin wakeup(&vcpu->state); 1175d3916eacSRuslan Bukin 1176d3916eacSRuslan Bukin return (0); 1177d3916eacSRuslan Bukin } 1178d3916eacSRuslan Bukin 1179d3916eacSRuslan Bukin static void 1180d3916eacSRuslan Bukin vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1181d3916eacSRuslan Bukin { 1182d3916eacSRuslan Bukin int error; 1183d3916eacSRuslan Bukin 1184d3916eacSRuslan Bukin if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1185d3916eacSRuslan Bukin panic("Error %d setting state to %d\n", error, newstate); 1186d3916eacSRuslan Bukin } 1187d3916eacSRuslan Bukin 1188d3916eacSRuslan Bukin static void 1189d3916eacSRuslan Bukin vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1190d3916eacSRuslan Bukin { 1191d3916eacSRuslan Bukin int error; 1192d3916eacSRuslan Bukin 1193d3916eacSRuslan Bukin if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1194d3916eacSRuslan Bukin panic("Error %d setting state to %d", error, newstate); 1195d3916eacSRuslan Bukin } 1196d3916eacSRuslan Bukin 1197d3916eacSRuslan Bukin int 1198d3916eacSRuslan Bukin vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1199d3916eacSRuslan Bukin { 1200d3916eacSRuslan Bukin 1201d3916eacSRuslan Bukin if (type < 0 || type >= VM_CAP_MAX) 1202d3916eacSRuslan Bukin return (EINVAL); 1203d3916eacSRuslan Bukin 1204d3916eacSRuslan Bukin return (vmmops_getcap(vcpu->cookie, type, retval)); 1205d3916eacSRuslan Bukin } 1206d3916eacSRuslan Bukin 1207d3916eacSRuslan Bukin int 1208d3916eacSRuslan Bukin vm_set_capability(struct vcpu *vcpu, int type, int val) 1209d3916eacSRuslan Bukin { 1210d3916eacSRuslan Bukin 1211d3916eacSRuslan 
Bukin if (type < 0 || type >= VM_CAP_MAX) 1212d3916eacSRuslan Bukin return (EINVAL); 1213d3916eacSRuslan Bukin 1214d3916eacSRuslan Bukin return (vmmops_setcap(vcpu->cookie, type, val)); 1215d3916eacSRuslan Bukin } 1216d3916eacSRuslan Bukin 1217d3916eacSRuslan Bukin struct vm * 1218d3916eacSRuslan Bukin vcpu_vm(struct vcpu *vcpu) 1219d3916eacSRuslan Bukin { 1220d3916eacSRuslan Bukin 1221d3916eacSRuslan Bukin return (vcpu->vm); 1222d3916eacSRuslan Bukin } 1223d3916eacSRuslan Bukin 1224d3916eacSRuslan Bukin int 1225d3916eacSRuslan Bukin vcpu_vcpuid(struct vcpu *vcpu) 1226d3916eacSRuslan Bukin { 1227d3916eacSRuslan Bukin 1228d3916eacSRuslan Bukin return (vcpu->vcpuid); 1229d3916eacSRuslan Bukin } 1230d3916eacSRuslan Bukin 1231d3916eacSRuslan Bukin void * 1232d3916eacSRuslan Bukin vcpu_get_cookie(struct vcpu *vcpu) 1233d3916eacSRuslan Bukin { 1234d3916eacSRuslan Bukin 1235d3916eacSRuslan Bukin return (vcpu->cookie); 1236d3916eacSRuslan Bukin } 1237d3916eacSRuslan Bukin 1238d3916eacSRuslan Bukin struct vcpu * 1239d3916eacSRuslan Bukin vm_vcpu(struct vm *vm, int vcpuid) 1240d3916eacSRuslan Bukin { 1241d3916eacSRuslan Bukin 1242d3916eacSRuslan Bukin return (vm->vcpu[vcpuid]); 1243d3916eacSRuslan Bukin } 1244d3916eacSRuslan Bukin 1245d3916eacSRuslan Bukin int 1246d3916eacSRuslan Bukin vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1247d3916eacSRuslan Bukin { 1248d3916eacSRuslan Bukin int error; 1249d3916eacSRuslan Bukin 1250d3916eacSRuslan Bukin vcpu_lock(vcpu); 1251d3916eacSRuslan Bukin error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1252d3916eacSRuslan Bukin vcpu_unlock(vcpu); 1253d3916eacSRuslan Bukin 1254d3916eacSRuslan Bukin return (error); 1255d3916eacSRuslan Bukin } 1256d3916eacSRuslan Bukin 1257d3916eacSRuslan Bukin enum vcpu_state 1258d3916eacSRuslan Bukin vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1259d3916eacSRuslan Bukin { 1260d3916eacSRuslan Bukin enum vcpu_state state; 1261d3916eacSRuslan Bukin 1262d3916eacSRuslan 
	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

/*
 * Wire and map a single guest physical page so the host can access it.
 *
 * Only spans that stay within one page are allowed (gpa/len crossing a page
 * boundary panics).  The page is looked up in the VM's system-memory
 * mappings; on success a wired vm_page_t is stored in '*cookie' (released
 * later via vm_gpa_release()) and a host virtual address (direct map +
 * page offset) for the page is returned.  Returns NULL and a NULL cookie
 * if 'gpa' is not backed by a sysmem mapping or the hold fails.
 */
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		/* Only system memory mappings may be held, not devmem. */
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

/*
 * Per-vcpu variant of _vm_gpa_hold().  The caller must have the vcpu
 * frozen, which guarantees the memmap array cannot change underneath us.
 */
void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

/*
 * VM-wide variant of _vm_gpa_hold(); memmap stability is instead
 * guaranteed by holding the memory segments lock.
 */
void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

/*
 * Release a page held by vm_gpa_hold()/vm_gpa_hold_global().  'cookie'
 * is the vm_page_t stashed by _vm_gpa_hold().
 */
void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

/*
 * Read a guest register into '*retval'.  Returns EINVAL for an
 * out-of-range register id; otherwise the vmmops backend result.
 */
int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

/*
 * Write 'val' to a guest register.  Writing SEPC also updates the
 * vcpu's next program counter so the guest resumes at the new address.
 */
int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_SEPC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

/* Return the backend-private cookie associated with the VM. */
void *
vm_get_cookie(struct vm *vm)
{

	return (vm->cookie);
}

/* Inject an exception with the given scause into the guest vcpu. */
int
vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
{

	return (vmmops_exception(vcpu->cookie, scause));
}

/* Attach the emulated APLIC interrupt controller described by 'descr'. */
int
vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
{

	return (aplic_attach_to_vm(vm->cookie, descr));
}

/* Assert (raise) the given IRQ line on the VM's APLIC. */
int
vm_assert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, true));
}

/* Deassert (lower) the given IRQ line on the VM's APLIC. */
int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, false));
}

/*
 * Deliver an MSI to the VM.  The bus/slot/func identification is unused
 * on this platform; only the message and doorbell address matter.
 */
int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{

	return (aplic_inject_msi(vm->cookie, msg, addr));
}

/*
 * Handle a guest WFI exit: idle the vcpu until an interrupt (APLIC,
 * IPI, or pending hvip interrupt) arrives or a yield is required.
 * Always resumes in the kernel ('*retu' = false).
 */
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{

	vcpu_lock(vcpu);

	while (1) {
		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (riscv_check_interrupts_pending(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}

/*
 * Handle a guest page fault exit: fault the page into the VM's vmspace.
 * The faulting guest physical address is reconstructed from htval (which
 * holds GPA bits 63:2, hence the << 2) and truncated to a page boundary.
 * Returns 0 on success, EFAULT if the address cannot be faulted in.
 */
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	/* Map the guest-page-fault cause to the VM fault type. */
	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

/*
 * Handle a VM-suspend exit: mark this vcpu suspended, wait until every
 * active vcpu has done the same (periodically checking for thread
 * suspension requests), then wake the other sleeping vcpus and return
 * to userspace ('*retu' = true).
 */
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		/*
		 * Drop the vcpu lock before servicing a pending thread
		 * suspension; thread_check_susp() may sleep.
		 */
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

/*
 * Main vcpu run loop.  Enters the guest via the vmmops backend and
 * dispatches each exit to the appropriate in-kernel handler; loops until
 * an error occurs or the exit must be completed in userland ('retu').
 * FPU state is switched and a critical section held around guest entry.
 * Returns EINVAL if the vcpu is not active or already suspended.
 */
int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}