1d3916eacSRuslan Bukin /*- 2d3916eacSRuslan Bukin * SPDX-License-Identifier: BSD-2-Clause 3d3916eacSRuslan Bukin * 4d3916eacSRuslan Bukin * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5d3916eacSRuslan Bukin * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com> 6d3916eacSRuslan Bukin * 7d3916eacSRuslan Bukin * This software was developed by the University of Cambridge Computer 8d3916eacSRuslan Bukin * Laboratory (Department of Computer Science and Technology) under Innovate 9d3916eacSRuslan Bukin * UK project 105694, "Digital Security by Design (DSbD) Technology Platform 10d3916eacSRuslan Bukin * Prototype". 11d3916eacSRuslan Bukin * 12d3916eacSRuslan Bukin * Redistribution and use in source and binary forms, with or without 13d3916eacSRuslan Bukin * modification, are permitted provided that the following conditions 14d3916eacSRuslan Bukin * are met: 15d3916eacSRuslan Bukin * 1. Redistributions of source code must retain the above copyright 16d3916eacSRuslan Bukin * notice, this list of conditions and the following disclaimer. 17d3916eacSRuslan Bukin * 2. Redistributions in binary form must reproduce the above copyright 18d3916eacSRuslan Bukin * notice, this list of conditions and the following disclaimer in the 19d3916eacSRuslan Bukin * documentation and/or other materials provided with the distribution. 20d3916eacSRuslan Bukin * 21d3916eacSRuslan Bukin * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22d3916eacSRuslan Bukin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23d3916eacSRuslan Bukin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24d3916eacSRuslan Bukin * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 25d3916eacSRuslan Bukin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26d3916eacSRuslan Bukin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27d3916eacSRuslan Bukin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28d3916eacSRuslan Bukin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29d3916eacSRuslan Bukin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30d3916eacSRuslan Bukin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31d3916eacSRuslan Bukin * SUCH DAMAGE. 32d3916eacSRuslan Bukin */ 33d3916eacSRuslan Bukin 34d3916eacSRuslan Bukin #include <sys/param.h> 35d3916eacSRuslan Bukin #include <sys/systm.h> 36d3916eacSRuslan Bukin #include <sys/cpuset.h> 37d3916eacSRuslan Bukin #include <sys/kernel.h> 38d3916eacSRuslan Bukin #include <sys/linker.h> 39d3916eacSRuslan Bukin #include <sys/lock.h> 40d3916eacSRuslan Bukin #include <sys/malloc.h> 41d3916eacSRuslan Bukin #include <sys/module.h> 42d3916eacSRuslan Bukin #include <sys/mutex.h> 43d3916eacSRuslan Bukin #include <sys/pcpu.h> 44d3916eacSRuslan Bukin #include <sys/proc.h> 45d3916eacSRuslan Bukin #include <sys/queue.h> 46d3916eacSRuslan Bukin #include <sys/rwlock.h> 47d3916eacSRuslan Bukin #include <sys/sched.h> 48d3916eacSRuslan Bukin #include <sys/smp.h> 49d3916eacSRuslan Bukin #include <sys/sysctl.h> 50d3916eacSRuslan Bukin 51d3916eacSRuslan Bukin #include <vm/vm.h> 52d3916eacSRuslan Bukin #include <vm/vm_object.h> 53d3916eacSRuslan Bukin #include <vm/vm_page.h> 54d3916eacSRuslan Bukin #include <vm/pmap.h> 55d3916eacSRuslan Bukin #include <vm/vm_map.h> 56d3916eacSRuslan Bukin #include <vm/vm_extern.h> 57d3916eacSRuslan Bukin #include <vm/vm_param.h> 58d3916eacSRuslan Bukin 59d3916eacSRuslan Bukin #include <machine/riscvreg.h> 60d3916eacSRuslan Bukin #include <machine/cpu.h> 61d3916eacSRuslan Bukin 
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

/*
 * Per-virtual-CPU state.  Lifetime: allocated lazily by vm_alloc_vcpu(),
 * torn down by vcpu_cleanup().  Locking annotations follow the same
 * convention as struct vm below ((o)/(i)/(x)); 'mtx' is a spin mutex
 * guarding the run state.
 */
struct vcpu {
	int flags;
	enum vcpu_state state;		/* run state; guarded by 'mtx' */
	struct mtx mtx;			/* spin lock for state transitions */
	int hostcpu;	/* host cpuid this vcpu last ran on */
	int vcpuid;	/* guest-relative vcpu index */
	void *stats;	/* vmm_stat buffer (see vmm_stat_alloc()) */
	struct vm_exit exitinfo;	/* details of the last VM exit */
	uint64_t nextpc;	/* (x) next instruction to execute */
	struct vm *vm;		/* (o) */
	void *cookie;		/* (i) cpu-specific data */
	struct fpreg *guestfpu;	/* (a,i) guest fpu state */
};

/*
 * vcpu lock helpers.  A spin mutex is used because the lock is taken from
 * contexts that must not sleep (e.g. event notification of a running vcpu).
 */
#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

/*
 * A backing memory segment: a VM object of 'len' bytes that guest memory
 * maps (struct mem_map) are carved out of.
 */
struct mem_seg {
	uint64_t gpa;
	size_t len;
	bool wired;
	bool sysmem;		/* true for system RAM, false for devmem */
	vm_object_t object;	/* backing VM object; NULL when segment free */
};
#define	VM_MAX_MEMSEGS	3

/*
 * A mapping of part of a mem_seg into the guest physical address space.
 * A map slot is free when 'len' == 0.
 */
struct mem_map {
	vm_paddr_t gpa;		/* guest physical base address */
	size_t len;
	vm_ooffset_t segoff;	/* offset into the backing segment */
	int segid;		/* index into vm->mem_segs[] */
	int prot;		/* VM_PROT_* bits */
	int flags;		/* VM_MEMMAP_F_* bits */
};
#define	VM_MAX_MEMMAPS	4

/* A guest-physical MMIO range with emulation callbacks. */
struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void *cookie;			/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;	/* (i) active vcpus */
	volatile cpuset_t debug_cpus;	/* (i) vcpus stopped for debug*/
	int suspend;			/* (i) stop VM execution */
	bool dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;	/* (x) cpus in a hard halt */
	struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace *vmspace;	/* (o) guest's address space */
	char name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu **vcpu;		/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
					/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t sockets;		/* (o) num of sockets */
	uint16_t cores;			/* (o) num of cores/socket */
	uint16_t threads;		/* (o) num of threads/core */
	uint16_t maxcpus;		/* (o) max pluggable cpus */
	struct sx mem_segs_lock;	/* (o) */
	struct sx vcpus_init_lock;	/* (o) */
};

/* Set once module init (vmm_init) succeeds; gates VM creation. */
static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

/*
 * Release a vcpu's backend state.  With 'destroy' also free the stats
 * buffer, FPU save area and the lock; without it the vcpu can be
 * re-initialized later by vcpu_init() (VM reset path).
 */
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

/*
 * Allocate and minimally initialize a vcpu structure.  Backend-specific
 * state is attached separately by vcpu_init().
 */
static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;	/* never ran on a host cpu yet */
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

/*
 * (Re)initialize per-vcpu backend state; called on creation and again on
 * VM reinit (see vm_init()).
 */
static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

/* Return the exit information record for the given vcpu. */
struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

/*
 * One-time module initialization: establish vm_maxcpu (tunable
 * "hw.vmm.maxcpu", clamped to [1, VM_MAXCPU]) and initialize the
 * hypervisor backend.
 */
static int
vmm_init(void)
{

	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

/*
 * Module event handler.  On load, the character device layer must come up
 * before the hypervisor backend; if the backend fails, the device layer is
 * torn down again so load failure leaves no residue.
 */
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

/*
 * Initialize (create == true) or re-initialize (create == false, VM reset)
 * the per-VM state.  On reinit every already-allocated vcpu is
 * re-initialized in place.
 */
static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

/*
 * Mark the VM as dying so vm_alloc_vcpu() refuses to create new vcpus
 * (used during VM teardown).
 */
void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

/*
 * Look up (and lazily create) the vcpu with the given id.  The fast path
 * is a lockless acquire-load of vm->vcpu[vcpuid]; creation is serialized
 * by vcpus_init_lock and published with a release-store so unlocked
 * readers only ever observe a fully constructed vcpu.
 */
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

/*
 * Create a new virtual machine named 'name'.  Returns 0 and the VM in
 * '*retvm', ENXIO if the module failed to initialize, EINVAL for a bad
 * name, or ENOMEM if the guest address space cannot be allocated.
 */
int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	/* 1ul << 39: 39-bit guest physical address space (Sv39-sized). */
	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

/*
 * Set the VM cpu topology; the product of the three dimensions must not
 * exceed vm->maxcpus.  The 'maxcpus' argument is ignored.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return(0);
}

/*
 * Tear down VM state.  With destroy == false (VM reset) only device-memory
 * mappings are removed; with destroy == true everything, including the
 * vmspace, vcpus and locks, is freed.
 */
static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	int i;

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			/*
			 * NOTE(review): 'destroy' is always false inside
			 * this branch, so the "destroy ||" term looks
			 * redundant — confirm against upstream history.
			 */
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{

	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

/*
 * Reset the VM.  Fails with EBUSY unless every active vcpu is suspended.
 */
int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

/*
 * Allocate a backing memory segment of 'len' bytes (page-aligned) in slot
 * 'ident'.  Returns EEXIST if an identical segment already exists, EINVAL
 * for bad arguments or a conflicting existing segment, ENOMEM on object
 * allocation failure.  Caller holds mem_segs_lock exclusively.
 */
int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

/*
 * Query segment 'ident'; any of the output pointers may be NULL.  Caller
 * holds mem_segs_lock (shared or exclusive).
 */
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

/*
 * Release segment 'ident': drop the object reference and clear the slot.
 * A no-op for an unused slot.
 */
void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

/*
 * Map [first, first + len) of segment 'segid' at guest physical address
 * 'gpa'.  All of gpa/first/len must be page-aligned and fall inside the
 * segment.  On success the mapping is recorded in a free mem_maps[] slot
 * (ENOSPC if none).  With VM_MEMMAP_F_WIRED the range is wired; on wire
 * failure the just-created map entry is backed out before returning.
 */
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	/* Find a free map slot (len == 0 marks a free entry). */
	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	/* The map now holds a reference on the backing object. */
	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			/* Undo vm_map_find() before failing. */
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

/*
 * Remove the mapping whose base and length exactly match (gpa, len).
 * Returns EINVAL if no such mapping exists.
 */
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct
vm *vm, vm_paddr_t *gpa, int *segid, 737d3916eacSRuslan Bukin vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 738d3916eacSRuslan Bukin { 739d3916eacSRuslan Bukin struct mem_map *mm, *mmnext; 740d3916eacSRuslan Bukin int i; 741d3916eacSRuslan Bukin 742d3916eacSRuslan Bukin mmnext = NULL; 743d3916eacSRuslan Bukin for (i = 0; i < VM_MAX_MEMMAPS; i++) { 744d3916eacSRuslan Bukin mm = &vm->mem_maps[i]; 745d3916eacSRuslan Bukin if (mm->len == 0 || mm->gpa < *gpa) 746d3916eacSRuslan Bukin continue; 747d3916eacSRuslan Bukin if (mmnext == NULL || mm->gpa < mmnext->gpa) 748d3916eacSRuslan Bukin mmnext = mm; 749d3916eacSRuslan Bukin } 750d3916eacSRuslan Bukin 751d3916eacSRuslan Bukin if (mmnext != NULL) { 752d3916eacSRuslan Bukin *gpa = mmnext->gpa; 753d3916eacSRuslan Bukin if (segid) 754d3916eacSRuslan Bukin *segid = mmnext->segid; 755d3916eacSRuslan Bukin if (segoff) 756d3916eacSRuslan Bukin *segoff = mmnext->segoff; 757d3916eacSRuslan Bukin if (len) 758d3916eacSRuslan Bukin *len = mmnext->len; 759d3916eacSRuslan Bukin if (prot) 760d3916eacSRuslan Bukin *prot = mmnext->prot; 761d3916eacSRuslan Bukin if (flags) 762d3916eacSRuslan Bukin *flags = mmnext->flags; 763d3916eacSRuslan Bukin return (0); 764d3916eacSRuslan Bukin } else { 765d3916eacSRuslan Bukin return (ENOENT); 766d3916eacSRuslan Bukin } 767d3916eacSRuslan Bukin } 768d3916eacSRuslan Bukin 769d3916eacSRuslan Bukin static void 770d3916eacSRuslan Bukin vm_free_memmap(struct vm *vm, int ident) 771d3916eacSRuslan Bukin { 772d3916eacSRuslan Bukin struct mem_map *mm; 773d3916eacSRuslan Bukin int error __diagused; 774d3916eacSRuslan Bukin 775d3916eacSRuslan Bukin mm = &vm->mem_maps[ident]; 776d3916eacSRuslan Bukin if (mm->len) { 777d3916eacSRuslan Bukin error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 778d3916eacSRuslan Bukin mm->gpa + mm->len); 779d3916eacSRuslan Bukin KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 780d3916eacSRuslan Bukin __func__, error)); 781d3916eacSRuslan Bukin bzero(mm, 
sizeof(struct mem_map)); 782d3916eacSRuslan Bukin } 783d3916eacSRuslan Bukin } 784d3916eacSRuslan Bukin 785d3916eacSRuslan Bukin static __inline bool 786d3916eacSRuslan Bukin sysmem_mapping(struct vm *vm, struct mem_map *mm) 787d3916eacSRuslan Bukin { 788d3916eacSRuslan Bukin 789d3916eacSRuslan Bukin if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 790d3916eacSRuslan Bukin return (true); 791d3916eacSRuslan Bukin else 792d3916eacSRuslan Bukin return (false); 793d3916eacSRuslan Bukin } 794d3916eacSRuslan Bukin 795d3916eacSRuslan Bukin vm_paddr_t 796d3916eacSRuslan Bukin vmm_sysmem_maxaddr(struct vm *vm) 797d3916eacSRuslan Bukin { 798d3916eacSRuslan Bukin struct mem_map *mm; 799d3916eacSRuslan Bukin vm_paddr_t maxaddr; 800d3916eacSRuslan Bukin int i; 801d3916eacSRuslan Bukin 802d3916eacSRuslan Bukin maxaddr = 0; 803d3916eacSRuslan Bukin for (i = 0; i < VM_MAX_MEMMAPS; i++) { 804d3916eacSRuslan Bukin mm = &vm->mem_maps[i]; 805d3916eacSRuslan Bukin if (sysmem_mapping(vm, mm)) { 806d3916eacSRuslan Bukin if (maxaddr < mm->gpa + mm->len) 807d3916eacSRuslan Bukin maxaddr = mm->gpa + mm->len; 808d3916eacSRuslan Bukin } 809d3916eacSRuslan Bukin } 810d3916eacSRuslan Bukin return (maxaddr); 811d3916eacSRuslan Bukin } 812d3916eacSRuslan Bukin 813d3916eacSRuslan Bukin int 814d3916eacSRuslan Bukin vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 815d3916eacSRuslan Bukin uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 816d3916eacSRuslan Bukin { 817d3916eacSRuslan Bukin int error; 818d3916eacSRuslan Bukin 819d3916eacSRuslan Bukin error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); 820d3916eacSRuslan Bukin 821d3916eacSRuslan Bukin return (error); 822d3916eacSRuslan Bukin } 823d3916eacSRuslan Bukin 824d3916eacSRuslan Bukin void 825d3916eacSRuslan Bukin vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 826d3916eacSRuslan Bukin mem_region_read_t mmio_read, mem_region_write_t mmio_write) 827d3916eacSRuslan Bukin { 
828d3916eacSRuslan Bukin int i; 829d3916eacSRuslan Bukin 830d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 831d3916eacSRuslan Bukin if (vm->mmio_region[i].start == 0 && 832d3916eacSRuslan Bukin vm->mmio_region[i].end == 0) { 833d3916eacSRuslan Bukin vm->mmio_region[i].start = start; 834d3916eacSRuslan Bukin vm->mmio_region[i].end = start + size; 835d3916eacSRuslan Bukin vm->mmio_region[i].read = mmio_read; 836d3916eacSRuslan Bukin vm->mmio_region[i].write = mmio_write; 837d3916eacSRuslan Bukin return; 838d3916eacSRuslan Bukin } 839d3916eacSRuslan Bukin } 840d3916eacSRuslan Bukin 841d3916eacSRuslan Bukin panic("%s: No free MMIO region", __func__); 842d3916eacSRuslan Bukin } 843d3916eacSRuslan Bukin 844d3916eacSRuslan Bukin void 845d3916eacSRuslan Bukin vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 846d3916eacSRuslan Bukin { 847d3916eacSRuslan Bukin int i; 848d3916eacSRuslan Bukin 849d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 850d3916eacSRuslan Bukin if (vm->mmio_region[i].start == start && 851d3916eacSRuslan Bukin vm->mmio_region[i].end == start + size) { 852d3916eacSRuslan Bukin memset(&vm->mmio_region[i], 0, 853d3916eacSRuslan Bukin sizeof(vm->mmio_region[i])); 854d3916eacSRuslan Bukin return; 855d3916eacSRuslan Bukin } 856d3916eacSRuslan Bukin } 857d3916eacSRuslan Bukin 858d3916eacSRuslan Bukin panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 859d3916eacSRuslan Bukin start + size); 860d3916eacSRuslan Bukin } 861d3916eacSRuslan Bukin 862d3916eacSRuslan Bukin static int 863d3916eacSRuslan Bukin vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 864d3916eacSRuslan Bukin { 865d3916eacSRuslan Bukin struct vm *vm; 866d3916eacSRuslan Bukin struct vm_exit *vme; 867d3916eacSRuslan Bukin struct vie *vie; 868d3916eacSRuslan Bukin struct hyp *hyp; 869d3916eacSRuslan Bukin uint64_t fault_ipa; 870d3916eacSRuslan Bukin struct vm_guest_paging *paging; 871d3916eacSRuslan Bukin struct 
vmm_mmio_region *vmr; 872d3916eacSRuslan Bukin int error, i; 873d3916eacSRuslan Bukin 874d3916eacSRuslan Bukin vm = vcpu->vm; 875d3916eacSRuslan Bukin hyp = vm->cookie; 876d3916eacSRuslan Bukin if (!hyp->aplic_attached) 877d3916eacSRuslan Bukin goto out_user; 878d3916eacSRuslan Bukin 879d3916eacSRuslan Bukin vme = &vcpu->exitinfo; 880d3916eacSRuslan Bukin vie = &vme->u.inst_emul.vie; 881d3916eacSRuslan Bukin paging = &vme->u.inst_emul.paging; 882d3916eacSRuslan Bukin 883d3916eacSRuslan Bukin fault_ipa = vme->u.inst_emul.gpa; 884d3916eacSRuslan Bukin 885d3916eacSRuslan Bukin vmr = NULL; 886d3916eacSRuslan Bukin for (i = 0; i < nitems(vm->mmio_region); i++) { 887d3916eacSRuslan Bukin if (vm->mmio_region[i].start <= fault_ipa && 888d3916eacSRuslan Bukin vm->mmio_region[i].end > fault_ipa) { 889d3916eacSRuslan Bukin vmr = &vm->mmio_region[i]; 890d3916eacSRuslan Bukin break; 891d3916eacSRuslan Bukin } 892d3916eacSRuslan Bukin } 893d3916eacSRuslan Bukin if (vmr == NULL) 894d3916eacSRuslan Bukin goto out_user; 895d3916eacSRuslan Bukin 896d3916eacSRuslan Bukin error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 897d3916eacSRuslan Bukin vmr->read, vmr->write, retu); 898d3916eacSRuslan Bukin return (error); 899d3916eacSRuslan Bukin 900d3916eacSRuslan Bukin out_user: 901d3916eacSRuslan Bukin *retu = true; 902d3916eacSRuslan Bukin return (0); 903d3916eacSRuslan Bukin } 904d3916eacSRuslan Bukin 905d3916eacSRuslan Bukin int 906d3916eacSRuslan Bukin vm_suspend(struct vm *vm, enum vm_suspend_how how) 907d3916eacSRuslan Bukin { 908d3916eacSRuslan Bukin int i; 909d3916eacSRuslan Bukin 910d3916eacSRuslan Bukin if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 911d3916eacSRuslan Bukin return (EINVAL); 912d3916eacSRuslan Bukin 913d3916eacSRuslan Bukin if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 914d3916eacSRuslan Bukin VM_CTR2(vm, "virtual machine already suspended %d/%d", 915d3916eacSRuslan Bukin vm->suspend, how); 916d3916eacSRuslan Bukin return (EALREADY); 
917d3916eacSRuslan Bukin } 918d3916eacSRuslan Bukin 919d3916eacSRuslan Bukin VM_CTR1(vm, "virtual machine successfully suspended %d", how); 920d3916eacSRuslan Bukin 921d3916eacSRuslan Bukin /* 922d3916eacSRuslan Bukin * Notify all active vcpus that they are now suspended. 923d3916eacSRuslan Bukin */ 924d3916eacSRuslan Bukin for (i = 0; i < vm->maxcpus; i++) { 925d3916eacSRuslan Bukin if (CPU_ISSET(i, &vm->active_cpus)) 926d3916eacSRuslan Bukin vcpu_notify_event(vm_vcpu(vm, i)); 927d3916eacSRuslan Bukin } 928d3916eacSRuslan Bukin 929d3916eacSRuslan Bukin return (0); 930d3916eacSRuslan Bukin } 931d3916eacSRuslan Bukin 932d3916eacSRuslan Bukin void 933d3916eacSRuslan Bukin vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 934d3916eacSRuslan Bukin { 935d3916eacSRuslan Bukin struct vm *vm = vcpu->vm; 936d3916eacSRuslan Bukin struct vm_exit *vmexit; 937d3916eacSRuslan Bukin 938d3916eacSRuslan Bukin KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 939d3916eacSRuslan Bukin ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 940d3916eacSRuslan Bukin 941d3916eacSRuslan Bukin vmexit = vm_exitinfo(vcpu); 942d3916eacSRuslan Bukin vmexit->pc = pc; 943d3916eacSRuslan Bukin vmexit->inst_length = 4; 944d3916eacSRuslan Bukin vmexit->exitcode = VM_EXITCODE_SUSPENDED; 945d3916eacSRuslan Bukin vmexit->u.suspended.how = vm->suspend; 946d3916eacSRuslan Bukin } 947d3916eacSRuslan Bukin 948d3916eacSRuslan Bukin void 949d3916eacSRuslan Bukin vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 950d3916eacSRuslan Bukin { 951d3916eacSRuslan Bukin struct vm_exit *vmexit; 952d3916eacSRuslan Bukin 953d3916eacSRuslan Bukin vmexit = vm_exitinfo(vcpu); 954d3916eacSRuslan Bukin vmexit->pc = pc; 955d3916eacSRuslan Bukin vmexit->inst_length = 4; 956d3916eacSRuslan Bukin vmexit->exitcode = VM_EXITCODE_DEBUG; 957d3916eacSRuslan Bukin } 958d3916eacSRuslan Bukin 959d3916eacSRuslan Bukin int 960d3916eacSRuslan Bukin vm_activate_cpu(struct vcpu *vcpu) 961d3916eacSRuslan Bukin { 
962d3916eacSRuslan Bukin struct vm *vm = vcpu->vm; 963d3916eacSRuslan Bukin 964d3916eacSRuslan Bukin if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 965d3916eacSRuslan Bukin return (EBUSY); 966d3916eacSRuslan Bukin 967d3916eacSRuslan Bukin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 968d3916eacSRuslan Bukin return (0); 969d3916eacSRuslan Bukin 970d3916eacSRuslan Bukin } 971d3916eacSRuslan Bukin 972d3916eacSRuslan Bukin int 973d3916eacSRuslan Bukin vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 974d3916eacSRuslan Bukin { 975d3916eacSRuslan Bukin if (vcpu == NULL) { 976d3916eacSRuslan Bukin vm->debug_cpus = vm->active_cpus; 977d3916eacSRuslan Bukin for (int i = 0; i < vm->maxcpus; i++) { 978d3916eacSRuslan Bukin if (CPU_ISSET(i, &vm->active_cpus)) 979d3916eacSRuslan Bukin vcpu_notify_event(vm_vcpu(vm, i)); 980d3916eacSRuslan Bukin } 981d3916eacSRuslan Bukin } else { 982d3916eacSRuslan Bukin if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 983d3916eacSRuslan Bukin return (EINVAL); 984d3916eacSRuslan Bukin 985d3916eacSRuslan Bukin CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 986d3916eacSRuslan Bukin vcpu_notify_event(vcpu); 987d3916eacSRuslan Bukin } 988d3916eacSRuslan Bukin return (0); 989d3916eacSRuslan Bukin } 990d3916eacSRuslan Bukin 991d3916eacSRuslan Bukin int 992d3916eacSRuslan Bukin vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 993d3916eacSRuslan Bukin { 994d3916eacSRuslan Bukin 995d3916eacSRuslan Bukin if (vcpu == NULL) { 996d3916eacSRuslan Bukin CPU_ZERO(&vm->debug_cpus); 997d3916eacSRuslan Bukin } else { 998d3916eacSRuslan Bukin if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 999d3916eacSRuslan Bukin return (EINVAL); 1000d3916eacSRuslan Bukin 1001d3916eacSRuslan Bukin CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1002d3916eacSRuslan Bukin } 1003d3916eacSRuslan Bukin return (0); 1004d3916eacSRuslan Bukin } 1005d3916eacSRuslan Bukin 1006d3916eacSRuslan Bukin int 1007d3916eacSRuslan Bukin vcpu_debugged(struct vcpu *vcpu) 1008d3916eacSRuslan Bukin { 
1009d3916eacSRuslan Bukin 1010d3916eacSRuslan Bukin return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1011d3916eacSRuslan Bukin } 1012d3916eacSRuslan Bukin 1013d3916eacSRuslan Bukin cpuset_t 1014d3916eacSRuslan Bukin vm_active_cpus(struct vm *vm) 1015d3916eacSRuslan Bukin { 1016d3916eacSRuslan Bukin 1017d3916eacSRuslan Bukin return (vm->active_cpus); 1018d3916eacSRuslan Bukin } 1019d3916eacSRuslan Bukin 1020d3916eacSRuslan Bukin cpuset_t 1021d3916eacSRuslan Bukin vm_debug_cpus(struct vm *vm) 1022d3916eacSRuslan Bukin { 1023d3916eacSRuslan Bukin 1024d3916eacSRuslan Bukin return (vm->debug_cpus); 1025d3916eacSRuslan Bukin } 1026d3916eacSRuslan Bukin 1027d3916eacSRuslan Bukin cpuset_t 1028d3916eacSRuslan Bukin vm_suspended_cpus(struct vm *vm) 1029d3916eacSRuslan Bukin { 1030d3916eacSRuslan Bukin 1031d3916eacSRuslan Bukin return (vm->suspended_cpus); 1032d3916eacSRuslan Bukin } 1033d3916eacSRuslan Bukin 1034d3916eacSRuslan Bukin 1035d3916eacSRuslan Bukin void * 1036d3916eacSRuslan Bukin vcpu_stats(struct vcpu *vcpu) 1037d3916eacSRuslan Bukin { 1038d3916eacSRuslan Bukin 1039d3916eacSRuslan Bukin return (vcpu->stats); 1040d3916eacSRuslan Bukin } 1041d3916eacSRuslan Bukin 1042d3916eacSRuslan Bukin /* 1043d3916eacSRuslan Bukin * This function is called to ensure that a vcpu "sees" a pending event 1044d3916eacSRuslan Bukin * as soon as possible: 1045d3916eacSRuslan Bukin * - If the vcpu thread is sleeping then it is woken up. 1046d3916eacSRuslan Bukin * - If the vcpu is running on a different host_cpu then an IPI will be directed 1047d3916eacSRuslan Bukin * to the host_cpu to cause the vcpu to trap into the hypervisor. 
1048d3916eacSRuslan Bukin */ 1049d3916eacSRuslan Bukin static void 1050d3916eacSRuslan Bukin vcpu_notify_event_locked(struct vcpu *vcpu) 1051d3916eacSRuslan Bukin { 1052d3916eacSRuslan Bukin int hostcpu; 1053d3916eacSRuslan Bukin 1054d3916eacSRuslan Bukin hostcpu = vcpu->hostcpu; 1055d3916eacSRuslan Bukin if (vcpu->state == VCPU_RUNNING) { 1056d3916eacSRuslan Bukin KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1057d3916eacSRuslan Bukin if (hostcpu != curcpu) { 1058d3916eacSRuslan Bukin ipi_cpu(hostcpu, vmm_ipinum); 1059d3916eacSRuslan Bukin } else { 1060d3916eacSRuslan Bukin /* 1061d3916eacSRuslan Bukin * If the 'vcpu' is running on 'curcpu' then it must 1062d3916eacSRuslan Bukin * be sending a notification to itself (e.g. SELF_IPI). 1063d3916eacSRuslan Bukin * The pending event will be picked up when the vcpu 1064d3916eacSRuslan Bukin * transitions back to guest context. 1065d3916eacSRuslan Bukin */ 1066d3916eacSRuslan Bukin } 1067d3916eacSRuslan Bukin } else { 1068d3916eacSRuslan Bukin KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1069d3916eacSRuslan Bukin "with hostcpu %d", vcpu->state, hostcpu)); 1070d3916eacSRuslan Bukin if (vcpu->state == VCPU_SLEEPING) 1071d3916eacSRuslan Bukin wakeup_one(vcpu); 1072d3916eacSRuslan Bukin } 1073d3916eacSRuslan Bukin } 1074d3916eacSRuslan Bukin 1075d3916eacSRuslan Bukin void 1076d3916eacSRuslan Bukin vcpu_notify_event(struct vcpu *vcpu) 1077d3916eacSRuslan Bukin { 1078d3916eacSRuslan Bukin vcpu_lock(vcpu); 1079d3916eacSRuslan Bukin vcpu_notify_event_locked(vcpu); 1080d3916eacSRuslan Bukin vcpu_unlock(vcpu); 1081d3916eacSRuslan Bukin } 1082d3916eacSRuslan Bukin 1083d3916eacSRuslan Bukin static void 1084d3916eacSRuslan Bukin restore_guest_fpustate(struct vcpu *vcpu) 1085d3916eacSRuslan Bukin { 1086d3916eacSRuslan Bukin 1087d3916eacSRuslan Bukin /* Flush host state to the pcb. 
*/ 1088d3916eacSRuslan Bukin fpe_state_save(curthread); 1089d3916eacSRuslan Bukin 1090d3916eacSRuslan Bukin /* Ensure the VFP state will be re-loaded when exiting the guest. */ 1091d3916eacSRuslan Bukin PCPU_SET(fpcurthread, NULL); 1092d3916eacSRuslan Bukin 1093d3916eacSRuslan Bukin /* restore guest FPU state */ 1094d3916eacSRuslan Bukin fpe_enable(); 1095d3916eacSRuslan Bukin fpe_restore(vcpu->guestfpu); 1096d3916eacSRuslan Bukin 1097d3916eacSRuslan Bukin /* 1098d3916eacSRuslan Bukin * The FPU is now "dirty" with the guest's state so turn on emulation 1099d3916eacSRuslan Bukin * to trap any access to the FPU by the host. 1100d3916eacSRuslan Bukin */ 1101d3916eacSRuslan Bukin fpe_disable(); 1102d3916eacSRuslan Bukin } 1103d3916eacSRuslan Bukin 1104d3916eacSRuslan Bukin static void 1105d3916eacSRuslan Bukin save_guest_fpustate(struct vcpu *vcpu) 1106d3916eacSRuslan Bukin { 1107d3916eacSRuslan Bukin 1108d3916eacSRuslan Bukin /* Save guest FPE state. */ 1109d3916eacSRuslan Bukin fpe_enable(); 1110d3916eacSRuslan Bukin fpe_store(vcpu->guestfpu); 1111d3916eacSRuslan Bukin fpe_disable(); 1112d3916eacSRuslan Bukin 1113d3916eacSRuslan Bukin KASSERT(PCPU_GET(fpcurthread) == NULL, 1114d3916eacSRuslan Bukin ("%s: fpcurthread set with guest registers", __func__)); 1115d3916eacSRuslan Bukin } 1116d3916eacSRuslan Bukin 1117d3916eacSRuslan Bukin static int 1118d3916eacSRuslan Bukin vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1119d3916eacSRuslan Bukin bool from_idle) 1120d3916eacSRuslan Bukin { 1121d3916eacSRuslan Bukin int error; 1122d3916eacSRuslan Bukin 1123d3916eacSRuslan Bukin vcpu_assert_locked(vcpu); 1124d3916eacSRuslan Bukin 1125d3916eacSRuslan Bukin /* 1126d3916eacSRuslan Bukin * State transitions from the vmmdev_ioctl() must always begin from 1127d3916eacSRuslan Bukin * the VCPU_IDLE state. This guarantees that there is only a single 1128d3916eacSRuslan Bukin * ioctl() operating on a vcpu at any point. 
1129d3916eacSRuslan Bukin */ 1130d3916eacSRuslan Bukin if (from_idle) { 1131d3916eacSRuslan Bukin while (vcpu->state != VCPU_IDLE) { 1132d3916eacSRuslan Bukin vcpu_notify_event_locked(vcpu); 1133215c8b79SMark Johnston msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1134d3916eacSRuslan Bukin } 1135d3916eacSRuslan Bukin } else { 1136d3916eacSRuslan Bukin KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1137d3916eacSRuslan Bukin "vcpu idle state")); 1138d3916eacSRuslan Bukin } 1139d3916eacSRuslan Bukin 1140d3916eacSRuslan Bukin if (vcpu->state == VCPU_RUNNING) { 1141d3916eacSRuslan Bukin KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1142d3916eacSRuslan Bukin "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1143d3916eacSRuslan Bukin } else { 1144d3916eacSRuslan Bukin KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1145d3916eacSRuslan Bukin "vcpu that is not running", vcpu->hostcpu)); 1146d3916eacSRuslan Bukin } 1147d3916eacSRuslan Bukin 1148d3916eacSRuslan Bukin /* 1149d3916eacSRuslan Bukin * The following state transitions are allowed: 1150d3916eacSRuslan Bukin * IDLE -> FROZEN -> IDLE 1151d3916eacSRuslan Bukin * FROZEN -> RUNNING -> FROZEN 1152d3916eacSRuslan Bukin * FROZEN -> SLEEPING -> FROZEN 1153d3916eacSRuslan Bukin */ 1154d3916eacSRuslan Bukin switch (vcpu->state) { 1155d3916eacSRuslan Bukin case VCPU_IDLE: 1156d3916eacSRuslan Bukin case VCPU_RUNNING: 1157d3916eacSRuslan Bukin case VCPU_SLEEPING: 1158d3916eacSRuslan Bukin error = (newstate != VCPU_FROZEN); 1159d3916eacSRuslan Bukin break; 1160d3916eacSRuslan Bukin case VCPU_FROZEN: 1161d3916eacSRuslan Bukin error = (newstate == VCPU_FROZEN); 1162d3916eacSRuslan Bukin break; 1163d3916eacSRuslan Bukin default: 1164d3916eacSRuslan Bukin error = 1; 1165d3916eacSRuslan Bukin break; 1166d3916eacSRuslan Bukin } 1167d3916eacSRuslan Bukin 1168d3916eacSRuslan Bukin if (error) 1169d3916eacSRuslan Bukin return (EBUSY); 1170d3916eacSRuslan Bukin 1171d3916eacSRuslan Bukin 
vcpu->state = newstate; 1172d3916eacSRuslan Bukin if (newstate == VCPU_RUNNING) 1173d3916eacSRuslan Bukin vcpu->hostcpu = curcpu; 1174d3916eacSRuslan Bukin else 1175d3916eacSRuslan Bukin vcpu->hostcpu = NOCPU; 1176d3916eacSRuslan Bukin 1177d3916eacSRuslan Bukin if (newstate == VCPU_IDLE) 1178d3916eacSRuslan Bukin wakeup(&vcpu->state); 1179d3916eacSRuslan Bukin 1180d3916eacSRuslan Bukin return (0); 1181d3916eacSRuslan Bukin } 1182d3916eacSRuslan Bukin 1183d3916eacSRuslan Bukin static void 1184d3916eacSRuslan Bukin vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1185d3916eacSRuslan Bukin { 1186d3916eacSRuslan Bukin int error; 1187d3916eacSRuslan Bukin 1188d3916eacSRuslan Bukin if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1189d3916eacSRuslan Bukin panic("Error %d setting state to %d\n", error, newstate); 1190d3916eacSRuslan Bukin } 1191d3916eacSRuslan Bukin 1192d3916eacSRuslan Bukin static void 1193d3916eacSRuslan Bukin vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1194d3916eacSRuslan Bukin { 1195d3916eacSRuslan Bukin int error; 1196d3916eacSRuslan Bukin 1197d3916eacSRuslan Bukin if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1198d3916eacSRuslan Bukin panic("Error %d setting state to %d", error, newstate); 1199d3916eacSRuslan Bukin } 1200d3916eacSRuslan Bukin 1201d3916eacSRuslan Bukin int 1202d3916eacSRuslan Bukin vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1203d3916eacSRuslan Bukin { 1204d3916eacSRuslan Bukin 1205d3916eacSRuslan Bukin if (type < 0 || type >= VM_CAP_MAX) 1206d3916eacSRuslan Bukin return (EINVAL); 1207d3916eacSRuslan Bukin 1208d3916eacSRuslan Bukin return (vmmops_getcap(vcpu->cookie, type, retval)); 1209d3916eacSRuslan Bukin } 1210d3916eacSRuslan Bukin 1211d3916eacSRuslan Bukin int 1212d3916eacSRuslan Bukin vm_set_capability(struct vcpu *vcpu, int type, int val) 1213d3916eacSRuslan Bukin { 1214d3916eacSRuslan Bukin 1215d3916eacSRuslan Bukin if (type < 0 || type >= 
VM_CAP_MAX) 1216d3916eacSRuslan Bukin return (EINVAL); 1217d3916eacSRuslan Bukin 1218d3916eacSRuslan Bukin return (vmmops_setcap(vcpu->cookie, type, val)); 1219d3916eacSRuslan Bukin } 1220d3916eacSRuslan Bukin 1221d3916eacSRuslan Bukin struct vm * 1222d3916eacSRuslan Bukin vcpu_vm(struct vcpu *vcpu) 1223d3916eacSRuslan Bukin { 1224d3916eacSRuslan Bukin 1225d3916eacSRuslan Bukin return (vcpu->vm); 1226d3916eacSRuslan Bukin } 1227d3916eacSRuslan Bukin 1228d3916eacSRuslan Bukin int 1229d3916eacSRuslan Bukin vcpu_vcpuid(struct vcpu *vcpu) 1230d3916eacSRuslan Bukin { 1231d3916eacSRuslan Bukin 1232d3916eacSRuslan Bukin return (vcpu->vcpuid); 1233d3916eacSRuslan Bukin } 1234d3916eacSRuslan Bukin 1235d3916eacSRuslan Bukin void * 1236d3916eacSRuslan Bukin vcpu_get_cookie(struct vcpu *vcpu) 1237d3916eacSRuslan Bukin { 1238d3916eacSRuslan Bukin 1239d3916eacSRuslan Bukin return (vcpu->cookie); 1240d3916eacSRuslan Bukin } 1241d3916eacSRuslan Bukin 1242d3916eacSRuslan Bukin struct vcpu * 1243d3916eacSRuslan Bukin vm_vcpu(struct vm *vm, int vcpuid) 1244d3916eacSRuslan Bukin { 1245d3916eacSRuslan Bukin 1246d3916eacSRuslan Bukin return (vm->vcpu[vcpuid]); 1247d3916eacSRuslan Bukin } 1248d3916eacSRuslan Bukin 1249d3916eacSRuslan Bukin int 1250d3916eacSRuslan Bukin vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1251d3916eacSRuslan Bukin { 1252d3916eacSRuslan Bukin int error; 1253d3916eacSRuslan Bukin 1254d3916eacSRuslan Bukin vcpu_lock(vcpu); 1255d3916eacSRuslan Bukin error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1256d3916eacSRuslan Bukin vcpu_unlock(vcpu); 1257d3916eacSRuslan Bukin 1258d3916eacSRuslan Bukin return (error); 1259d3916eacSRuslan Bukin } 1260d3916eacSRuslan Bukin 1261d3916eacSRuslan Bukin enum vcpu_state 1262d3916eacSRuslan Bukin vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1263d3916eacSRuslan Bukin { 1264d3916eacSRuslan Bukin enum vcpu_state state; 1265d3916eacSRuslan Bukin 1266d3916eacSRuslan Bukin vcpu_lock(vcpu); 
1267d3916eacSRuslan Bukin state = vcpu->state; 1268d3916eacSRuslan Bukin if (hostcpu != NULL) 1269d3916eacSRuslan Bukin *hostcpu = vcpu->hostcpu; 1270d3916eacSRuslan Bukin vcpu_unlock(vcpu); 1271d3916eacSRuslan Bukin 1272d3916eacSRuslan Bukin return (state); 1273d3916eacSRuslan Bukin } 1274d3916eacSRuslan Bukin 1275d3916eacSRuslan Bukin static void * 1276d3916eacSRuslan Bukin _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1277d3916eacSRuslan Bukin void **cookie) 1278d3916eacSRuslan Bukin { 1279d3916eacSRuslan Bukin int i, count, pageoff; 1280d3916eacSRuslan Bukin struct mem_map *mm; 1281d3916eacSRuslan Bukin vm_page_t m; 1282d3916eacSRuslan Bukin 1283d3916eacSRuslan Bukin pageoff = gpa & PAGE_MASK; 1284d3916eacSRuslan Bukin if (len > PAGE_SIZE - pageoff) 1285d3916eacSRuslan Bukin panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1286d3916eacSRuslan Bukin 1287d3916eacSRuslan Bukin count = 0; 1288d3916eacSRuslan Bukin for (i = 0; i < VM_MAX_MEMMAPS; i++) { 1289d3916eacSRuslan Bukin mm = &vm->mem_maps[i]; 1290d3916eacSRuslan Bukin if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 1291d3916eacSRuslan Bukin gpa < mm->gpa + mm->len) { 1292d3916eacSRuslan Bukin count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1293d3916eacSRuslan Bukin trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 1294d3916eacSRuslan Bukin break; 1295d3916eacSRuslan Bukin } 1296d3916eacSRuslan Bukin } 1297d3916eacSRuslan Bukin 1298d3916eacSRuslan Bukin if (count == 1) { 1299d3916eacSRuslan Bukin *cookie = m; 1300d3916eacSRuslan Bukin return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1301d3916eacSRuslan Bukin } else { 1302d3916eacSRuslan Bukin *cookie = NULL; 1303d3916eacSRuslan Bukin return (NULL); 1304d3916eacSRuslan Bukin } 1305d3916eacSRuslan Bukin } 1306d3916eacSRuslan Bukin 1307d3916eacSRuslan Bukin void * 1308d3916eacSRuslan Bukin vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 1309d3916eacSRuslan Bukin void **cookie) 
1310d3916eacSRuslan Bukin { 1311d3916eacSRuslan Bukin #ifdef INVARIANTS 1312d3916eacSRuslan Bukin /* 1313d3916eacSRuslan Bukin * The current vcpu should be frozen to ensure 'vm_memmap[]' 1314d3916eacSRuslan Bukin * stability. 1315d3916eacSRuslan Bukin */ 1316d3916eacSRuslan Bukin int state = vcpu_get_state(vcpu, NULL); 1317d3916eacSRuslan Bukin KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 1318d3916eacSRuslan Bukin __func__, state)); 1319d3916eacSRuslan Bukin #endif 1320d3916eacSRuslan Bukin return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 1321d3916eacSRuslan Bukin } 1322d3916eacSRuslan Bukin 1323d3916eacSRuslan Bukin void * 1324d3916eacSRuslan Bukin vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1325d3916eacSRuslan Bukin void **cookie) 1326d3916eacSRuslan Bukin { 1327d3916eacSRuslan Bukin sx_assert(&vm->mem_segs_lock, SX_LOCKED); 1328d3916eacSRuslan Bukin return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 1329d3916eacSRuslan Bukin } 1330d3916eacSRuslan Bukin 1331d3916eacSRuslan Bukin void 1332d3916eacSRuslan Bukin vm_gpa_release(void *cookie) 1333d3916eacSRuslan Bukin { 1334d3916eacSRuslan Bukin vm_page_t m = cookie; 1335d3916eacSRuslan Bukin 1336d3916eacSRuslan Bukin vm_page_unwire(m, PQ_ACTIVE); 1337d3916eacSRuslan Bukin } 1338d3916eacSRuslan Bukin 1339d3916eacSRuslan Bukin int 1340d3916eacSRuslan Bukin vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1341d3916eacSRuslan Bukin { 1342d3916eacSRuslan Bukin 1343d3916eacSRuslan Bukin if (reg >= VM_REG_LAST) 1344d3916eacSRuslan Bukin return (EINVAL); 1345d3916eacSRuslan Bukin 1346d3916eacSRuslan Bukin return (vmmops_getreg(vcpu->cookie, reg, retval)); 1347d3916eacSRuslan Bukin } 1348d3916eacSRuslan Bukin 1349d3916eacSRuslan Bukin int 1350d3916eacSRuslan Bukin vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1351d3916eacSRuslan Bukin { 1352d3916eacSRuslan Bukin int error; 1353d3916eacSRuslan Bukin 1354d3916eacSRuslan Bukin if (reg >= 
VM_REG_LAST) 1355d3916eacSRuslan Bukin return (EINVAL); 1356d3916eacSRuslan Bukin error = vmmops_setreg(vcpu->cookie, reg, val); 1357d3916eacSRuslan Bukin if (error || reg != VM_REG_GUEST_SEPC) 1358d3916eacSRuslan Bukin return (error); 1359d3916eacSRuslan Bukin 1360d3916eacSRuslan Bukin vcpu->nextpc = val; 1361d3916eacSRuslan Bukin 1362d3916eacSRuslan Bukin return (0); 1363d3916eacSRuslan Bukin } 1364d3916eacSRuslan Bukin 1365d3916eacSRuslan Bukin void * 1366d3916eacSRuslan Bukin vm_get_cookie(struct vm *vm) 1367d3916eacSRuslan Bukin { 1368d3916eacSRuslan Bukin 1369d3916eacSRuslan Bukin return (vm->cookie); 1370d3916eacSRuslan Bukin } 1371d3916eacSRuslan Bukin 1372d3916eacSRuslan Bukin int 1373d3916eacSRuslan Bukin vm_inject_exception(struct vcpu *vcpu, uint64_t scause) 1374d3916eacSRuslan Bukin { 1375d3916eacSRuslan Bukin 1376d3916eacSRuslan Bukin return (vmmops_exception(vcpu->cookie, scause)); 1377d3916eacSRuslan Bukin } 1378d3916eacSRuslan Bukin 1379d3916eacSRuslan Bukin int 1380d3916eacSRuslan Bukin vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) 1381d3916eacSRuslan Bukin { 1382d3916eacSRuslan Bukin 1383d3916eacSRuslan Bukin return (aplic_attach_to_vm(vm->cookie, descr)); 1384d3916eacSRuslan Bukin } 1385d3916eacSRuslan Bukin 1386d3916eacSRuslan Bukin int 1387d3916eacSRuslan Bukin vm_assert_irq(struct vm *vm, uint32_t irq) 1388d3916eacSRuslan Bukin { 1389d3916eacSRuslan Bukin 1390d3916eacSRuslan Bukin return (aplic_inject_irq(vm->cookie, -1, irq, true)); 1391d3916eacSRuslan Bukin } 1392d3916eacSRuslan Bukin 1393d3916eacSRuslan Bukin int 1394d3916eacSRuslan Bukin vm_deassert_irq(struct vm *vm, uint32_t irq) 1395d3916eacSRuslan Bukin { 1396d3916eacSRuslan Bukin 1397d3916eacSRuslan Bukin return (aplic_inject_irq(vm->cookie, -1, irq, false)); 1398d3916eacSRuslan Bukin } 1399d3916eacSRuslan Bukin 1400d3916eacSRuslan Bukin int 1401d3916eacSRuslan Bukin vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 1402d3916eacSRuslan Bukin 
    int func)
{

	/* bus/slot/func are unused; the APLIC consumes only msg and addr. */
	return (aplic_inject_msi(vm->cookie, msg, addr));
}

/*
 * Handle a WFI (wait-for-interrupt) exit: sleep the vcpu until there is a
 * reason to wake it — a pending APLIC interrupt, a pending IPI, other
 * pending interrupts, or a scheduler yield request.  Always resumes the
 * guest (*retu = false).  'vme' is not consulted here.
 */
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{

	vcpu_lock(vcpu);

	while (1) {
		/* Wake immediately if an interrupt is already pending. */
		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (riscv_check_interrupts_pending(vcpu->cookie))
			break;

		/* Let the host scheduler run something else if requested. */
		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}

/*
 * Handle a guest (second-stage) page fault exit by resolving the faulting
 * guest physical address against the VM's vmspace.
 */
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	/*
	 * htval carries the faulting guest physical address shifted right
	 * by two (RISC-V hypervisor extension encoding); reconstruct it
	 * and truncate to a page boundary.
	 */
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	/* Map the trap cause to the vm_fault() protection type. */
	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/*
	 * The page exists, but the page table needs to be updated.
	 */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	/* Otherwise fault the page in through the VM system. */
	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

/*
 * Handle a suspend exit: mark this vcpu suspended, wait until every active
 * vcpu has done the same, then wake the other sleepers and return to
 * userspace (*retu = true).
 */
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		/* Drop the vcpu lock while honoring a thread-suspend request. */
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

/*
 * Run the vcpu: enter the guest repeatedly, dispatching each exit to the
 * in-kernel handlers, until a handler (or an error) requires a return to
 * userspace.
 */
int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	/* The vcpu must be activated and not already suspended. */
	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	/* Only suspend events are wired up; no rendezvous/interrupt ptrs. */
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	/* Enter the guest; the exit reason lands in vcpu->exitinfo. */
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			/* Resume after the emulated instruction. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			/* Retry the faulting instruction after repair. */
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			/* Spurious exit: just re-enter the guest. */
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	/* Re-enter the guest unless userspace attention is required. */
	if (error == 0 && retu == false)
		goto restart;

	return (error);
}