xref: /freebsd/sys/arm64/vmm/vmm.c (revision c76c2a19ae3763d17aa6a60a5831ed24cbc16e83)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

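/*
 * A range of guest physical addresses whose loads and stores are emulated
 * in the kernel, such as the interrupt controller registers emulated by
 * the vgic code.
 */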
struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

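/*
 * A trapped MSR/MRS access is matched by comparing the ISS field of the
 * exception syndrome against esr_iss under esr_mask; the reg_read and
 * reg_write callbacks then emulate the register.
 */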
struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

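/*
 * Snapshot the host's ID registers into *regs, keeping only the feature
 * fields permitted by *masks.  A register the kernel cannot provide is
 * exposed to guests as zero.
 */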
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

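/*
 * Allocate a vcpu and the state that lives for the lifetime of the VM.
 * State that must be reset when the VM is reinitialized is set up
 * separately in vcpu_init().
 */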
static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

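/*
 * Module-load initialization: size vm_maxcpu from the host CPU count (and
 * the hw.vmm.maxcpu tunable), snapshot the host ID registers, then hand
 * off to the backend via vmmops_modinit().
 */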
static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

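/*
 * Look up, and possibly create, the vcpu for 'vcpuid'.  The common case
 * is served by a lockless acquire load; creation is serialized by
 * vcpus_init_lock, and the new vcpu is published with a release store so
 * unlocked readers only ever see fully constructed state.
 */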
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

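/*
 * Default register access handlers: vmm_reg_raz reads as zero,
 * vmm_reg_read_arg returns the uint64_t value that 'arg' points at, and
 * vmm_reg_wi discards writes.
 */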
static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

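/*
 * Register a per-VM handler for a trapped system register access.  As an
 * illustrative (hypothetical) example, a device model emulating some
 * register would pass the register's ISS encoding together with its own
 * callbacks and softc:
 *
 *	vm_register_reg_handler(vm, iss, ISS_MSR_REG_MASK,
 *	    mydev_reg_read, mydev_reg_write, mydev_softc);
 *
 * Only VM_MAX_SPECIAL_REGS slots exist; exhausting them is a panic.
 */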
void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

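/*
 * Emulate a trapped MSR/MRS access.  Per-VM handlers registered with
 * vm_register_reg_handler() take precedence over the static
 * vmm_special_regs table; accesses matching neither are handed to
 * userspace (*retu = true).
 */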
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

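/*
 * Register in-kernel emulation for an MMIO range.  For example, a
 * (hypothetical) device model backing a 4KB region at IPA 0x2f000000
 * with mem_region_read_t/mem_region_write_t callbacks would call:
 *
 *	vm_register_inst_handler(vm, 0x2f000000, 0x1000,
 *	    mydev_mmio_read, mydev_mmio_write);
 *
 * Only VM_MAX_MMIO_REGIONS slots exist; exhausting them is a panic.
 */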
void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

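/*
 * Emulate a data abort that falls in a registered MMIO region.  Faults
 * outside any region, or taken before a vgic is attached, are forwarded
 * to userspace.
 */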
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

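/*
 * Idle the vcpu for a guest WFI: sleep until the vgic has a pending
 * interrupt for it or the host scheduler wants the thread back.
 */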
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

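/*
 * Handle a stage-2 translation fault: let pmap_fault() service faults
 * that only require a page table update, otherwise fault the page in
 * via vm_fault() on the guest's vmspace.
 */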
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

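/*
 * Run the vcpu: enter the guest via vmmops_run() with the FPU swapped to
 * guest state, then service the exit.  Exits that can be handled in the
 * kernel loop straight back into the guest; anything else sets 'retu'
 * and returns to userspace with the details in vcpu->exitinfo.
 */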
int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}