/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
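/*
 * The vcpu lock is a spin mutex: waiters block on it with msleep_spin()
 * (see vm_handle_wfi() and vcpu_set_state_locked() below), and
 * vcpu_notify_event() may take it from contexts that cannot sleep.
 */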

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16
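/*
 * A trapped MSR/MRS access matches an entry when the ESR_EL2 ISS value,
 * masked with esr_mask, equals esr_iss; see vm_handle_reg_emul().
 */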

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN + 1];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};
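/*
 * Each mask above selects the ID register fields that may be exposed to
 * a guest; vmm_regs_init() fetches the host values through these masks,
 * so any feature field not listed here is hidden from guests.
 */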

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,	\
	    masks->field))						\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
		free(vcpu, M_VMM);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

int
vmm_modinit(void)
{
	int error;

	error = vmm_unsupported_quirk();
	if (error != 0)
		return (error);

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

int
vmm_modcleanup(void)
{
	return (vmmops_modcleanup());
}

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

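	/*
	 * Fast path: this acquire load pairs with the release store below,
	 * so a non-NULL pointer is guaranteed to be a fully initialized
	 * vcpu.
	 */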
	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		/* Some interrupt controllers may have a CPU limit */
		if (vcpuid >= vgic_max_cpu_count(vm->cookie)) {
			sx_xunlock(&vm->vcpus_init_lock);
			return (NULL);
		}

		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	int error;

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
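	/* The guest physical address space spans 0 .. 1ul << 39 (512 GiB). */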
	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
	if (error != 0) {
		free(vm, M_VMM);
		return (error);
	}
	strcpy(vm->name, name);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm_vmspace(vm));
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static int
vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	struct hypctx *hypctx;

	hypctx = vcpu_get_cookie(vcpu);
	/* All other fields are RES0 & we don't do anything with this */
	/* TODO: Disable access to other debug state when locked */
	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
	return (0);
}

static int
vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	struct hypctx *hypctx;
	uint64_t val;

	hypctx = vcpu_get_cookie(vcpu);
	val = OSLSR_OSLM_1;
	if (hypctx->dbg_oslock)
		val |= OSLSR_OSLK;
	*rval = val;

	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),

	/* Debug registers */
	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
	/* TODO: Exceptions on invalid access */
	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{
	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{
	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{
	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

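	/* Keep the cached resume address in sync with the new guest PC. */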
	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

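	/* SMCCC calls are made with HVC #0; reject a non-zero immediate. */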
	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm_vmspace(vm)->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm_vmspace(vm));
	vme = &vcpu->exitinfo;
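	/* Only the suspend pointer is used on arm64; rptr and iptr stay NULL. */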
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
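	/*
	 * Stay in a critical section while the guest owns the VFP unit so
	 * the thread cannot be preempted between restoring and saving the
	 * guest FPU state.
	 */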
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}
1491