1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/cpuset.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/mutex.h>
38 #include <sys/pcpu.h>
39 #include <sys/proc.h>
40 #include <sys/queue.h>
41 #include <sys/rwlock.h>
42 #include <sys/sched.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45
46 #include <vm/vm.h>
47 #include <vm/vm_object.h>
48 #include <vm/vm_page.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_map.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_param.h>
53
54 #include <machine/armreg.h>
55 #include <machine/cpu.h>
56 #include <machine/fpu.h>
57 #include <machine/machdep.h>
58 #include <machine/pcb.h>
59 #include <machine/smp.h>
60 #include <machine/vm.h>
61 #include <machine/vmparam.h>
62 #include <machine/vmm.h>
63 #include <machine/vmm_instruction_emul.h>
64
65 #include <dev/pci/pcireg.h>
66 #include <dev/vmm/vmm_dev.h>
67 #include <dev/vmm/vmm_ktr.h>
68 #include <dev/vmm/vmm_stat.h>
69
70 #include "arm64.h"
71 #include "mmu.h"
72
73 #include "io/vgic.h"
74 #include "io/vtimer.h"
75
76 struct vcpu {
77 int flags;
78 enum vcpu_state state;
79 struct mtx mtx;
80 int hostcpu; /* host cpuid this vcpu last ran on */
81 int vcpuid;
82 void *stats;
83 struct vm_exit exitinfo;
84 uint64_t nextpc; /* (x) next instruction to execute */
85 struct vm *vm; /* (o) */
86 void *cookie; /* (i) cpu-specific data */
87 struct vfpstate *guestfpu; /* (a,i) guest fpu state */
88 };
89
/* Thin wrappers around the spin mutex embedded in struct vcpu. */
#define	vcpu_lock_initialized(v)	mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)		mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)		mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)			mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)			mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)		mtx_assert(&((v)->mtx), MA_OWNED)
96
97 struct mem_seg {
98 uint64_t gpa;
99 size_t len;
100 bool wired;
101 bool sysmem;
102 vm_object_t object;
103 };
104 #define VM_MAX_MEMSEGS 3
105
106 struct mem_map {
107 vm_paddr_t gpa;
108 size_t len;
109 vm_ooffset_t segoff;
110 int segid;
111 int prot;
112 int flags;
113 };
114 #define VM_MAX_MEMMAPS 4
115
116 struct vmm_mmio_region {
117 uint64_t start;
118 uint64_t end;
119 mem_region_read_t read;
120 mem_region_write_t write;
121 };
122 #define VM_MAX_MMIO_REGIONS 4
123
124 struct vmm_special_reg {
125 uint32_t esr_iss;
126 uint32_t esr_mask;
127 reg_read_t reg_read;
128 reg_write_t reg_write;
129 void *arg;
130 };
131 #define VM_MAX_SPECIAL_REGS 16
132
133 /*
134 * Initialization:
135 * (o) initialized the first time the VM is created
136 * (i) initialized when VM is created and when it is reinitialized
137 * (x) initialized before use
138 */
139 struct vm {
140 void *cookie; /* (i) cpu-specific data */
141 volatile cpuset_t active_cpus; /* (i) active vcpus */
142 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
143 int suspend; /* (i) stop VM execution */
144 bool dying; /* (o) is dying */
145 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
146 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
147 struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
148 struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
149 struct vmspace *vmspace; /* (o) guest's address space */
150 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
151 struct vcpu **vcpu; /* (i) guest vcpus */
152 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
153 /* (o) guest MMIO regions */
154 struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
155 /* The following describe the vm cpu topology */
156 uint16_t sockets; /* (o) num of sockets */
157 uint16_t cores; /* (o) num of cores/socket */
158 uint16_t threads; /* (o) num of threads/core */
159 uint16_t maxcpus; /* (o) max pluggable cpus */
160 struct sx mem_segs_lock; /* (o) */
161 struct sx vcpus_init_lock; /* (o) */
162 };
163
164 static bool vmm_initialized = false;
165
166 static int vm_handle_wfi(struct vcpu *vcpu,
167 struct vm_exit *vme, bool *retu);
168
169 static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
170
171 /* statistics */
172 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
173
174 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
175
176 static int vmm_ipinum;
177 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
178 "IPI vector used for vcpu notifications");
179
/*
 * One 64-bit slot per ID register handled by vmm_regs_init().  The same
 * layout is used both for the masks and for the resulting values.
 */
struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};
194
195 static const struct vmm_regs vmm_arch_regs_masks = {
196 .id_aa64dfr0 =
197 ID_AA64DFR0_CTX_CMPs_MASK |
198 ID_AA64DFR0_WRPs_MASK |
199 ID_AA64DFR0_BRPs_MASK |
200 ID_AA64DFR0_PMUVer_3 |
201 ID_AA64DFR0_DebugVer_8,
202 .id_aa64isar0 =
203 ID_AA64ISAR0_TLB_TLBIOSR |
204 ID_AA64ISAR0_SHA3_IMPL |
205 ID_AA64ISAR0_RDM_IMPL |
206 ID_AA64ISAR0_Atomic_IMPL |
207 ID_AA64ISAR0_CRC32_BASE |
208 ID_AA64ISAR0_SHA2_512 |
209 ID_AA64ISAR0_SHA1_BASE |
210 ID_AA64ISAR0_AES_PMULL,
211 .id_aa64mmfr0 =
212 ID_AA64MMFR0_TGran4_IMPL |
213 ID_AA64MMFR0_TGran64_IMPL |
214 ID_AA64MMFR0_TGran16_IMPL |
215 ID_AA64MMFR0_ASIDBits_16 |
216 ID_AA64MMFR0_PARange_4P,
217 .id_aa64mmfr1 =
218 ID_AA64MMFR1_SpecSEI_IMPL |
219 ID_AA64MMFR1_PAN_ATS1E1 |
220 ID_AA64MMFR1_HAFDBS_AF,
221 .id_aa64pfr0 =
222 ID_AA64PFR0_GIC_CPUIF_NONE |
223 ID_AA64PFR0_AdvSIMD_HP |
224 ID_AA64PFR0_FP_HP |
225 ID_AA64PFR0_EL3_64 |
226 ID_AA64PFR0_EL2_64 |
227 ID_AA64PFR0_EL1_64 |
228 ID_AA64PFR0_EL0_64,
229 };
230
231 /* Host registers masked by vmm_arch_regs_masks. */
232 static struct vmm_regs vmm_arch_regs;
233
234 u_int vm_maxcpu;
235 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
236 &vm_maxcpu, 0, "Maximum number of vCPUs");
237
238 static void vm_free_memmap(struct vm *vm, int ident);
239 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
240 static void vcpu_notify_event_locked(struct vcpu *vcpu);
241
242 /* global statistics */
243 VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
244 VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
245 VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
246 VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
247 VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
248 VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
249 VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
250 VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
251 VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
252 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
253 VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
254 VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
255 VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
256 VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
257 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
258
/*
 * Ceiling applied to vm_maxcpu.  This could be raised to 28 bits, but the
 * current value is a safe choice for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
264
265 static int
vmm_regs_init(struct vmm_regs * regs,const struct vmm_regs * masks)266 vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
267 {
268 #define _FETCH_KERN_REG(reg, field) do { \
269 regs->field = vmm_arch_regs_masks.field; \
270 if (!get_kernel_reg_masked(reg, ®s->field, masks->field)) \
271 regs->field = 0; \
272 } while (0)
273 _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
274 _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
275 _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
276 _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
277 _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
278 _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
279 _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
280 _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
281 _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
282 _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
283 _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
284 _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
285 #undef _FETCH_KERN_REG
286 return (0);
287 }
288
289 static void
vcpu_cleanup(struct vcpu * vcpu,bool destroy)290 vcpu_cleanup(struct vcpu *vcpu, bool destroy)
291 {
292 vmmops_vcpu_cleanup(vcpu->cookie);
293 vcpu->cookie = NULL;
294 if (destroy) {
295 vmm_stat_free(vcpu->stats);
296 fpu_save_area_free(vcpu->guestfpu);
297 vcpu_lock_destroy(vcpu);
298 }
299 }
300
301 static struct vcpu *
vcpu_alloc(struct vm * vm,int vcpu_id)302 vcpu_alloc(struct vm *vm, int vcpu_id)
303 {
304 struct vcpu *vcpu;
305
306 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
307 ("vcpu_alloc: invalid vcpu %d", vcpu_id));
308
309 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
310 vcpu_lock_init(vcpu);
311 vcpu->state = VCPU_IDLE;
312 vcpu->hostcpu = NOCPU;
313 vcpu->vcpuid = vcpu_id;
314 vcpu->vm = vm;
315 vcpu->guestfpu = fpu_save_area_alloc();
316 vcpu->stats = vmm_stat_alloc();
317 return (vcpu);
318 }
319
320 static void
vcpu_init(struct vcpu * vcpu)321 vcpu_init(struct vcpu *vcpu)
322 {
323 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
324 MPASS(vcpu->cookie != NULL);
325 fpu_save_area_reset(vcpu->guestfpu);
326 vmm_stat_init(vcpu->stats);
327 }
328
329 struct vm_exit *
vm_exitinfo(struct vcpu * vcpu)330 vm_exitinfo(struct vcpu *vcpu)
331 {
332 return (&vcpu->exitinfo);
333 }
334
335 static int
vmm_init(void)336 vmm_init(void)
337 {
338 int error;
339
340 vm_maxcpu = mp_ncpus;
341 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
342
343 if (vm_maxcpu > VM_MAXCPU) {
344 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
345 vm_maxcpu = VM_MAXCPU;
346 }
347 if (vm_maxcpu == 0)
348 vm_maxcpu = 1;
349
350 error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
351 if (error != 0)
352 return (error);
353
354 return (vmmops_modinit(0));
355 }
356
357 static int
vmm_handler(module_t mod,int what,void * arg)358 vmm_handler(module_t mod, int what, void *arg)
359 {
360 int error;
361
362 switch (what) {
363 case MOD_LOAD:
364 /* TODO: if (vmm_is_hw_supported()) { */
365 error = vmmdev_init();
366 if (error != 0)
367 break;
368 error = vmm_init();
369 if (error == 0)
370 vmm_initialized = true;
371 break;
372 case MOD_UNLOAD:
373 /* TODO: if (vmm_is_hw_supported()) { */
374 error = vmmdev_cleanup();
375 if (error == 0 && vmm_initialized) {
376 error = vmmops_modcleanup();
377 if (error)
378 vmm_initialized = false;
379 }
380 break;
381 default:
382 error = 0;
383 break;
384 }
385 return (error);
386 }
387
388 static moduledata_t vmm_kmod = {
389 "vmm",
390 vmm_handler,
391 NULL
392 };
393
394 /*
395 * vmm initialization has the following dependencies:
396 *
397 * - HYP initialization requires smp_rendezvous() and therefore must happen
398 * after SMP is fully functional (after SI_SUB_SMP).
399 * - vmm device initialization requires an initialized devfs.
400 */
401 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
402 MODULE_VERSION(vmm, 1);
403
404 static void
vm_init(struct vm * vm,bool create)405 vm_init(struct vm *vm, bool create)
406 {
407 int i;
408
409 vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
410 MPASS(vm->cookie != NULL);
411
412 CPU_ZERO(&vm->active_cpus);
413 CPU_ZERO(&vm->debug_cpus);
414
415 vm->suspend = 0;
416 CPU_ZERO(&vm->suspended_cpus);
417
418 memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
419 memset(vm->special_reg, 0, sizeof(vm->special_reg));
420
421 if (!create) {
422 for (i = 0; i < vm->maxcpus; i++) {
423 if (vm->vcpu[i] != NULL)
424 vcpu_init(vm->vcpu[i]);
425 }
426 }
427 }
428
429 void
vm_disable_vcpu_creation(struct vm * vm)430 vm_disable_vcpu_creation(struct vm *vm)
431 {
432 sx_xlock(&vm->vcpus_init_lock);
433 vm->dying = true;
434 sx_xunlock(&vm->vcpus_init_lock);
435 }
436
437 struct vcpu *
vm_alloc_vcpu(struct vm * vm,int vcpuid)438 vm_alloc_vcpu(struct vm *vm, int vcpuid)
439 {
440 struct vcpu *vcpu;
441
442 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
443 return (NULL);
444
445 /* Some interrupt controllers may have a CPU limit */
446 if (vcpuid >= vgic_max_cpu_count(vm->cookie))
447 return (NULL);
448
449 vcpu = (struct vcpu *)
450 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
451 if (__predict_true(vcpu != NULL))
452 return (vcpu);
453
454 sx_xlock(&vm->vcpus_init_lock);
455 vcpu = vm->vcpu[vcpuid];
456 if (vcpu == NULL && !vm->dying) {
457 vcpu = vcpu_alloc(vm, vcpuid);
458 vcpu_init(vcpu);
459
460 /*
461 * Ensure vCPU is fully created before updating pointer
462 * to permit unlocked reads above.
463 */
464 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
465 (uintptr_t)vcpu);
466 }
467 sx_xunlock(&vm->vcpus_init_lock);
468 return (vcpu);
469 }
470
471 void
vm_slock_vcpus(struct vm * vm)472 vm_slock_vcpus(struct vm *vm)
473 {
474 sx_slock(&vm->vcpus_init_lock);
475 }
476
477 void
vm_unlock_vcpus(struct vm * vm)478 vm_unlock_vcpus(struct vm *vm)
479 {
480 sx_unlock(&vm->vcpus_init_lock);
481 }
482
483 int
vm_create(const char * name,struct vm ** retvm)484 vm_create(const char *name, struct vm **retvm)
485 {
486 struct vm *vm;
487 struct vmspace *vmspace;
488
489 /*
490 * If vmm.ko could not be successfully initialized then don't attempt
491 * to create the virtual machine.
492 */
493 if (!vmm_initialized)
494 return (ENXIO);
495
496 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
497 return (EINVAL);
498
499 vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
500 if (vmspace == NULL)
501 return (ENOMEM);
502
503 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
504 strcpy(vm->name, name);
505 vm->vmspace = vmspace;
506 sx_init(&vm->mem_segs_lock, "vm mem_segs");
507 sx_init(&vm->vcpus_init_lock, "vm vcpus");
508
509 vm->sockets = 1;
510 vm->cores = 1; /* XXX backwards compatibility */
511 vm->threads = 1; /* XXX backwards compatibility */
512 vm->maxcpus = vm_maxcpu;
513
514 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
515 M_WAITOK | M_ZERO);
516
517 vm_init(vm, true);
518
519 *retvm = vm;
520 return (0);
521 }
522
523 void
vm_get_topology(struct vm * vm,uint16_t * sockets,uint16_t * cores,uint16_t * threads,uint16_t * maxcpus)524 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
525 uint16_t *threads, uint16_t *maxcpus)
526 {
527 *sockets = vm->sockets;
528 *cores = vm->cores;
529 *threads = vm->threads;
530 *maxcpus = vm->maxcpus;
531 }
532
533 uint16_t
vm_get_maxcpus(struct vm * vm)534 vm_get_maxcpus(struct vm *vm)
535 {
536 return (vm->maxcpus);
537 }
538
539 int
vm_set_topology(struct vm * vm,uint16_t sockets,uint16_t cores,uint16_t threads,uint16_t maxcpus)540 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
541 uint16_t threads, uint16_t maxcpus)
542 {
543 /* Ignore maxcpus. */
544 if ((sockets * cores * threads) > vm->maxcpus)
545 return (EINVAL);
546 vm->sockets = sockets;
547 vm->cores = cores;
548 vm->threads = threads;
549 return(0);
550 }
551
552 static void
vm_cleanup(struct vm * vm,bool destroy)553 vm_cleanup(struct vm *vm, bool destroy)
554 {
555 struct mem_map *mm;
556 pmap_t pmap __diagused;
557 int i;
558
559 if (destroy) {
560 pmap = vmspace_pmap(vm->vmspace);
561 sched_pin();
562 PCPU_SET(curvmpmap, NULL);
563 sched_unpin();
564 CPU_FOREACH(i) {
565 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
566 }
567 }
568
569 vgic_detach_from_vm(vm->cookie);
570
571 for (i = 0; i < vm->maxcpus; i++) {
572 if (vm->vcpu[i] != NULL)
573 vcpu_cleanup(vm->vcpu[i], destroy);
574 }
575
576 vmmops_cleanup(vm->cookie);
577
578 /*
579 * System memory is removed from the guest address space only when
580 * the VM is destroyed. This is because the mapping remains the same
581 * across VM reset.
582 *
583 * Device memory can be relocated by the guest (e.g. using PCI BARs)
584 * so those mappings are removed on a VM reset.
585 */
586 if (!destroy) {
587 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
588 mm = &vm->mem_maps[i];
589 if (destroy || !sysmem_mapping(vm, mm))
590 vm_free_memmap(vm, i);
591 }
592 }
593
594 if (destroy) {
595 for (i = 0; i < VM_MAX_MEMSEGS; i++)
596 vm_free_memseg(vm, i);
597
598 vmmops_vmspace_free(vm->vmspace);
599 vm->vmspace = NULL;
600
601 for (i = 0; i < vm->maxcpus; i++)
602 free(vm->vcpu[i], M_VMM);
603 free(vm->vcpu, M_VMM);
604 sx_destroy(&vm->vcpus_init_lock);
605 sx_destroy(&vm->mem_segs_lock);
606 }
607 }
608
609 void
vm_destroy(struct vm * vm)610 vm_destroy(struct vm *vm)
611 {
612 vm_cleanup(vm, true);
613 free(vm, M_VMM);
614 }
615
616 int
vm_reinit(struct vm * vm)617 vm_reinit(struct vm *vm)
618 {
619 int error;
620
621 /*
622 * A virtual machine can be reset only if all vcpus are suspended.
623 */
624 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
625 vm_cleanup(vm, false);
626 vm_init(vm, false);
627 error = 0;
628 } else {
629 error = EBUSY;
630 }
631
632 return (error);
633 }
634
635 const char *
vm_name(struct vm * vm)636 vm_name(struct vm *vm)
637 {
638 return (vm->name);
639 }
640
641 void
vm_slock_memsegs(struct vm * vm)642 vm_slock_memsegs(struct vm *vm)
643 {
644 sx_slock(&vm->mem_segs_lock);
645 }
646
647 void
vm_xlock_memsegs(struct vm * vm)648 vm_xlock_memsegs(struct vm *vm)
649 {
650 sx_xlock(&vm->mem_segs_lock);
651 }
652
653 void
vm_unlock_memsegs(struct vm * vm)654 vm_unlock_memsegs(struct vm *vm)
655 {
656 sx_unlock(&vm->mem_segs_lock);
657 }
658
659 /*
660 * Return 'true' if 'gpa' is allocated in the guest address space.
661 *
662 * This function is called in the context of a running vcpu which acts as
663 * an implicit lock on 'vm->mem_maps[]'.
664 */
665 bool
vm_mem_allocated(struct vcpu * vcpu,vm_paddr_t gpa)666 vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
667 {
668 struct vm *vm = vcpu->vm;
669 struct mem_map *mm;
670 int i;
671
672 #ifdef INVARIANTS
673 int hostcpu, state;
674 state = vcpu_get_state(vcpu, &hostcpu);
675 KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
676 ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
677 #endif
678
679 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
680 mm = &vm->mem_maps[i];
681 if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
682 return (true); /* 'gpa' is sysmem or devmem */
683 }
684
685 return (false);
686 }
687
688 int
vm_alloc_memseg(struct vm * vm,int ident,size_t len,bool sysmem)689 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
690 {
691 struct mem_seg *seg;
692 vm_object_t obj;
693
694 sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
695
696 if (ident < 0 || ident >= VM_MAX_MEMSEGS)
697 return (EINVAL);
698
699 if (len == 0 || (len & PAGE_MASK))
700 return (EINVAL);
701
702 seg = &vm->mem_segs[ident];
703 if (seg->object != NULL) {
704 if (seg->len == len && seg->sysmem == sysmem)
705 return (EEXIST);
706 else
707 return (EINVAL);
708 }
709
710 obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
711 if (obj == NULL)
712 return (ENOMEM);
713
714 seg->len = len;
715 seg->object = obj;
716 seg->sysmem = sysmem;
717 return (0);
718 }
719
720 int
vm_get_memseg(struct vm * vm,int ident,size_t * len,bool * sysmem,vm_object_t * objptr)721 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
722 vm_object_t *objptr)
723 {
724 struct mem_seg *seg;
725
726 sx_assert(&vm->mem_segs_lock, SX_LOCKED);
727
728 if (ident < 0 || ident >= VM_MAX_MEMSEGS)
729 return (EINVAL);
730
731 seg = &vm->mem_segs[ident];
732 if (len)
733 *len = seg->len;
734 if (sysmem)
735 *sysmem = seg->sysmem;
736 if (objptr)
737 *objptr = seg->object;
738 return (0);
739 }
740
741 void
vm_free_memseg(struct vm * vm,int ident)742 vm_free_memseg(struct vm *vm, int ident)
743 {
744 struct mem_seg *seg;
745
746 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
747 ("%s: invalid memseg ident %d", __func__, ident));
748
749 seg = &vm->mem_segs[ident];
750 if (seg->object != NULL) {
751 vm_object_deallocate(seg->object);
752 bzero(seg, sizeof(struct mem_seg));
753 }
754 }
755
756 int
vm_mmap_memseg(struct vm * vm,vm_paddr_t gpa,int segid,vm_ooffset_t first,size_t len,int prot,int flags)757 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
758 size_t len, int prot, int flags)
759 {
760 struct mem_seg *seg;
761 struct mem_map *m, *map;
762 vm_ooffset_t last;
763 int i, error;
764
765 if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
766 return (EINVAL);
767
768 if (flags & ~VM_MEMMAP_F_WIRED)
769 return (EINVAL);
770
771 if (segid < 0 || segid >= VM_MAX_MEMSEGS)
772 return (EINVAL);
773
774 seg = &vm->mem_segs[segid];
775 if (seg->object == NULL)
776 return (EINVAL);
777
778 last = first + len;
779 if (first < 0 || first >= last || last > seg->len)
780 return (EINVAL);
781
782 if ((gpa | first | last) & PAGE_MASK)
783 return (EINVAL);
784
785 map = NULL;
786 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
787 m = &vm->mem_maps[i];
788 if (m->len == 0) {
789 map = m;
790 break;
791 }
792 }
793
794 if (map == NULL)
795 return (ENOSPC);
796
797 error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
798 len, 0, VMFS_NO_SPACE, prot, prot, 0);
799 if (error != KERN_SUCCESS)
800 return (EFAULT);
801
802 vm_object_reference(seg->object);
803
804 if (flags & VM_MEMMAP_F_WIRED) {
805 error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
806 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
807 if (error != KERN_SUCCESS) {
808 vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
809 return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
810 EFAULT);
811 }
812 }
813
814 map->gpa = gpa;
815 map->len = len;
816 map->segoff = first;
817 map->segid = segid;
818 map->prot = prot;
819 map->flags = flags;
820 return (0);
821 }
822
823 int
vm_munmap_memseg(struct vm * vm,vm_paddr_t gpa,size_t len)824 vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
825 {
826 struct mem_map *m;
827 int i;
828
829 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
830 m = &vm->mem_maps[i];
831 if (m->gpa == gpa && m->len == len) {
832 vm_free_memmap(vm, i);
833 return (0);
834 }
835 }
836
837 return (EINVAL);
838 }
839
840 int
vm_mmap_getnext(struct vm * vm,vm_paddr_t * gpa,int * segid,vm_ooffset_t * segoff,size_t * len,int * prot,int * flags)841 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
842 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
843 {
844 struct mem_map *mm, *mmnext;
845 int i;
846
847 mmnext = NULL;
848 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
849 mm = &vm->mem_maps[i];
850 if (mm->len == 0 || mm->gpa < *gpa)
851 continue;
852 if (mmnext == NULL || mm->gpa < mmnext->gpa)
853 mmnext = mm;
854 }
855
856 if (mmnext != NULL) {
857 *gpa = mmnext->gpa;
858 if (segid)
859 *segid = mmnext->segid;
860 if (segoff)
861 *segoff = mmnext->segoff;
862 if (len)
863 *len = mmnext->len;
864 if (prot)
865 *prot = mmnext->prot;
866 if (flags)
867 *flags = mmnext->flags;
868 return (0);
869 } else {
870 return (ENOENT);
871 }
872 }
873
874 static void
vm_free_memmap(struct vm * vm,int ident)875 vm_free_memmap(struct vm *vm, int ident)
876 {
877 struct mem_map *mm;
878 int error __diagused;
879
880 mm = &vm->mem_maps[ident];
881 if (mm->len) {
882 error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
883 mm->gpa + mm->len);
884 KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
885 __func__, error));
886 bzero(mm, sizeof(struct mem_map));
887 }
888 }
889
890 static __inline bool
sysmem_mapping(struct vm * vm,struct mem_map * mm)891 sysmem_mapping(struct vm *vm, struct mem_map *mm)
892 {
893
894 if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
895 return (true);
896 else
897 return (false);
898 }
899
900 vm_paddr_t
vmm_sysmem_maxaddr(struct vm * vm)901 vmm_sysmem_maxaddr(struct vm *vm)
902 {
903 struct mem_map *mm;
904 vm_paddr_t maxaddr;
905 int i;
906
907 maxaddr = 0;
908 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
909 mm = &vm->mem_maps[i];
910 if (sysmem_mapping(vm, mm)) {
911 if (maxaddr < mm->gpa + mm->len)
912 maxaddr = mm->gpa + mm->len;
913 }
914 }
915 return (maxaddr);
916 }
917
918 int
vm_gla2gpa_nofault(struct vcpu * vcpu,struct vm_guest_paging * paging,uint64_t gla,int prot,uint64_t * gpa,int * is_fault)919 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
920 uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
921 {
922
923 vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
924 return (0);
925 }
926
/* Register read handler that always yields zero ("read as zero"). */
static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	const uint64_t zero = 0;

	*rval = zero;
	return (0);
}
933
/*
 * Register read handler that returns the 64-bit value 'arg' points to.
 */
static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	uint64_t *src = arg;

	*rval = *src;
	return (0);
}
940
/* Register write handler that discards the value ("write ignored"). */
static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	const int ok = 0;

	return (ok);
}
946
947 static const struct vmm_special_reg vmm_special_regs[] = {
948 #define SPECIAL_REG(_reg, _read, _write) \
949 { \
950 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
951 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
952 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
953 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
954 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
955 .esr_mask = ISS_MSR_REG_MASK, \
956 .reg_read = (_read), \
957 .reg_write = (_write), \
958 .arg = NULL, \
959 }
960 #define ID_SPECIAL_REG(_reg, _name) \
961 { \
962 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
963 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
964 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
965 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
966 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
967 .esr_mask = ISS_MSR_REG_MASK, \
968 .reg_read = vmm_reg_read_arg, \
969 .reg_write = vmm_reg_wi, \
970 .arg = &(vmm_arch_regs._name), \
971 }
972
973 /* ID registers */
974 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
975 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
976 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
977 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
978 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),
979
980 /*
981 * All other ID registers are read as zero.
982 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
983 */
984 {
985 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
986 (0 << ISS_MSR_OP1_SHIFT) |
987 (0 << ISS_MSR_CRn_SHIFT) |
988 (0 << ISS_MSR_CRm_SHIFT),
989 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
990 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
991 .reg_read = vmm_reg_raz,
992 .reg_write = vmm_reg_wi,
993 .arg = NULL,
994 },
995
996 /* Counter physical registers */
997 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
998 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
999 vtimer_phys_cval_write),
1000 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
1001 vtimer_phys_tval_write),
1002 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
1003 #undef SPECIAL_REG
1004 };
1005
1006 void
vm_register_reg_handler(struct vm * vm,uint64_t iss,uint64_t mask,reg_read_t reg_read,reg_write_t reg_write,void * arg)1007 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
1008 reg_read_t reg_read, reg_write_t reg_write, void *arg)
1009 {
1010 int i;
1011
1012 for (i = 0; i < nitems(vm->special_reg); i++) {
1013 if (vm->special_reg[i].esr_iss == 0 &&
1014 vm->special_reg[i].esr_mask == 0) {
1015 vm->special_reg[i].esr_iss = iss;
1016 vm->special_reg[i].esr_mask = mask;
1017 vm->special_reg[i].reg_read = reg_read;
1018 vm->special_reg[i].reg_write = reg_write;
1019 vm->special_reg[i].arg = arg;
1020 return;
1021 }
1022 }
1023
1024 panic("%s: No free special register slot", __func__);
1025 }
1026
1027 void
vm_deregister_reg_handler(struct vm * vm,uint64_t iss,uint64_t mask)1028 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
1029 {
1030 int i;
1031
1032 for (i = 0; i < nitems(vm->special_reg); i++) {
1033 if (vm->special_reg[i].esr_iss == iss &&
1034 vm->special_reg[i].esr_mask == mask) {
1035 memset(&vm->special_reg[i], 0,
1036 sizeof(vm->special_reg[i]));
1037 return;
1038 }
1039 }
1040
1041 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
1042 mask);
1043 }
1044
1045 static int
vm_handle_reg_emul(struct vcpu * vcpu,bool * retu)1046 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
1047 {
1048 struct vm *vm;
1049 struct vm_exit *vme;
1050 struct vre *vre;
1051 int i, rv;
1052
1053 vm = vcpu->vm;
1054 vme = &vcpu->exitinfo;
1055 vre = &vme->u.reg_emul.vre;
1056
1057 for (i = 0; i < nitems(vm->special_reg); i++) {
1058 if (vm->special_reg[i].esr_iss == 0 &&
1059 vm->special_reg[i].esr_mask == 0)
1060 continue;
1061
1062 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
1063 vm->special_reg[i].esr_iss) {
1064 rv = vmm_emulate_register(vcpu, vre,
1065 vm->special_reg[i].reg_read,
1066 vm->special_reg[i].reg_write,
1067 vm->special_reg[i].arg);
1068 if (rv == 0) {
1069 *retu = false;
1070 }
1071 return (rv);
1072 }
1073 }
1074 for (i = 0; i < nitems(vmm_special_regs); i++) {
1075 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
1076 vmm_special_regs[i].esr_iss) {
1077 rv = vmm_emulate_register(vcpu, vre,
1078 vmm_special_regs[i].reg_read,
1079 vmm_special_regs[i].reg_write,
1080 vmm_special_regs[i].arg);
1081 if (rv == 0) {
1082 *retu = false;
1083 }
1084 return (rv);
1085 }
1086 }
1087
1088
1089 *retu = true;
1090 return (0);
1091 }
1092
1093 void
vm_register_inst_handler(struct vm * vm,uint64_t start,uint64_t size,mem_region_read_t mmio_read,mem_region_write_t mmio_write)1094 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
1095 mem_region_read_t mmio_read, mem_region_write_t mmio_write)
1096 {
1097 int i;
1098
1099 for (i = 0; i < nitems(vm->mmio_region); i++) {
1100 if (vm->mmio_region[i].start == 0 &&
1101 vm->mmio_region[i].end == 0) {
1102 vm->mmio_region[i].start = start;
1103 vm->mmio_region[i].end = start + size;
1104 vm->mmio_region[i].read = mmio_read;
1105 vm->mmio_region[i].write = mmio_write;
1106 return;
1107 }
1108 }
1109
1110 panic("%s: No free MMIO region", __func__);
1111 }
1112
1113 void
vm_deregister_inst_handler(struct vm * vm,uint64_t start,uint64_t size)1114 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
1115 {
1116 int i;
1117
1118 for (i = 0; i < nitems(vm->mmio_region); i++) {
1119 if (vm->mmio_region[i].start == start &&
1120 vm->mmio_region[i].end == start + size) {
1121 memset(&vm->mmio_region[i], 0,
1122 sizeof(vm->mmio_region[i]));
1123 return;
1124 }
1125 }
1126
1127 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
1128 start + size);
1129 }
1130
1131 static int
vm_handle_inst_emul(struct vcpu * vcpu,bool * retu)1132 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
1133 {
1134 struct vm *vm;
1135 struct vm_exit *vme;
1136 struct vie *vie;
1137 struct hyp *hyp;
1138 uint64_t fault_ipa;
1139 struct vm_guest_paging *paging;
1140 struct vmm_mmio_region *vmr;
1141 int error, i;
1142
1143 vm = vcpu->vm;
1144 hyp = vm->cookie;
1145 if (!hyp->vgic_attached)
1146 goto out_user;
1147
1148 vme = &vcpu->exitinfo;
1149 vie = &vme->u.inst_emul.vie;
1150 paging = &vme->u.inst_emul.paging;
1151
1152 fault_ipa = vme->u.inst_emul.gpa;
1153
1154 vmr = NULL;
1155 for (i = 0; i < nitems(vm->mmio_region); i++) {
1156 if (vm->mmio_region[i].start <= fault_ipa &&
1157 vm->mmio_region[i].end > fault_ipa) {
1158 vmr = &vm->mmio_region[i];
1159 break;
1160 }
1161 }
1162 if (vmr == NULL)
1163 goto out_user;
1164
1165 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
1166 vmr->read, vmr->write, retu);
1167 return (error);
1168
1169 out_user:
1170 *retu = true;
1171 return (0);
1172 }
1173
1174 int
vm_suspend(struct vm * vm,enum vm_suspend_how how)1175 vm_suspend(struct vm *vm, enum vm_suspend_how how)
1176 {
1177 int i;
1178
1179 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1180 return (EINVAL);
1181
1182 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
1183 VM_CTR2(vm, "virtual machine already suspended %d/%d",
1184 vm->suspend, how);
1185 return (EALREADY);
1186 }
1187
1188 VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1189
1190 /*
1191 * Notify all active vcpus that they are now suspended.
1192 */
1193 for (i = 0; i < vm->maxcpus; i++) {
1194 if (CPU_ISSET(i, &vm->active_cpus))
1195 vcpu_notify_event(vm_vcpu(vm, i));
1196 }
1197
1198 return (0);
1199 }
1200
1201 void
vm_exit_suspended(struct vcpu * vcpu,uint64_t pc)1202 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
1203 {
1204 struct vm *vm = vcpu->vm;
1205 struct vm_exit *vmexit;
1206
1207 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1208 ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1209
1210 vmexit = vm_exitinfo(vcpu);
1211 vmexit->pc = pc;
1212 vmexit->inst_length = 4;
1213 vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1214 vmexit->u.suspended.how = vm->suspend;
1215 }
1216
1217 void
vm_exit_debug(struct vcpu * vcpu,uint64_t pc)1218 vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
1219 {
1220 struct vm_exit *vmexit;
1221
1222 vmexit = vm_exitinfo(vcpu);
1223 vmexit->pc = pc;
1224 vmexit->inst_length = 4;
1225 vmexit->exitcode = VM_EXITCODE_DEBUG;
1226 }
1227
1228 int
vm_activate_cpu(struct vcpu * vcpu)1229 vm_activate_cpu(struct vcpu *vcpu)
1230 {
1231 struct vm *vm = vcpu->vm;
1232
1233 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
1234 return (EBUSY);
1235
1236 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
1237 return (0);
1238
1239 }
1240
1241 int
vm_suspend_cpu(struct vm * vm,struct vcpu * vcpu)1242 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
1243 {
1244 if (vcpu == NULL) {
1245 vm->debug_cpus = vm->active_cpus;
1246 for (int i = 0; i < vm->maxcpus; i++) {
1247 if (CPU_ISSET(i, &vm->active_cpus))
1248 vcpu_notify_event(vm_vcpu(vm, i));
1249 }
1250 } else {
1251 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
1252 return (EINVAL);
1253
1254 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
1255 vcpu_notify_event(vcpu);
1256 }
1257 return (0);
1258 }
1259
1260 int
vm_resume_cpu(struct vm * vm,struct vcpu * vcpu)1261 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
1262 {
1263
1264 if (vcpu == NULL) {
1265 CPU_ZERO(&vm->debug_cpus);
1266 } else {
1267 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
1268 return (EINVAL);
1269
1270 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
1271 }
1272 return (0);
1273 }
1274
1275 int
vcpu_debugged(struct vcpu * vcpu)1276 vcpu_debugged(struct vcpu *vcpu)
1277 {
1278
1279 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
1280 }
1281
1282 cpuset_t
vm_active_cpus(struct vm * vm)1283 vm_active_cpus(struct vm *vm)
1284 {
1285
1286 return (vm->active_cpus);
1287 }
1288
1289 cpuset_t
vm_debug_cpus(struct vm * vm)1290 vm_debug_cpus(struct vm *vm)
1291 {
1292
1293 return (vm->debug_cpus);
1294 }
1295
1296 cpuset_t
vm_suspended_cpus(struct vm * vm)1297 vm_suspended_cpus(struct vm *vm)
1298 {
1299
1300 return (vm->suspended_cpus);
1301 }
1302
1303
1304 void *
vcpu_stats(struct vcpu * vcpu)1305 vcpu_stats(struct vcpu *vcpu)
1306 {
1307
1308 return (vcpu->stats);
1309 }
1310
1311 /*
1312 * This function is called to ensure that a vcpu "sees" a pending event
1313 * as soon as possible:
1314 * - If the vcpu thread is sleeping then it is woken up.
1315 * - If the vcpu is running on a different host_cpu then an IPI will be directed
1316 * to the host_cpu to cause the vcpu to trap into the hypervisor.
1317 */
1318 static void
vcpu_notify_event_locked(struct vcpu * vcpu)1319 vcpu_notify_event_locked(struct vcpu *vcpu)
1320 {
1321 int hostcpu;
1322
1323 hostcpu = vcpu->hostcpu;
1324 if (vcpu->state == VCPU_RUNNING) {
1325 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
1326 if (hostcpu != curcpu) {
1327 ipi_cpu(hostcpu, vmm_ipinum);
1328 } else {
1329 /*
1330 * If the 'vcpu' is running on 'curcpu' then it must
1331 * be sending a notification to itself (e.g. SELF_IPI).
1332 * The pending event will be picked up when the vcpu
1333 * transitions back to guest context.
1334 */
1335 }
1336 } else {
1337 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
1338 "with hostcpu %d", vcpu->state, hostcpu));
1339 if (vcpu->state == VCPU_SLEEPING)
1340 wakeup_one(vcpu);
1341 }
1342 }
1343
/*
 * Locking wrapper: takes the vcpu lock around vcpu_notify_event_locked().
 */
void
vcpu_notify_event(struct vcpu *vcpu)
{

	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}
1351
1352 static void
restore_guest_fpustate(struct vcpu * vcpu)1353 restore_guest_fpustate(struct vcpu *vcpu)
1354 {
1355
1356 /* flush host state to the pcb */
1357 vfp_save_state(curthread, curthread->td_pcb);
1358 /* Ensure the VFP state will be re-loaded when exiting the guest */
1359 PCPU_SET(fpcurthread, NULL);
1360
1361 /* restore guest FPU state */
1362 vfp_enable();
1363 vfp_restore(vcpu->guestfpu);
1364
1365 /*
1366 * The FPU is now "dirty" with the guest's state so turn on emulation
1367 * to trap any access to the FPU by the host.
1368 */
1369 vfp_disable();
1370 }
1371
1372 static void
save_guest_fpustate(struct vcpu * vcpu)1373 save_guest_fpustate(struct vcpu *vcpu)
1374 {
1375 if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
1376 CPACR_FPEN_TRAP_ALL1)
1377 panic("VFP not enabled in host!");
1378
1379 /* save guest FPU state */
1380 vfp_enable();
1381 vfp_store(vcpu->guestfpu);
1382 vfp_disable();
1383
1384 KASSERT(PCPU_GET(fpcurthread) == NULL,
1385 ("%s: fpcurthread set with guest registers", __func__));
1386 }
1387 static int
vcpu_set_state_locked(struct vcpu * vcpu,enum vcpu_state newstate,bool from_idle)1388 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
1389 bool from_idle)
1390 {
1391 int error;
1392
1393 vcpu_assert_locked(vcpu);
1394
1395 /*
1396 * State transitions from the vmmdev_ioctl() must always begin from
1397 * the VCPU_IDLE state. This guarantees that there is only a single
1398 * ioctl() operating on a vcpu at any point.
1399 */
1400 if (from_idle) {
1401 while (vcpu->state != VCPU_IDLE) {
1402 vcpu_notify_event_locked(vcpu);
1403 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
1404 }
1405 } else {
1406 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
1407 "vcpu idle state"));
1408 }
1409
1410 if (vcpu->state == VCPU_RUNNING) {
1411 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
1412 "mismatch for running vcpu", curcpu, vcpu->hostcpu));
1413 } else {
1414 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
1415 "vcpu that is not running", vcpu->hostcpu));
1416 }
1417
1418 /*
1419 * The following state transitions are allowed:
1420 * IDLE -> FROZEN -> IDLE
1421 * FROZEN -> RUNNING -> FROZEN
1422 * FROZEN -> SLEEPING -> FROZEN
1423 */
1424 switch (vcpu->state) {
1425 case VCPU_IDLE:
1426 case VCPU_RUNNING:
1427 case VCPU_SLEEPING:
1428 error = (newstate != VCPU_FROZEN);
1429 break;
1430 case VCPU_FROZEN:
1431 error = (newstate == VCPU_FROZEN);
1432 break;
1433 default:
1434 error = 1;
1435 break;
1436 }
1437
1438 if (error)
1439 return (EBUSY);
1440
1441 vcpu->state = newstate;
1442 if (newstate == VCPU_RUNNING)
1443 vcpu->hostcpu = curcpu;
1444 else
1445 vcpu->hostcpu = NOCPU;
1446
1447 if (newstate == VCPU_IDLE)
1448 wakeup(&vcpu->state);
1449
1450 return (0);
1451 }
1452
1453 static void
vcpu_require_state(struct vcpu * vcpu,enum vcpu_state newstate)1454 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
1455 {
1456 int error;
1457
1458 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
1459 panic("Error %d setting state to %d\n", error, newstate);
1460 }
1461
1462 static void
vcpu_require_state_locked(struct vcpu * vcpu,enum vcpu_state newstate)1463 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
1464 {
1465 int error;
1466
1467 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
1468 panic("Error %d setting state to %d", error, newstate);
1469 }
1470
1471 int
vm_get_capability(struct vcpu * vcpu,int type,int * retval)1472 vm_get_capability(struct vcpu *vcpu, int type, int *retval)
1473 {
1474 if (type < 0 || type >= VM_CAP_MAX)
1475 return (EINVAL);
1476
1477 return (vmmops_getcap(vcpu->cookie, type, retval));
1478 }
1479
1480 int
vm_set_capability(struct vcpu * vcpu,int type,int val)1481 vm_set_capability(struct vcpu *vcpu, int type, int val)
1482 {
1483 if (type < 0 || type >= VM_CAP_MAX)
1484 return (EINVAL);
1485
1486 return (vmmops_setcap(vcpu->cookie, type, val));
1487 }
1488
1489 struct vm *
vcpu_vm(struct vcpu * vcpu)1490 vcpu_vm(struct vcpu *vcpu)
1491 {
1492 return (vcpu->vm);
1493 }
1494
1495 int
vcpu_vcpuid(struct vcpu * vcpu)1496 vcpu_vcpuid(struct vcpu *vcpu)
1497 {
1498 return (vcpu->vcpuid);
1499 }
1500
1501 void *
vcpu_get_cookie(struct vcpu * vcpu)1502 vcpu_get_cookie(struct vcpu *vcpu)
1503 {
1504 return (vcpu->cookie);
1505 }
1506
1507 struct vcpu *
vm_vcpu(struct vm * vm,int vcpuid)1508 vm_vcpu(struct vm *vm, int vcpuid)
1509 {
1510 return (vm->vcpu[vcpuid]);
1511 }
1512
1513 int
vcpu_set_state(struct vcpu * vcpu,enum vcpu_state newstate,bool from_idle)1514 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
1515 {
1516 int error;
1517
1518 vcpu_lock(vcpu);
1519 error = vcpu_set_state_locked(vcpu, newstate, from_idle);
1520 vcpu_unlock(vcpu);
1521
1522 return (error);
1523 }
1524
1525 enum vcpu_state
vcpu_get_state(struct vcpu * vcpu,int * hostcpu)1526 vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
1527 {
1528 enum vcpu_state state;
1529
1530 vcpu_lock(vcpu);
1531 state = vcpu->state;
1532 if (hostcpu != NULL)
1533 *hostcpu = vcpu->hostcpu;
1534 vcpu_unlock(vcpu);
1535
1536 return (state);
1537 }
1538
1539 static void *
_vm_gpa_hold(struct vm * vm,vm_paddr_t gpa,size_t len,int reqprot,void ** cookie)1540 _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
1541 void **cookie)
1542 {
1543 int i, count, pageoff;
1544 struct mem_map *mm;
1545 vm_page_t m;
1546
1547 pageoff = gpa & PAGE_MASK;
1548 if (len > PAGE_SIZE - pageoff)
1549 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
1550
1551 count = 0;
1552 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
1553 mm = &vm->mem_maps[i];
1554 if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
1555 gpa < mm->gpa + mm->len) {
1556 count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
1557 trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
1558 break;
1559 }
1560 }
1561
1562 if (count == 1) {
1563 *cookie = m;
1564 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
1565 } else {
1566 *cookie = NULL;
1567 return (NULL);
1568 }
1569 }
1570
1571 void *
vm_gpa_hold(struct vcpu * vcpu,vm_paddr_t gpa,size_t len,int reqprot,void ** cookie)1572 vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
1573 void **cookie)
1574 {
1575 #ifdef INVARIANTS
1576 /*
1577 * The current vcpu should be frozen to ensure 'vm_memmap[]'
1578 * stability.
1579 */
1580 int state = vcpu_get_state(vcpu, NULL);
1581 KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
1582 __func__, state));
1583 #endif
1584 return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
1585 }
1586
1587 void *
vm_gpa_hold_global(struct vm * vm,vm_paddr_t gpa,size_t len,int reqprot,void ** cookie)1588 vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
1589 void **cookie)
1590 {
1591 sx_assert(&vm->mem_segs_lock, SX_LOCKED);
1592 return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
1593 }
1594
1595 void
vm_gpa_release(void * cookie)1596 vm_gpa_release(void *cookie)
1597 {
1598 vm_page_t m = cookie;
1599
1600 vm_page_unwire(m, PQ_ACTIVE);
1601 }
1602
1603 int
vm_get_register(struct vcpu * vcpu,int reg,uint64_t * retval)1604 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
1605 {
1606
1607 if (reg >= VM_REG_LAST)
1608 return (EINVAL);
1609
1610 return (vmmops_getreg(vcpu->cookie, reg, retval));
1611 }
1612
1613 int
vm_set_register(struct vcpu * vcpu,int reg,uint64_t val)1614 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
1615 {
1616 int error;
1617
1618 if (reg >= VM_REG_LAST)
1619 return (EINVAL);
1620 error = vmmops_setreg(vcpu->cookie, reg, val);
1621 if (error || reg != VM_REG_GUEST_PC)
1622 return (error);
1623
1624 vcpu->nextpc = val;
1625
1626 return (0);
1627 }
1628
1629 void *
vm_get_cookie(struct vm * vm)1630 vm_get_cookie(struct vm *vm)
1631 {
1632 return (vm->cookie);
1633 }
1634
1635 int
vm_inject_exception(struct vcpu * vcpu,uint64_t esr,uint64_t far)1636 vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
1637 {
1638 return (vmmops_exception(vcpu->cookie, esr, far));
1639 }
1640
1641 int
vm_attach_vgic(struct vm * vm,struct vm_vgic_descr * descr)1642 vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
1643 {
1644 return (vgic_attach_to_vm(vm->cookie, descr));
1645 }
1646
1647 int
vm_assert_irq(struct vm * vm,uint32_t irq)1648 vm_assert_irq(struct vm *vm, uint32_t irq)
1649 {
1650 return (vgic_inject_irq(vm->cookie, -1, irq, true));
1651 }
1652
1653 int
vm_deassert_irq(struct vm * vm,uint32_t irq)1654 vm_deassert_irq(struct vm *vm, uint32_t irq)
1655 {
1656 return (vgic_inject_irq(vm->cookie, -1, irq, false));
1657 }
1658
1659 int
vm_raise_msi(struct vm * vm,uint64_t msg,uint64_t addr,int bus,int slot,int func)1660 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
1661 int func)
1662 {
1663 /* TODO: Should we raise an SError? */
1664 return (vgic_inject_msi(vm->cookie, msg, addr));
1665 }
1666
1667 static int
vm_handle_smccc_call(struct vcpu * vcpu,struct vm_exit * vme,bool * retu)1668 vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
1669 {
1670 struct hypctx *hypctx;
1671 int i;
1672
1673 hypctx = vcpu_get_cookie(vcpu);
1674
1675 if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
1676 return (1);
1677
1678 vme->exitcode = VM_EXITCODE_SMCCC;
1679 vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
1680 for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
1681 vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];
1682
1683 *retu = true;
1684 return (0);
1685 }
1686
1687 static int
vm_handle_wfi(struct vcpu * vcpu,struct vm_exit * vme,bool * retu)1688 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
1689 {
1690 vcpu_lock(vcpu);
1691 while (1) {
1692 if (vgic_has_pending_irq(vcpu->cookie))
1693 break;
1694
1695 if (vcpu_should_yield(vcpu))
1696 break;
1697
1698 vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1699 /*
1700 * XXX msleep_spin() cannot be interrupted by signals so
1701 * wake up periodically to check pending signals.
1702 */
1703 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
1704 vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1705 }
1706 vcpu_unlock(vcpu);
1707
1708 *retu = false;
1709 return (0);
1710 }
1711
1712 static int
vm_handle_paging(struct vcpu * vcpu,bool * retu)1713 vm_handle_paging(struct vcpu *vcpu, bool *retu)
1714 {
1715 struct vm *vm = vcpu->vm;
1716 struct vm_exit *vme;
1717 struct vm_map *map;
1718 uint64_t addr, esr;
1719 pmap_t pmap;
1720 int ftype, rv;
1721
1722 vme = &vcpu->exitinfo;
1723
1724 pmap = vmspace_pmap(vcpu->vm->vmspace);
1725 addr = vme->u.paging.gpa;
1726 esr = vme->u.paging.esr;
1727
1728 /* The page exists, but the page table needs to be updated. */
1729 if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
1730 return (0);
1731
1732 switch (ESR_ELx_EXCEPTION(esr)) {
1733 case EXCP_INSN_ABORT_L:
1734 case EXCP_DATA_ABORT_L:
1735 ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
1736 break;
1737 default:
1738 panic("%s: Invalid exception (esr = %lx)", __func__, esr);
1739 }
1740
1741 map = &vm->vmspace->vm_map;
1742 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
1743 if (rv != KERN_SUCCESS)
1744 return (EFAULT);
1745
1746 return (0);
1747 }
1748
1749 static int
vm_handle_suspend(struct vcpu * vcpu,bool * retu)1750 vm_handle_suspend(struct vcpu *vcpu, bool *retu)
1751 {
1752 struct vm *vm = vcpu->vm;
1753 int error, i;
1754 struct thread *td;
1755
1756 error = 0;
1757 td = curthread;
1758
1759 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);
1760
1761 /*
1762 * Wait until all 'active_cpus' have suspended themselves.
1763 *
1764 * Since a VM may be suspended at any time including when one or
1765 * more vcpus are doing a rendezvous we need to call the rendezvous
1766 * handler while we are waiting to prevent a deadlock.
1767 */
1768 vcpu_lock(vcpu);
1769 while (error == 0) {
1770 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
1771 break;
1772
1773 vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
1774 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
1775 vcpu_require_state_locked(vcpu, VCPU_FROZEN);
1776 if (td_ast_pending(td, TDA_SUSPEND)) {
1777 vcpu_unlock(vcpu);
1778 error = thread_check_susp(td, false);
1779 vcpu_lock(vcpu);
1780 }
1781 }
1782 vcpu_unlock(vcpu);
1783
1784 /*
1785 * Wakeup the other sleeping vcpus and return to userspace.
1786 */
1787 for (i = 0; i < vm->maxcpus; i++) {
1788 if (CPU_ISSET(i, &vm->suspended_cpus)) {
1789 vcpu_notify_event(vm_vcpu(vm, i));
1790 }
1791 }
1792
1793 *retu = true;
1794 return (error);
1795 }
1796
1797 int
vm_run(struct vcpu * vcpu)1798 vm_run(struct vcpu *vcpu)
1799 {
1800 struct vm *vm = vcpu->vm;
1801 struct vm_eventinfo evinfo;
1802 int error, vcpuid;
1803 struct vm_exit *vme;
1804 bool retu;
1805 pmap_t pmap;
1806
1807 vcpuid = vcpu->vcpuid;
1808
1809 if (!CPU_ISSET(vcpuid, &vm->active_cpus))
1810 return (EINVAL);
1811
1812 if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
1813 return (EINVAL);
1814
1815 pmap = vmspace_pmap(vm->vmspace);
1816 vme = &vcpu->exitinfo;
1817 evinfo.rptr = NULL;
1818 evinfo.sptr = &vm->suspend;
1819 evinfo.iptr = NULL;
1820 restart:
1821 critical_enter();
1822
1823 restore_guest_fpustate(vcpu);
1824
1825 vcpu_require_state(vcpu, VCPU_RUNNING);
1826 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
1827 vcpu_require_state(vcpu, VCPU_FROZEN);
1828
1829 save_guest_fpustate(vcpu);
1830
1831 critical_exit();
1832
1833 if (error == 0) {
1834 retu = false;
1835 switch (vme->exitcode) {
1836 case VM_EXITCODE_INST_EMUL:
1837 vcpu->nextpc = vme->pc + vme->inst_length;
1838 error = vm_handle_inst_emul(vcpu, &retu);
1839 break;
1840
1841 case VM_EXITCODE_REG_EMUL:
1842 vcpu->nextpc = vme->pc + vme->inst_length;
1843 error = vm_handle_reg_emul(vcpu, &retu);
1844 break;
1845
1846 case VM_EXITCODE_HVC:
1847 /*
1848 * The HVC instruction saves the address for the
1849 * next instruction as the return address.
1850 */
1851 vcpu->nextpc = vme->pc;
1852 /*
1853 * The PSCI call can change the exit information in the
1854 * case of suspend/reset/poweroff/cpu off/cpu on.
1855 */
1856 error = vm_handle_smccc_call(vcpu, vme, &retu);
1857 break;
1858
1859 case VM_EXITCODE_WFI:
1860 vcpu->nextpc = vme->pc + vme->inst_length;
1861 error = vm_handle_wfi(vcpu, vme, &retu);
1862 break;
1863
1864 case VM_EXITCODE_PAGING:
1865 vcpu->nextpc = vme->pc;
1866 error = vm_handle_paging(vcpu, &retu);
1867 break;
1868
1869 case VM_EXITCODE_SUSPENDED:
1870 vcpu->nextpc = vme->pc;
1871 error = vm_handle_suspend(vcpu, &retu);
1872 break;
1873
1874 default:
1875 /* Handle in userland */
1876 vcpu->nextpc = vme->pc;
1877 retu = true;
1878 break;
1879 }
1880 }
1881
1882 if (error == 0 && retu == false)
1883 goto restart;
1884
1885 return (error);
1886 }
1887