/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

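/*
 * Read each host ID register and mask it with the feature set that vmm
 * knows how to expose to a guest; a register that cannot be read is
 * reported as zero.
 */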
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,	\
	    masks->field))						\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmm_unsupported_quirk();
		if (error != 0)
			break;
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

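/*
 * Per-VM initialization shared by vm_create() and vm_reinit().  On the
 * reinit path any vcpus that already exist are reinitialized as well.
 */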
static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

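/*
 * Look up a vcpu, creating it on first use.  The common case is a
 * lockless acquire-load of the vcpu pointer; creation is serialized by
 * vcpus_init_lock and the new pointer is published with a release store
 * so lockless readers only ever see a fully constructed vcpu.
 */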
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

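/*
 * Release VM state.  When destroy is false (the vm_reinit() path) only
 * the state recreated by vm_init() is torn down; when true, the vcpus,
 * guest memory and vmspace are freed as well.
 */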
static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

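/*
 * Emulate a trapped MSR/MRS access.  Handlers registered on the VM with
 * vm_register_reg_handler() are searched before the global
 * vmm_special_regs table; if nothing matches, the exit is handed to
 * userspace (*retu = true).
 */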
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

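/*
 * Emulate an instruction that faulted inside a registered MMIO region.
 * Faults outside any registered region, or taken before a vgic is
 * attached, are forwarded to userspace instead.
 */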
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

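/*
 * Swap the VFP hardware from host to guest state.  Host state is
 * flushed to the pcb, guest state is loaded, and FPU traps are left
 * enabled so any stray host use of the VFP is caught while guest state
 * is live.
 */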
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

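/*
 * The ESR ISS field of an HVC exception holds the hvc immediate; only
 * hvc #0 (the SMCCC calling convention) is accepted.  The call is
 * decoded into the exit structure and completed by userspace.
 */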
static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

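/*
 * Emulate WFI by sleeping until the vgic has a pending interrupt for
 * this vcpu or the host thread needs to yield.
 */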
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

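/*
 * Handle a stage 2 fault: first try a fast page-table update with
 * pmap_fault(), then fall back to a full vm_fault() on the guest's
 * address space.
 */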
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

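/*
 * The main vcpu run loop: enter the guest via the backend, service any
 * exit the kernel can handle (emulation, WFI, paging, suspend) and
 * resume, returning only when userspace attention is required or an
 * error occurs.
 */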
int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}