1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/smp.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/mman.h>
36 #include <sys/pcpu.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/vmem.h>
42
43 #include <vm/vm.h>
44 #include <vm/pmap.h>
45 #include <vm/vm_extern.h>
46 #include <vm/vm_map.h>
47 #include <vm/vm_page.h>
48 #include <vm/vm_param.h>
49
50 #include <machine/armreg.h>
51 #include <machine/vm.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpu.h>
54 #include <machine/machdep.h>
55 #include <machine/vmm.h>
56 #include <machine/vmm_dev.h>
57 #include <machine/atomic.h>
58 #include <machine/hypervisor.h>
59 #include <machine/pmap.h>
60
61 #include <dev/vmm/vmm_mem.h>
62
63 #include "mmu.h"
64 #include "arm64.h"
65 #include "hyp.h"
66 #include "reset.h"
67 #include "io/vgic.h"
68 #include "io/vgic_v3.h"
69 #include "io/vtimer.h"
70 #include "vmm_handlers.h"
71 #include "vmm_stat.h"
72
/*
 * Return values of the exit handlers in this file (handle_el1_sync_excp()
 * and arm64_handle_world_switch()).
 */
#define	HANDLED		1
#define	UNHANDLED	0

/* Number of bits in an EL2 virtual address */
#define	EL2_VIRT_BITS	48
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);

/* TODO: Move the host hypctx off the stack */
/* Size of each per-CPU hypervisor stack allocated in vmmops_modinit() */
#define	VMM_STACK_PAGES	4
#define	VMM_STACK_SIZE	(VMM_STACK_PAGES * PAGE_SIZE)

/*
 * Values chosen once in vmmops_modinit():
 *
 * vmm_pmap_levels:  number of levels passed to pmap_pinit_stage() and used
 *                   to select the VTCR_EL2 starting level
 * vmm_virt_bits:    input address size used to compute VTCR_EL2.T0SZ
 * vmm_max_ipa_bits: IPAs at or above (1 << vmm_max_ipa_bits) are rejected
 *                   by handle_el1_sync_excp()
 */
static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;

/* Register values passed to arm_setup_vectors to set in the hypervisor */
struct vmm_init_regs {
	uint64_t tcr_el2;
	uint64_t vtcr_el2;
};

MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");

extern char hyp_init_vectors[];
extern char hyp_vectors[];
extern char hyp_stub_vectors[];

/*
 * Physical base and page-rounded length of the identity-mapped hypervisor
 * code; recorded in vmmops_modinit() and used by vmmops_modcleanup() to
 * remove the mapping.
 */
static vm_paddr_t hyp_code_base;
static size_t hyp_code_len;

/*
 * Per-CPU hypervisor stacks: stack[] holds the kernel allocation and
 * stack_hyp_va[] the address at which it is entered into the EL2 pmap.
 */
static char *stack[MAXCPU];
static vm_offset_t stack_hyp_va[MAXCPU];

/* vmem arena handing out EL2 virtual addresses; see el2_map_enter() */
static vmem_t *el2_mem_alloc;

static void arm_setup_vectors(void *arg);

/* Per-CPU pointer to the guest context set by arm64_set_active_vcpu() */
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
109
110 static inline void
arm64_set_active_vcpu(struct hypctx * hypctx)111 arm64_set_active_vcpu(struct hypctx *hypctx)
112 {
113 DPCPU_SET(vcpu, hypctx);
114 }
115
116 struct hypctx *
arm64_get_active_vcpu(void)117 arm64_get_active_vcpu(void)
118 {
119 return (DPCPU_GET(vcpu));
120 }
121
122 static void
arm_setup_vectors(void * arg)123 arm_setup_vectors(void *arg)
124 {
125 struct vmm_init_regs *el2_regs;
126 uintptr_t stack_top;
127 uint32_t sctlr_el2;
128 register_t daif;
129
130 el2_regs = arg;
131 arm64_set_active_vcpu(NULL);
132
133 /*
134 * Configure the system control register for EL2:
135 *
136 * SCTLR_EL2_M: MMU on
137 * SCTLR_EL2_C: Data cacheability not affected
138 * SCTLR_EL2_I: Instruction cacheability not affected
139 * SCTLR_EL2_A: Instruction alignment check
140 * SCTLR_EL2_SA: Stack pointer alignment check
141 * SCTLR_EL2_WXN: Treat writable memory as execute never
142 * ~SCTLR_EL2_EE: Data accesses are little-endian
143 */
144 sctlr_el2 = SCTLR_EL2_RES1;
145 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
146 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
147 sctlr_el2 |= SCTLR_EL2_WXN;
148 sctlr_el2 &= ~SCTLR_EL2_EE;
149
150 daif = intr_disable();
151
152 if (in_vhe()) {
153 WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
154 } else {
155 /*
156 * Install the temporary vectors which will be responsible for
157 * initializing the VMM when we next trap into EL2.
158 *
159 * x0: the exception vector table responsible for hypervisor
160 * initialization on the next call.
161 */
162 vmm_call_hyp(vtophys(&vmm_hyp_code));
163
164 /* Create and map the hypervisor stack */
165 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
166
167 /* Special call to initialize EL2 */
168 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
169 sctlr_el2, el2_regs->vtcr_el2);
170 }
171
172 intr_restore(daif);
173 }
174
175 static void
arm_teardown_vectors(void * arg)176 arm_teardown_vectors(void *arg)
177 {
178 register_t daif;
179
180 /*
181 * vmm_cleanup() will disable the MMU. For the next few instructions,
182 * before the hardware disables the MMU, one of the following is
183 * possible:
184 *
185 * a. The instruction addresses are fetched with the MMU disabled,
186 * and they must represent the actual physical addresses. This will work
187 * because we call the vmm_cleanup() function by its physical address.
188 *
189 * b. The instruction addresses are fetched using the old translation
190 * tables. This will work because we have an identity mapping in place
191 * in the translation tables and vmm_cleanup() is called by its physical
192 * address.
193 */
194 daif = intr_disable();
195 /* TODO: Invalidate the cache */
196 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
197 intr_restore(daif);
198
199 arm64_set_active_vcpu(NULL);
200 }
201
202 static uint64_t
vmm_vtcr_el2_sl(u_int levels)203 vmm_vtcr_el2_sl(u_int levels)
204 {
205 #if PAGE_SIZE == PAGE_SIZE_4K
206 switch (levels) {
207 case 2:
208 return (VTCR_EL2_SL0_4K_LVL2);
209 case 3:
210 return (VTCR_EL2_SL0_4K_LVL1);
211 case 4:
212 return (VTCR_EL2_SL0_4K_LVL0);
213 default:
214 panic("%s: Invalid number of page table levels %u", __func__,
215 levels);
216 }
217 #elif PAGE_SIZE == PAGE_SIZE_16K
218 switch (levels) {
219 case 2:
220 return (VTCR_EL2_SL0_16K_LVL2);
221 case 3:
222 return (VTCR_EL2_SL0_16K_LVL1);
223 case 4:
224 return (VTCR_EL2_SL0_16K_LVL0);
225 default:
226 panic("%s: Invalid number of page table levels %u", __func__,
227 levels);
228 }
229 #else
230 #error Unsupported page size
231 #endif
232 }
233
/*
 * Module initialisation for the arm64 VMM backend.
 *
 * Checks that the CPU has hypervisor support and that a vgic is present,
 * derives the stage 2 geometry (vmm_pmap_levels, vmm_virt_bits,
 * vmm_max_ipa_bits) from ID_AA64MMFR0_EL1, installs the stage 2 pmap TLB
 * callbacks, and computes the TCR_EL2/VTCR_EL2 values that are then applied
 * on every CPU by arm_setup_vectors().  When not running with VHE it also
 * creates the EL2 pmap, identity-maps the hypervisor code, maps one stack
 * per CPU and populates the EL2 virtual address allocator.
 *
 * ipinum is not used in this function.
 *
 * Returns 0 on success, ENXIO or ENODEV when a hardware requirement is not
 * met, and ENOMEM when the EL2 MMU cannot be initialised.
 */
int
vmmops_modinit(int ipinum)
{
	struct vmm_init_regs el2_regs;
	vm_offset_t next_hyp_va;
	vm_paddr_t vmm_base;
	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
	int cpu, i;
	bool rv __diagused;

	if (!has_hyp()) {
		printf(
		    "vmm: Processor doesn't have support for virtualization\n");
		return (ENXIO);
	}

	if (!vgic_present()) {
		printf("vmm: No vgic found\n");
		return (ENODEV);
	}

	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
		printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
		return (ENXIO);
	}
	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
	/*
	 * Use 3 levels to give us up to 39 bits with 4k pages, or
	 * 47 bits with 16k pages.
	 */
	/* TODO: Check the number of levels for 64k pages */
	vmm_pmap_levels = 3;
	switch (pa_range_field) {
	case ID_AA64MMFR0_PARange_4G:
		printf("vmm: Not enough physical address bits\n");
		return (ENXIO);
	case ID_AA64MMFR0_PARange_64G:
		vmm_virt_bits = 36;
#if PAGE_SIZE == PAGE_SIZE_16K
		vmm_pmap_levels = 2;
#endif
		break;
	default:
		vmm_virt_bits = 39;
		break;
	}
	/* Raw PARange encoding, re-shifted below into the TCR/VTCR PS fields */
	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;

	if (!in_vhe()) {
		/* Initialise the EL2 MMU */
		if (!vmmpmap_init()) {
			printf("vmm: Failed to init the EL2 MMU\n");
			return (ENOMEM);
		}
	}

	/* Set up the stage 2 pmap callbacks */
	MPASS(pmap_clean_stage2_tlbi == NULL);
	pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
	pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
	pmap_stage2_invalidate_all = vmm_s2_tlbi_all;

	if (!in_vhe()) {
		/*
		 * Create an allocator for the virtual address space used by
		 * EL2. EL2 code is identity-mapped; the allocator is used to
		 * find space for VM structures.
		 */
		el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
		    M_WAITOK);

		/* Create the mappings for the hypervisor translation table. */
		hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);

		/* We need a physical identity mapping for when we activate the MMU */
		hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
		rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
		    VM_PROT_READ | VM_PROT_EXECUTE);
		MPASS(rv);

		/* The first stack starts at the next L2 boundary after the code */
		next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);

		/* Create a per-CPU hypervisor stack */
		CPU_FOREACH(cpu) {
			stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
			stack_hyp_va[cpu] = next_hyp_va;

			/* Map the stack one page at a time */
			for (i = 0; i < VMM_STACK_PAGES; i++) {
				rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
				    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
				    VM_PROT_READ | VM_PROT_WRITE);
				MPASS(rv);
			}
			/* Each stack gets its own L2-sized slot of EL2 VA */
			next_hyp_va += L2_SIZE;
		}

		/* TCR_EL2 is only passed to EL2 in the non-VHE case */
		el2_regs.tcr_el2 = TCR_EL2_RES1;
		el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
		    TCR_EL2_PS_52BITS);
		el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
		el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
#if PAGE_SIZE == PAGE_SIZE_4K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
		el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
#endif
	}

	/* Translate the PARange encoding into a maximum IPA width in bits */
	switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
	case TCR_EL2_PS_32BITS:
		vmm_max_ipa_bits = 32;
		break;
	case TCR_EL2_PS_36BITS:
		vmm_max_ipa_bits = 36;
		break;
	case TCR_EL2_PS_40BITS:
		vmm_max_ipa_bits = 40;
		break;
	case TCR_EL2_PS_42BITS:
		vmm_max_ipa_bits = 42;
		break;
	case TCR_EL2_PS_44BITS:
		vmm_max_ipa_bits = 44;
		break;
	case TCR_EL2_PS_48BITS:
		vmm_max_ipa_bits = 48;
		break;
	case TCR_EL2_PS_52BITS:
	default:
		vmm_max_ipa_bits = 52;
		break;
	}

	/*
	 * Configure the Stage 2 translation control register:
	 *
	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
	 * normal memory
	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
	 * normal memory
	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
	 * shareable
	 */
	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
#if PAGE_SIZE == PAGE_SIZE_4K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
#endif
	/*
	 * If FEAT_LPA2 is enabled in the host then we need to enable it here
	 * so the page tables created by pmap.c are correct. The meaning of
	 * the shareability field changes to become address bits when this
	 * is set.
	 */
	if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
		el2_regs.vtcr_el2 |= VTCR_EL2_DS;
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
	} else {
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
	}

	/* Apply the register values on every CPU */
	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);

	if (!in_vhe()) {
		/* Add memory to the vmem allocator (checking there is space) */
		if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
			/*
			 * Ensure there is an L2 block before the vmm code to check
			 * for buffer overflows on earlier data. Include the PAGE_SIZE
			 * of the minimum we can allocate.
			 */
			vmm_base -= L2_SIZE + PAGE_SIZE;
			vmm_base = rounddown2(vmm_base, L2_SIZE);

			/*
			 * Check there is memory before the vmm code to add.
			 *
			 * Reserve the L2 block at address 0 so NULL dereference will
			 * raise an exception.
			 */
			if (vmm_base > L2_SIZE)
				vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
				    M_WAITOK);
		}

		/*
		 * Add the memory after the stacks. There is most of an L2 block
		 * between the last stack and the first allocation so this should
		 * be safe without adding more padding.
		 */
		if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
			vmem_add(el2_mem_alloc, next_hyp_va,
			    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
	}

	vgic_init();
	vtimer_init();

	return (0);
}
452
453 int
vmmops_modcleanup(void)454 vmmops_modcleanup(void)
455 {
456 int cpu;
457
458 if (!in_vhe()) {
459 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
460
461 CPU_FOREACH(cpu) {
462 vmmpmap_remove(stack_hyp_va[cpu],
463 VMM_STACK_PAGES * PAGE_SIZE, false);
464 }
465
466 vmmpmap_remove(hyp_code_base, hyp_code_len, false);
467 }
468
469 vtimer_cleanup();
470
471 if (!in_vhe()) {
472 vmmpmap_fini();
473
474 CPU_FOREACH(cpu)
475 free(stack[cpu], M_HYP);
476 }
477
478 pmap_clean_stage2_tlbi = NULL;
479 pmap_stage2_invalidate_range = NULL;
480 pmap_stage2_invalidate_all = NULL;
481
482 return (0);
483 }
484
485 static vm_size_t
el2_hyp_size(struct vm * vm)486 el2_hyp_size(struct vm *vm)
487 {
488 return (round_page(sizeof(struct hyp) +
489 sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
490 }
491
492 static vm_size_t
el2_hypctx_size(void)493 el2_hypctx_size(void)
494 {
495 return (round_page(sizeof(struct hypctx)));
496 }
497
498 static vm_offset_t
el2_map_enter(vm_offset_t data,vm_size_t size,vm_prot_t prot)499 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
500 {
501 vmem_addr_t addr;
502 int err __diagused;
503 bool rv __diagused;
504
505 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
506 MPASS(err == 0);
507 rv = vmmpmap_enter(addr, size, vtophys(data), prot);
508 MPASS(rv);
509
510 return (addr);
511 }
512
513 void *
vmmops_init(struct vm * vm,pmap_t pmap)514 vmmops_init(struct vm *vm, pmap_t pmap)
515 {
516 struct hyp *hyp;
517 vm_size_t size;
518 uint64_t idreg;
519
520 size = el2_hyp_size(vm);
521 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
522
523 hyp->vm = vm;
524 hyp->vgic_attached = false;
525
526 if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) {
527 if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF)
528 hyp->feats |= HYP_FEAT_ECV_POFF;
529 }
530
531 if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) {
532 if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL)
533 hyp->feats |= HYP_FEAT_HCX;
534 }
535
536 vtimer_vminit(hyp);
537 vgic_vminit(hyp);
538
539 if (!in_vhe())
540 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
541 VM_PROT_READ | VM_PROT_WRITE);
542
543 return (hyp);
544 }
545
546 void *
vmmops_vcpu_init(void * vmi,struct vcpu * vcpu1,int vcpuid)547 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
548 {
549 struct hyp *hyp = vmi;
550 struct hypctx *hypctx;
551 vm_size_t size;
552
553 size = el2_hypctx_size();
554 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
555
556 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
557 ("%s: Invalid vcpuid %d", __func__, vcpuid));
558 hyp->ctx[vcpuid] = hypctx;
559
560 hypctx->hyp = hyp;
561 hypctx->vcpu = vcpu1;
562
563 reset_vm_el01_regs(hypctx);
564 reset_vm_el2_regs(hypctx);
565
566 vtimer_cpuinit(hypctx);
567 vgic_cpuinit(hypctx);
568
569 if (!in_vhe())
570 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
571 VM_PROT_READ | VM_PROT_WRITE);
572
573 return (hypctx);
574 }
575
576 static int
arm_vmm_pinit(pmap_t pmap)577 arm_vmm_pinit(pmap_t pmap)
578 {
579
580 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
581 return (1);
582 }
583
584 struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min,vm_offset_t max)585 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
586 {
587 return (vmspace_alloc(min, max, arm_vmm_pinit));
588 }
589
/*
 * Release a vmspace obtained from vmmops_vmspace_alloc(): the pages are
 * removed from its pmap before the vmspace itself is freed.
 */
void
vmmops_vmspace_free(struct vmspace *vmspace)
{
	pmap_remove_pages(vmspace_pmap(vmspace));

	vmspace_free(vmspace);
}
597
598 static inline void
arm64_print_hyp_regs(struct vm_exit * vme)599 arm64_print_hyp_regs(struct vm_exit *vme)
600 {
601 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
602 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
603 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
604 printf("elr_el2: 0x%016lx\n", vme->pc);
605 }
606
607 static void
arm64_gen_inst_emul_data(struct hypctx * hypctx,uint32_t esr_iss,struct vm_exit * vme_ret)608 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
609 struct vm_exit *vme_ret)
610 {
611 struct vm_guest_paging *paging;
612 struct vie *vie;
613 uint32_t esr_sas, reg_num;
614
615 /*
616 * Get the page address from HPFAR_EL2.
617 */
618 vme_ret->u.inst_emul.gpa =
619 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
620 /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
621 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
622 FAR_EL2_HPFAR_PAGE_MASK;
623
624 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
625 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
626
627 vie = &vme_ret->u.inst_emul.vie;
628 vie->access_size = 1 << esr_sas;
629 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
630 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
631 vie->reg = reg_num;
632
633 paging = &vme_ret->u.inst_emul.paging;
634 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
635 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
636 paging->tcr_el1 = hypctx->tcr_el1;
637 paging->tcr2_el1 = hypctx->tcr2_el1;
638 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
639 if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
640 paging->flags |= VM_GP_MMU_ENABLED;
641 }
642
643 static void
arm64_gen_reg_emul_data(uint32_t esr_iss,struct vm_exit * vme_ret)644 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
645 {
646 uint32_t reg_num;
647 struct vre *vre;
648
649 /* u.hyp member will be replaced by u.reg_emul */
650 vre = &vme_ret->u.reg_emul.vre;
651
652 vre->inst_syndrome = esr_iss;
653 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
654 vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
655 reg_num = ISS_MSR_Rt(esr_iss);
656 vre->reg = reg_num;
657 }
658
659 void
raise_data_insn_abort(struct hypctx * hypctx,uint64_t far,bool dabort,int fsc)660 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
661 {
662 uint64_t esr;
663
664 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
665 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
666 else
667 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
668 /* Set the bit that changes from insn -> data abort */
669 if (dabort)
670 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
671 /* Set the IL bit if set by hardware */
672 esr |= hypctx->tf.tf_esr & ESR_ELx_IL;
673
674 vmmops_exception(hypctx, esr | fsc, far);
675 }
676
677 static int
handle_el1_sync_excp(struct hypctx * hypctx,struct vm_exit * vme_ret,pmap_t pmap)678 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
679 pmap_t pmap)
680 {
681 uint64_t gpa;
682 uint32_t esr_ec, esr_iss;
683
684 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
685 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
686
687 switch (esr_ec) {
688 case EXCP_UNKNOWN:
689 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
690 arm64_print_hyp_regs(vme_ret);
691 vme_ret->exitcode = VM_EXITCODE_HYP;
692 break;
693 case EXCP_TRAP_WFI_WFE:
694 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
695 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
696 vme_ret->exitcode = VM_EXITCODE_WFI;
697 } else {
698 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
699 vme_ret->exitcode = VM_EXITCODE_HYP;
700 }
701 break;
702 case EXCP_HVC:
703 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
704 vme_ret->exitcode = VM_EXITCODE_HVC;
705 break;
706 case EXCP_MSR:
707 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
708 arm64_gen_reg_emul_data(esr_iss, vme_ret);
709 vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
710 break;
711 case EXCP_BRK:
712 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
713 vme_ret->exitcode = VM_EXITCODE_BRK;
714 break;
715 case EXCP_SOFTSTP_EL0:
716 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
717 vme_ret->exitcode = VM_EXITCODE_SS;
718 break;
719 case EXCP_INSN_ABORT_L:
720 case EXCP_DATA_ABORT_L:
721 vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
722 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
723 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
724 case ISS_DATA_DFSC_TF_L0:
725 case ISS_DATA_DFSC_TF_L1:
726 case ISS_DATA_DFSC_TF_L2:
727 case ISS_DATA_DFSC_TF_L3:
728 case ISS_DATA_DFSC_AFF_L1:
729 case ISS_DATA_DFSC_AFF_L2:
730 case ISS_DATA_DFSC_AFF_L3:
731 case ISS_DATA_DFSC_PF_L1:
732 case ISS_DATA_DFSC_PF_L2:
733 case ISS_DATA_DFSC_PF_L3:
734 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
735 /* Check the IPA is valid */
736 if (gpa >= (1ul << vmm_max_ipa_bits)) {
737 raise_data_insn_abort(hypctx,
738 hypctx->exit_info.far_el2,
739 esr_ec == EXCP_DATA_ABORT_L,
740 ISS_DATA_DFSC_ASF_L0);
741 vme_ret->inst_length = 0;
742 return (HANDLED);
743 }
744
745 if (vm_mem_allocated(hypctx->vcpu, gpa)) {
746 vme_ret->exitcode = VM_EXITCODE_PAGING;
747 vme_ret->inst_length = 0;
748 vme_ret->u.paging.esr = hypctx->tf.tf_esr;
749 vme_ret->u.paging.gpa = gpa;
750 } else if (esr_ec == EXCP_INSN_ABORT_L) {
751 /*
752 * Raise an external abort. Device memory is
753 * not executable
754 */
755 raise_data_insn_abort(hypctx,
756 hypctx->exit_info.far_el2, false,
757 ISS_DATA_DFSC_EXT);
758 vme_ret->inst_length = 0;
759 return (HANDLED);
760 } else {
761 arm64_gen_inst_emul_data(hypctx, esr_iss,
762 vme_ret);
763 vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
764 }
765 break;
766 default:
767 arm64_print_hyp_regs(vme_ret);
768 vme_ret->exitcode = VM_EXITCODE_HYP;
769 break;
770 }
771
772 break;
773
774 default:
775 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
776 arm64_print_hyp_regs(vme_ret);
777 vme_ret->exitcode = VM_EXITCODE_HYP;
778 break;
779 }
780
781 /* We don't don't do any instruction emulation here */
782 return (UNHANDLED);
783 }
784
785 static int
arm64_handle_world_switch(struct hypctx * hypctx,int excp_type,struct vm_exit * vme,pmap_t pmap)786 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
787 struct vm_exit *vme, pmap_t pmap)
788 {
789 int handled;
790
791 switch (excp_type) {
792 case EXCP_TYPE_EL1_SYNC:
793 /* The exit code will be set by handle_el1_sync_excp(). */
794 handled = handle_el1_sync_excp(hypctx, vme, pmap);
795 break;
796
797 case EXCP_TYPE_EL1_IRQ:
798 case EXCP_TYPE_EL1_FIQ:
799 /* The host kernel will handle IRQs and FIQs. */
800 vmm_stat_incr(hypctx->vcpu,
801 excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,1);
802 vme->exitcode = VM_EXITCODE_BOGUS;
803 handled = UNHANDLED;
804 break;
805
806 case EXCP_TYPE_EL1_ERROR:
807 case EXCP_TYPE_EL2_SYNC:
808 case EXCP_TYPE_EL2_IRQ:
809 case EXCP_TYPE_EL2_FIQ:
810 case EXCP_TYPE_EL2_ERROR:
811 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
812 vme->exitcode = VM_EXITCODE_BOGUS;
813 handled = UNHANDLED;
814 break;
815
816 default:
817 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
818 vme->exitcode = VM_EXITCODE_BOGUS;
819 handled = UNHANDLED;
820 break;
821 }
822
823 return (handled);
824 }
825
826 static void
ptp_release(void ** cookie)827 ptp_release(void **cookie)
828 {
829 if (*cookie != NULL) {
830 vm_gpa_release(*cookie);
831 *cookie = NULL;
832 }
833 }
834
835 static void *
ptp_hold(struct vcpu * vcpu,vm_paddr_t ptpphys,size_t len,void ** cookie)836 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
837 {
838 void *ptr;
839
840 ptp_release(cookie);
841 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
842 return (ptr);
843 }
844
845 /* log2 of the number of bytes in a page table entry */
846 #define PTE_SHIFT 3
847 int
vmmops_gla2gpa(void * vcpui,struct vm_guest_paging * paging,uint64_t gla,int prot,uint64_t * gpa,int * is_fault)848 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
849 int prot, uint64_t *gpa, int *is_fault)
850 {
851 struct hypctx *hypctx;
852 void *cookie;
853 uint64_t mask, *ptep, pte, pte_addr;
854 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
855 bool is_el0;
856
857 /* Check if the MMU is off */
858 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
859 *is_fault = 0;
860 *gpa = gla;
861 return (0);
862 }
863
864 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;
865
866 if (ADDR_IS_KERNEL(gla)) {
867 /* If address translation is disabled raise an exception */
868 if ((paging->tcr_el1 & TCR_EPD1) != 0) {
869 *is_fault = 1;
870 return (0);
871 }
872 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
873 *is_fault = 1;
874 return (0);
875 }
876 pte_addr = paging->ttbr1_addr;
877 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
878 /* Clear the top byte if TBI is on */
879 if ((paging->tcr_el1 & TCR_TBI1) != 0)
880 gla |= (0xfful << 56);
881 switch (paging->tcr_el1 & TCR_TG1_MASK) {
882 case TCR_TG1_4K:
883 granule_shift = PAGE_SHIFT_4K;
884 break;
885 case TCR_TG1_16K:
886 granule_shift = PAGE_SHIFT_16K;
887 break;
888 case TCR_TG1_64K:
889 granule_shift = PAGE_SHIFT_64K;
890 break;
891 default:
892 *is_fault = 1;
893 return (EINVAL);
894 }
895 } else {
896 /* If address translation is disabled raise an exception */
897 if ((paging->tcr_el1 & TCR_EPD0) != 0) {
898 *is_fault = 1;
899 return (0);
900 }
901 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
902 *is_fault = 1;
903 return (0);
904 }
905 pte_addr = paging->ttbr0_addr;
906 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
907 /* Clear the top byte if TBI is on */
908 if ((paging->tcr_el1 & TCR_TBI0) != 0)
909 gla &= ~(0xfful << 56);
910 switch (paging->tcr_el1 & TCR_TG0_MASK) {
911 case TCR_TG0_4K:
912 granule_shift = PAGE_SHIFT_4K;
913 break;
914 case TCR_TG0_16K:
915 granule_shift = PAGE_SHIFT_16K;
916 break;
917 case TCR_TG0_64K:
918 granule_shift = PAGE_SHIFT_64K;
919 break;
920 default:
921 *is_fault = 1;
922 return (EINVAL);
923 }
924 }
925
926 /*
927 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
928 * for larger values.
929 */
930 switch (granule_shift) {
931 case PAGE_SHIFT_4K:
932 case PAGE_SHIFT_16K:
933 /*
934 * See "Table D8-11 4KB granule, determining stage 1 initial
935 * lookup level" and "Table D8-21 16KB granule, determining
936 * stage 1 initial lookup level" from the "Arm Architecture
937 * Reference Manual for A-Profile architecture" revision I.a
938 * for the minimum and maximum values.
939 *
940 * TODO: Support less than 16 when FEAT_LPA2 is implemented
941 * and TCR_EL1.DS == 1
942 * TODO: Support more than 39 when FEAT_TTST is implemented
943 */
944 if (tsz < 16 || tsz > 39) {
945 *is_fault = 1;
946 return (EINVAL);
947 }
948 break;
949 case PAGE_SHIFT_64K:
950 /* TODO: Support 64k granule. It will probably work, but is untested */
951 default:
952 *is_fault = 1;
953 return (EINVAL);
954 }
955
956 /*
957 * Calculate the input address bits. These are 64 bit in an address
958 * with the top tsz bits being all 0 or all 1.
959 */
960 ia_bits = 64 - tsz;
961
962 /*
963 * Calculate the number of address bits used in the page table
964 * calculation. This is ia_bits minus the bottom granule_shift
965 * bits that are passed to the output address.
966 */
967 address_bits = ia_bits - granule_shift;
968
969 /*
970 * Calculate the number of levels. Each level uses
971 * granule_shift - PTE_SHIFT bits of the input address.
972 * This is because the table is 1 << granule_shift and each
973 * entry is 1 << PTE_SHIFT bytes.
974 */
975 levels = howmany(address_bits, granule_shift - PTE_SHIFT);
976
977 /* Mask of the upper unused bits in the virtual address */
978 gla &= (1ul << ia_bits) - 1;
979 hypctx = (struct hypctx *)vcpui;
980 cookie = NULL;
981 /* TODO: Check if the level supports block descriptors */
982 for (;levels > 0; levels--) {
983 int idx;
984
985 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
986 granule_shift;
987 idx = (gla >> pte_shift) &
988 ((1ul << (granule_shift - PTE_SHIFT)) - 1);
989 while (idx > PAGE_SIZE / sizeof(pte)) {
990 idx -= PAGE_SIZE / sizeof(pte);
991 pte_addr += PAGE_SIZE;
992 }
993
994 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
995 if (ptep == NULL)
996 goto error;
997 pte = ptep[idx];
998
999 /* Calculate the level we are looking at */
1000 switch (levels) {
1001 default:
1002 goto fault;
1003 /* TODO: Level -1 when FEAT_LPA2 is implemented */
1004 case 4: /* Level 0 */
1005 if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
1006 goto fault;
1007 /* FALLTHROUGH */
1008 case 3: /* Level 1 */
1009 case 2: /* Level 2 */
1010 switch (pte & ATTR_DESCR_MASK) {
1011 /* Use L1 macro as all levels are the same */
1012 case L1_TABLE:
1013 /* Check if EL0 can access this address space */
1014 if (is_el0 &&
1015 (pte & TATTR_AP_TABLE_NO_EL0) != 0)
1016 goto fault;
1017 /* Check if the address space is writable */
1018 if ((prot & PROT_WRITE) != 0 &&
1019 (pte & TATTR_AP_TABLE_RO) != 0)
1020 goto fault;
1021 if ((prot & PROT_EXEC) != 0) {
1022 /* Check the table exec attribute */
1023 if ((is_el0 &&
1024 (pte & TATTR_UXN_TABLE) != 0) ||
1025 (!is_el0 &&
1026 (pte & TATTR_PXN_TABLE) != 0))
1027 goto fault;
1028 }
1029 pte_addr = pte & ~ATTR_MASK;
1030 break;
1031 case L1_BLOCK:
1032 goto done;
1033 default:
1034 goto fault;
1035 }
1036 break;
1037 case 1: /* Level 3 */
1038 if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
1039 goto done;
1040 goto fault;
1041 }
1042 }
1043
1044 done:
1045 /* Check if EL0 has access to the block/page */
1046 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
1047 goto fault;
1048 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
1049 goto fault;
1050 if ((prot & PROT_EXEC) != 0) {
1051 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
1052 (!is_el0 && (pte & ATTR_S1_PXN) != 0))
1053 goto fault;
1054 }
1055 mask = (1ul << pte_shift) - 1;
1056 *gpa = (pte & ~ATTR_MASK) | (gla & mask);
1057 *is_fault = 0;
1058 ptp_release(&cookie);
1059 return (0);
1060
1061 error:
1062 ptp_release(&cookie);
1063 return (EFAULT);
1064 fault:
1065 *is_fault = 1;
1066 ptp_release(&cookie);
1067 return (0);
1068 }
1069
1070 int
vmmops_run(void * vcpui,register_t pc,pmap_t pmap,struct vm_eventinfo * evinfo)1071 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
1072 {
1073 uint64_t excp_type;
1074 int handled;
1075 register_t daif;
1076 struct hyp *hyp;
1077 struct hypctx *hypctx;
1078 struct vcpu *vcpu;
1079 struct vm_exit *vme;
1080 int mode;
1081
1082 hypctx = (struct hypctx *)vcpui;
1083 hyp = hypctx->hyp;
1084 vcpu = hypctx->vcpu;
1085 vme = vm_exitinfo(vcpu);
1086
1087 hypctx->tf.tf_elr = (uint64_t)pc;
1088
1089 for (;;) {
1090 if (hypctx->has_exception) {
1091 hypctx->has_exception = false;
1092 hypctx->elr_el1 = hypctx->tf.tf_elr;
1093
1094 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
1095
1096 if (mode == PSR_M_EL1t) {
1097 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
1098 } else if (mode == PSR_M_EL1h) {
1099 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
1100 } else if ((mode & PSR_M_32) == PSR_M_64) {
1101 /* 64-bit EL0 */
1102 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
1103 } else {
1104 /* 32-bit EL0 */
1105 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
1106 }
1107
1108 /* Set the new spsr */
1109 hypctx->spsr_el1 = hypctx->tf.tf_spsr;
1110
1111 /* Set the new cpsr */
1112 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
1113 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;
1114
1115 /*
1116 * Update fields that may change on exeption entry
1117 * based on how sctlr_el1 is configured.
1118 */
1119 if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
1120 hypctx->tf.tf_spsr |= PSR_PAN;
1121 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
1122 hypctx->tf.tf_spsr &= ~PSR_SSBS;
1123 else
1124 hypctx->tf.tf_spsr |= PSR_SSBS;
1125 }
1126
1127 daif = intr_disable();
1128
1129 /* Check if the vcpu is suspended */
1130 if (vcpu_suspended(evinfo)) {
1131 intr_restore(daif);
1132 vm_exit_suspended(vcpu, pc);
1133 break;
1134 }
1135
1136 if (vcpu_debugged(vcpu)) {
1137 intr_restore(daif);
1138 vm_exit_debug(vcpu, pc);
1139 break;
1140 }
1141
1142 /* Activate the stage2 pmap so the vmid is valid */
1143 pmap_activate_vm(pmap);
1144 hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
1145
1146 /*
1147 * TODO: What happens if a timer interrupt is asserted exactly
1148 * here, but for the previous VM?
1149 */
1150 arm64_set_active_vcpu(hypctx);
1151 vgic_flush_hwstate(hypctx);
1152
1153 /* Call into EL2 to switch to the guest */
1154 excp_type = vmm_enter_guest(hyp, hypctx);
1155
1156 vgic_sync_hwstate(hypctx);
1157 vtimer_sync_hwstate(hypctx);
1158
1159 /*
1160 * Deactivate the stage2 pmap.
1161 */
1162 PCPU_SET(curvmpmap, NULL);
1163 intr_restore(daif);
1164
1165 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
1166 if (excp_type == EXCP_TYPE_MAINT_IRQ)
1167 continue;
1168
1169 vme->pc = hypctx->tf.tf_elr;
1170 vme->inst_length = INSN_SIZE;
1171 vme->u.hyp.exception_nr = excp_type;
1172 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
1173 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
1174 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
1175
1176 handled = arm64_handle_world_switch(hypctx, excp_type, vme,
1177 pmap);
1178 if (handled == UNHANDLED)
1179 /* Exit loop to emulate instruction. */
1180 break;
1181 else
1182 /* Resume guest execution from the next instruction. */
1183 hypctx->tf.tf_elr += vme->inst_length;
1184 }
1185
1186 return (0);
1187 }
1188
1189 static void
arm_pcpu_vmcleanup(void * arg)1190 arm_pcpu_vmcleanup(void *arg)
1191 {
1192 struct hyp *hyp;
1193 int i, maxcpus;
1194
1195 hyp = arg;
1196 maxcpus = vm_get_maxcpus(hyp->vm);
1197 for (i = 0; i < maxcpus; i++) {
1198 if (arm64_get_active_vcpu() == hyp->ctx[i]) {
1199 arm64_set_active_vcpu(NULL);
1200 break;
1201 }
1202 }
1203 }
1204
1205 void
vmmops_vcpu_cleanup(void * vcpui)1206 vmmops_vcpu_cleanup(void *vcpui)
1207 {
1208 struct hypctx *hypctx = vcpui;
1209
1210 vtimer_cpucleanup(hypctx);
1211 vgic_cpucleanup(hypctx);
1212
1213 if (!in_vhe())
1214 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
1215
1216 free(hypctx, M_HYP);
1217 }
1218
1219 void
vmmops_cleanup(void * vmi)1220 vmmops_cleanup(void *vmi)
1221 {
1222 struct hyp *hyp = vmi;
1223
1224 vtimer_vmcleanup(hyp);
1225 vgic_vmcleanup(hyp);
1226
1227 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
1228
1229 if (!in_vhe())
1230 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
1231
1232 free(hyp, M_HYP);
1233 }
1234
1235 /*
1236 * Return register value. Registers have different sizes and an explicit cast
1237 * must be made to ensure proper conversion.
1238 */
1239 static uint64_t *
hypctx_regptr(struct hypctx * hypctx,int reg)1240 hypctx_regptr(struct hypctx *hypctx, int reg)
1241 {
1242 switch (reg) {
1243 case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
1244 return (&hypctx->tf.tf_x[reg]);
1245 case VM_REG_GUEST_LR:
1246 return (&hypctx->tf.tf_lr);
1247 case VM_REG_GUEST_SP:
1248 return (&hypctx->tf.tf_sp);
1249 case VM_REG_GUEST_CPSR:
1250 return (&hypctx->tf.tf_spsr);
1251 case VM_REG_GUEST_PC:
1252 return (&hypctx->tf.tf_elr);
1253 case VM_REG_GUEST_SCTLR_EL1:
1254 return (&hypctx->sctlr_el1);
1255 case VM_REG_GUEST_TTBR0_EL1:
1256 return (&hypctx->ttbr0_el1);
1257 case VM_REG_GUEST_TTBR1_EL1:
1258 return (&hypctx->ttbr1_el1);
1259 case VM_REG_GUEST_TCR_EL1:
1260 return (&hypctx->tcr_el1);
1261 case VM_REG_GUEST_TCR2_EL1:
1262 return (&hypctx->tcr2_el1);
1263 case VM_REG_GUEST_MPIDR_EL1:
1264 return (&hypctx->vmpidr_el2);
1265 default:
1266 break;
1267 }
1268 return (NULL);
1269 }
1270
1271 int
vmmops_getreg(void * vcpui,int reg,uint64_t * retval)1272 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
1273 {
1274 uint64_t *regp;
1275 int running, hostcpu;
1276 struct hypctx *hypctx = vcpui;
1277
1278 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1279 if (running && hostcpu != curcpu)
1280 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
1281 vcpu_vcpuid(hypctx->vcpu));
1282
1283 regp = hypctx_regptr(hypctx, reg);
1284 if (regp == NULL)
1285 return (EINVAL);
1286
1287 *retval = *regp;
1288 return (0);
1289 }
1290
1291 int
vmmops_setreg(void * vcpui,int reg,uint64_t val)1292 vmmops_setreg(void *vcpui, int reg, uint64_t val)
1293 {
1294 uint64_t *regp;
1295 struct hypctx *hypctx = vcpui;
1296 int running, hostcpu;
1297
1298 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1299 if (running && hostcpu != curcpu)
1300 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
1301 vcpu_vcpuid(hypctx->vcpu));
1302
1303 regp = hypctx_regptr(hypctx, reg);
1304 if (regp == NULL)
1305 return (EINVAL);
1306
1307 *regp = val;
1308 return (0);
1309 }
1310
1311 int
vmmops_exception(void * vcpui,uint64_t esr,uint64_t far)1312 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
1313 {
1314 struct hypctx *hypctx = vcpui;
1315 int running, hostcpu;
1316
1317 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1318 if (running && hostcpu != curcpu)
1319 panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
1320 vcpu_vcpuid(hypctx->vcpu));
1321
1322 hypctx->far_el1 = far;
1323 hypctx->esr_el1 = esr;
1324 hypctx->has_exception = true;
1325
1326 return (0);
1327 }
1328
1329 int
vmmops_getcap(void * vcpui,int num,int * retval)1330 vmmops_getcap(void *vcpui, int num, int *retval)
1331 {
1332 struct hypctx *hypctx = vcpui;
1333 int ret;
1334
1335 ret = ENOENT;
1336
1337 switch (num) {
1338 case VM_CAP_UNRESTRICTED_GUEST:
1339 *retval = 1;
1340 ret = 0;
1341 break;
1342 case VM_CAP_BRK_EXIT:
1343 case VM_CAP_SS_EXIT:
1344 case VM_CAP_MASK_HWINTR:
1345 *retval = (hypctx->setcaps & (1ul << num)) != 0;
1346 break;
1347 default:
1348 break;
1349 }
1350
1351 return (ret);
1352 }
1353
1354 int
vmmops_setcap(void * vcpui,int num,int val)1355 vmmops_setcap(void *vcpui, int num, int val)
1356 {
1357 struct hypctx *hypctx = vcpui;
1358 int ret;
1359
1360 ret = 0;
1361
1362 switch (num) {
1363 case VM_CAP_BRK_EXIT:
1364 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1365 break;
1366 if (val != 0)
1367 hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1368 else
1369 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1370 break;
1371 case VM_CAP_SS_EXIT:
1372 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1373 break;
1374
1375 if (val != 0) {
1376 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
1377 hypctx->debug_mdscr |= hypctx->mdscr_el1 &
1378 (MDSCR_SS | MDSCR_KDE);
1379
1380 hypctx->tf.tf_spsr |= PSR_SS;
1381 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
1382 hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1383 } else {
1384 hypctx->tf.tf_spsr &= ~PSR_SS;
1385 hypctx->tf.tf_spsr |= hypctx->debug_spsr;
1386 hypctx->debug_spsr &= ~PSR_SS;
1387 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
1388 hypctx->mdscr_el1 |= hypctx->debug_mdscr;
1389 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
1390 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1391 }
1392 break;
1393 case VM_CAP_MASK_HWINTR:
1394 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1395 break;
1396
1397 if (val != 0) {
1398 hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
1399 (PSR_I | PSR_F));
1400 hypctx->tf.tf_spsr |= PSR_I | PSR_F;
1401 } else {
1402 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
1403 hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
1404 (PSR_I | PSR_F));
1405 hypctx->debug_spsr &= ~(PSR_I | PSR_F);
1406 }
1407 break;
1408 default:
1409 ret = ENOENT;
1410 break;
1411 }
1412
1413 if (ret == 0) {
1414 if (val == 0)
1415 hypctx->setcaps &= ~(1ul << num);
1416 else
1417 hypctx->setcaps |= (1ul << num);
1418 }
1419
1420 return (ret);
1421 }
1422