1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/smp.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/mman.h>
36 #include <sys/pcpu.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/vmem.h>
42
43 #include <vm/vm.h>
44 #include <vm/pmap.h>
45 #include <vm/vm_extern.h>
46 #include <vm/vm_map.h>
47 #include <vm/vm_page.h>
48 #include <vm/vm_param.h>
49
50 #include <machine/armreg.h>
51 #include <machine/vm.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpu.h>
54 #include <machine/machdep.h>
55 #include <machine/vmm.h>
56 #include <machine/vmm_dev.h>
57 #include <machine/atomic.h>
58 #include <machine/hypervisor.h>
59 #include <machine/pmap.h>
60
61 #include "mmu.h"
62 #include "arm64.h"
63 #include "hyp.h"
64 #include "reset.h"
65 #include "io/vgic.h"
66 #include "io/vgic_v3.h"
67 #include "io/vtimer.h"
68 #include "vmm_handlers.h"
69 #include "vmm_stat.h"
70
/*
 * Return values of the exit handlers below (handle_el1_sync_excp() and
 * arm64_handle_world_switch()).
 */
#define HANDLED 1
#define UNHANDLED 0

/* Number of bits in an EL2 virtual address */
#define EL2_VIRT_BITS 48
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);

/* TODO: Move the host hypctx off the stack */
#define VMM_STACK_PAGES 4
#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)

/*
 * Stage 2 geometry chosen once in vmmops_modinit():
 *  vmm_pmap_levels:  number of stage 2 page table levels, passed to
 *                    pmap_pinit_stage() and encoded in VTCR_EL2.SL0.
 *  vmm_virt_bits:    used to derive VTCR_EL2.T0SZ.
 *  vmm_max_ipa_bits: upper bound used to validate faulting IPAs.
 */
static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;

/* Register values passed to arm_setup_vectors to set in the hypervisor */
struct vmm_init_regs {
	uint64_t tcr_el2;
	uint64_t vtcr_el2;
};

MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");

extern char hyp_init_vectors[];
extern char hyp_vectors[];
extern char hyp_stub_vectors[];

/*
 * Physical base and page-rounded length of the identity-mapped EL2 code,
 * recorded in vmmops_modinit() so vmmops_modcleanup() can unmap it.
 */
static vm_paddr_t hyp_code_base;
static size_t hyp_code_len;

/*
 * Per-CPU hypervisor stacks: the kernel allocation (stack[]) and the EL2
 * virtual address it is mapped at (stack_hyp_va[]).
 */
static char *stack[MAXCPU];
static vm_offset_t stack_hyp_va[MAXCPU];

/* Allocator for EL2 virtual address space; only created when !in_vhe() */
static vmem_t *el2_mem_alloc;

static void arm_setup_vectors(void *arg);

/* Per-CPU pointer to the hypctx last set by arm64_set_active_vcpu() */
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
107
108 static inline void
arm64_set_active_vcpu(struct hypctx * hypctx)109 arm64_set_active_vcpu(struct hypctx *hypctx)
110 {
111 DPCPU_SET(vcpu, hypctx);
112 }
113
114 struct hypctx *
arm64_get_active_vcpu(void)115 arm64_get_active_vcpu(void)
116 {
117 return (DPCPU_GET(vcpu));
118 }
119
120 static void
arm_setup_vectors(void * arg)121 arm_setup_vectors(void *arg)
122 {
123 struct vmm_init_regs *el2_regs;
124 uintptr_t stack_top;
125 uint32_t sctlr_el2;
126 register_t daif;
127
128 el2_regs = arg;
129 arm64_set_active_vcpu(NULL);
130
131 /*
132 * Configure the system control register for EL2:
133 *
134 * SCTLR_EL2_M: MMU on
135 * SCTLR_EL2_C: Data cacheability not affected
136 * SCTLR_EL2_I: Instruction cacheability not affected
137 * SCTLR_EL2_A: Instruction alignment check
138 * SCTLR_EL2_SA: Stack pointer alignment check
139 * SCTLR_EL2_WXN: Treat writable memory as execute never
140 * ~SCTLR_EL2_EE: Data accesses are little-endian
141 */
142 sctlr_el2 = SCTLR_EL2_RES1;
143 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
144 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
145 sctlr_el2 |= SCTLR_EL2_WXN;
146 sctlr_el2 &= ~SCTLR_EL2_EE;
147
148 daif = intr_disable();
149
150 if (in_vhe()) {
151 WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
152 } else {
153 /*
154 * Install the temporary vectors which will be responsible for
155 * initializing the VMM when we next trap into EL2.
156 *
157 * x0: the exception vector table responsible for hypervisor
158 * initialization on the next call.
159 */
160 vmm_call_hyp(vtophys(&vmm_hyp_code));
161
162 /* Create and map the hypervisor stack */
163 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
164
165 /* Special call to initialize EL2 */
166 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
167 sctlr_el2, el2_regs->vtcr_el2);
168 }
169
170 intr_restore(daif);
171 }
172
173 static void
arm_teardown_vectors(void * arg)174 arm_teardown_vectors(void *arg)
175 {
176 register_t daif;
177
178 /*
179 * vmm_cleanup() will disable the MMU. For the next few instructions,
180 * before the hardware disables the MMU, one of the following is
181 * possible:
182 *
183 * a. The instruction addresses are fetched with the MMU disabled,
184 * and they must represent the actual physical addresses. This will work
185 * because we call the vmm_cleanup() function by its physical address.
186 *
187 * b. The instruction addresses are fetched using the old translation
188 * tables. This will work because we have an identity mapping in place
189 * in the translation tables and vmm_cleanup() is called by its physical
190 * address.
191 */
192 daif = intr_disable();
193 /* TODO: Invalidate the cache */
194 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
195 intr_restore(daif);
196
197 arm64_set_active_vcpu(NULL);
198 }
199
200 static uint64_t
vmm_vtcr_el2_sl(u_int levels)201 vmm_vtcr_el2_sl(u_int levels)
202 {
203 #if PAGE_SIZE == PAGE_SIZE_4K
204 switch (levels) {
205 case 2:
206 return (VTCR_EL2_SL0_4K_LVL2);
207 case 3:
208 return (VTCR_EL2_SL0_4K_LVL1);
209 case 4:
210 return (VTCR_EL2_SL0_4K_LVL0);
211 default:
212 panic("%s: Invalid number of page table levels %u", __func__,
213 levels);
214 }
215 #elif PAGE_SIZE == PAGE_SIZE_16K
216 switch (levels) {
217 case 2:
218 return (VTCR_EL2_SL0_16K_LVL2);
219 case 3:
220 return (VTCR_EL2_SL0_16K_LVL1);
221 case 4:
222 return (VTCR_EL2_SL0_16K_LVL0);
223 default:
224 panic("%s: Invalid number of page table levels %u", __func__,
225 levels);
226 }
227 #else
228 #error Unsupported page size
229 #endif
230 }
231
/*
 * Module initialisation for the arm64 vmm backend.
 *
 * Checks for hypervisor and vgic support, derives the stage 2 geometry from
 * ID_AA64MMFR0_EL1.PARange, installs the stage 2 TLB invalidation hooks,
 * and (when not running with VHE) builds the EL2 address space: an identity
 * mapping of the hypervisor code, one stack per CPU and a vmem arena for
 * later allocations.  It then programs EL2 on every CPU and initialises the
 * vgic and virtual timer.
 *
 * ipinum is not used by this function.
 *
 * Returns 0 on success, ENXIO when virtualization is unavailable or the
 * physical address range is too small or unreadable, ENODEV when no vgic is
 * present, and ENOMEM when the EL2 MMU cannot be initialised.
 */
int
vmmops_modinit(int ipinum)
{
	struct vmm_init_regs el2_regs;
	vm_offset_t next_hyp_va;
	vm_paddr_t vmm_base;
	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
	uint64_t cnthctl_el2;
	int cpu, i;
	bool rv __diagused;

	if (!has_hyp()) {
		printf(
		    "vmm: Processor doesn't have support for virtualization\n");
		return (ENXIO);
	}

	if (!vgic_present()) {
		printf("vmm: No vgic found\n");
		return (ENODEV);
	}

	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
		printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
		return (ENXIO);
	}
	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
	/*
	 * Use 3 levels to give us up to 39 bits with 4k pages, or
	 * 47 bits with 16k pages.
	 */
	/* TODO: Check the number of levels for 64k pages */
	vmm_pmap_levels = 3;
	switch (pa_range_field) {
	case ID_AA64MMFR0_PARange_4G:
		printf("vmm: Not enough physical address bits\n");
		return (ENXIO);
	case ID_AA64MMFR0_PARange_64G:
		vmm_virt_bits = 36;
#if PAGE_SIZE == PAGE_SIZE_16K
		vmm_pmap_levels = 2;
#endif
		break;
	default:
		vmm_virt_bits = 39;
		break;
	}
	/*
	 * The raw PARange encoding; it is shifted into the PS fields of
	 * TCR_EL2 and VTCR_EL2 below.
	 */
	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;

	if (!in_vhe()) {
		/* Initialise the EL2 MMU */
		if (!vmmpmap_init()) {
			printf("vmm: Failed to init the EL2 MMU\n");
			return (ENOMEM);
		}
	}

	/*
	 * Set up the stage 2 pmap callbacks.  They are reset to NULL in
	 * vmmops_modcleanup().
	 */
	MPASS(pmap_clean_stage2_tlbi == NULL);
	pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
	pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
	pmap_stage2_invalidate_all = vmm_s2_tlbi_all;

	if (!in_vhe()) {
		/*
		 * Create an allocator for the virtual address space used by
		 * EL2. EL2 code is identity-mapped; the allocator is used to
		 * find space for VM structures.
		 */
		el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
		    M_WAITOK);

		/* Create the mappings for the hypervisor translation table. */
		hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);

		/* We need a physical identity mapping for when we activate the MMU */
		hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
		rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
		    VM_PROT_READ | VM_PROT_EXECUTE);
		MPASS(rv);

		/* The stacks start at the next L2 boundary after the code */
		next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);

		/* Create a per-CPU hypervisor stack */
		CPU_FOREACH(cpu) {
			stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
			stack_hyp_va[cpu] = next_hyp_va;

			/*
			 * Map one page at a time, looking up the physical
			 * address of each page of the allocation separately.
			 */
			for (i = 0; i < VMM_STACK_PAGES; i++) {
				rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
				    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
				    VM_PROT_READ | VM_PROT_WRITE);
				MPASS(rv);
			}
			/*
			 * Each stack gets its own L2_SIZE slot; only the
			 * first VMM_STACK_PAGES pages of it are mapped here.
			 */
			next_hyp_va += L2_SIZE;
		}

		/* TCR_EL2 is only built here and only consumed when !in_vhe() */
		el2_regs.tcr_el2 = TCR_EL2_RES1;
		el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
		    TCR_EL2_PS_52BITS);
		el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
		el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
#if PAGE_SIZE == PAGE_SIZE_4K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
		el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
#endif
	}

	/* Record the largest IPA width; unknown encodings are treated as 52 */
	switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
	case TCR_EL2_PS_32BITS:
		vmm_max_ipa_bits = 32;
		break;
	case TCR_EL2_PS_36BITS:
		vmm_max_ipa_bits = 36;
		break;
	case TCR_EL2_PS_40BITS:
		vmm_max_ipa_bits = 40;
		break;
	case TCR_EL2_PS_42BITS:
		vmm_max_ipa_bits = 42;
		break;
	case TCR_EL2_PS_44BITS:
		vmm_max_ipa_bits = 44;
		break;
	case TCR_EL2_PS_48BITS:
		vmm_max_ipa_bits = 48;
		break;
	case TCR_EL2_PS_52BITS:
	default:
		vmm_max_ipa_bits = 52;
		break;
	}

	/*
	 * Configure the Stage 2 translation control register:
	 *
	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
	 * normal memory
	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
	 * normal memory
	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
	 * shareable
	 */
	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
#if PAGE_SIZE == PAGE_SIZE_4K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
#endif
	/*
	 * If FEAT_LPA2 is enabled in the host then we need to enable it here
	 * so the page tables created by pmap.c are correct. The meaning of
	 * the shareability field changes to become address bits when this
	 * is set.
	 */
	if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
		el2_regs.vtcr_el2 |= VTCR_EL2_DS;
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
	} else {
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
	}

	/* Run arm_setup_vectors() on every CPU with the values built above */
	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);

	if (!in_vhe()) {
		/* Add memory to the vmem allocator (checking there is space) */
		if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
			/*
			 * Ensure there is an L2 block before the vmm code to check
			 * for buffer overflows on earlier data. Include the PAGE_SIZE
			 * of the minimum we can allocate.
			 */
			vmm_base -= L2_SIZE + PAGE_SIZE;
			vmm_base = rounddown2(vmm_base, L2_SIZE);

			/*
			 * Check there is memory before the vmm code to add.
			 *
			 * Reserve the L2 block at address 0 so NULL dereference will
			 * raise an exception.
			 */
			if (vmm_base > L2_SIZE)
				vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
				    M_WAITOK);
		}

		/*
		 * Add the memory after the stacks. There is most of an L2 block
		 * between the last stack and the first allocation so this should
		 * be safe without adding more padding.
		 */
		if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
			vmem_add(el2_mem_alloc, next_hyp_va,
			    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
	}
	/* The current CNTHCTL_EL2 value is handed to the virtual timer code */
	cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL);

	vgic_init();
	vtimer_init(cnthctl_el2);

	return (0);
}
452
453 int
vmmops_modcleanup(void)454 vmmops_modcleanup(void)
455 {
456 int cpu;
457
458 if (!in_vhe()) {
459 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
460
461 CPU_FOREACH(cpu) {
462 vmmpmap_remove(stack_hyp_va[cpu],
463 VMM_STACK_PAGES * PAGE_SIZE, false);
464 }
465
466 vmmpmap_remove(hyp_code_base, hyp_code_len, false);
467 }
468
469 vtimer_cleanup();
470
471 if (!in_vhe()) {
472 vmmpmap_fini();
473
474 CPU_FOREACH(cpu)
475 free(stack[cpu], M_HYP);
476 }
477
478 pmap_clean_stage2_tlbi = NULL;
479 pmap_stage2_invalidate_range = NULL;
480 pmap_stage2_invalidate_all = NULL;
481
482 return (0);
483 }
484
485 static vm_size_t
el2_hyp_size(struct vm * vm)486 el2_hyp_size(struct vm *vm)
487 {
488 return (round_page(sizeof(struct hyp) +
489 sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
490 }
491
492 static vm_size_t
el2_hypctx_size(void)493 el2_hypctx_size(void)
494 {
495 return (round_page(sizeof(struct hypctx)));
496 }
497
498 static vm_offset_t
el2_map_enter(vm_offset_t data,vm_size_t size,vm_prot_t prot)499 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
500 {
501 vmem_addr_t addr;
502 int err __diagused;
503 bool rv __diagused;
504
505 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
506 MPASS(err == 0);
507 rv = vmmpmap_enter(addr, size, vtophys(data), prot);
508 MPASS(rv);
509
510 return (addr);
511 }
512
513 void *
vmmops_init(struct vm * vm,pmap_t pmap)514 vmmops_init(struct vm *vm, pmap_t pmap)
515 {
516 struct hyp *hyp;
517 vm_size_t size;
518
519 size = el2_hyp_size(vm);
520 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
521
522 hyp->vm = vm;
523 hyp->vgic_attached = false;
524
525 vtimer_vminit(hyp);
526 vgic_vminit(hyp);
527
528 if (!in_vhe())
529 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
530 VM_PROT_READ | VM_PROT_WRITE);
531
532 return (hyp);
533 }
534
535 void *
vmmops_vcpu_init(void * vmi,struct vcpu * vcpu1,int vcpuid)536 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
537 {
538 struct hyp *hyp = vmi;
539 struct hypctx *hypctx;
540 vm_size_t size;
541
542 size = el2_hypctx_size();
543 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
544
545 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
546 ("%s: Invalid vcpuid %d", __func__, vcpuid));
547 hyp->ctx[vcpuid] = hypctx;
548
549 hypctx->hyp = hyp;
550 hypctx->vcpu = vcpu1;
551
552 reset_vm_el01_regs(hypctx);
553 reset_vm_el2_regs(hypctx);
554
555 vtimer_cpuinit(hypctx);
556 vgic_cpuinit(hypctx);
557
558 if (!in_vhe())
559 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
560 VM_PROT_READ | VM_PROT_WRITE);
561
562 return (hypctx);
563 }
564
565 static int
arm_vmm_pinit(pmap_t pmap)566 arm_vmm_pinit(pmap_t pmap)
567 {
568
569 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
570 return (1);
571 }
572
573 struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min,vm_offset_t max)574 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
575 {
576 return (vmspace_alloc(min, max, arm_vmm_pinit));
577 }
578
579 void
vmmops_vmspace_free(struct vmspace * vmspace)580 vmmops_vmspace_free(struct vmspace *vmspace)
581 {
582
583 pmap_remove_pages(vmspace_pmap(vmspace));
584 vmspace_free(vmspace);
585 }
586
587 static inline void
arm64_print_hyp_regs(struct vm_exit * vme)588 arm64_print_hyp_regs(struct vm_exit *vme)
589 {
590 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
591 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
592 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
593 printf("elr_el2: 0x%016lx\n", vme->pc);
594 }
595
596 static void
arm64_gen_inst_emul_data(struct hypctx * hypctx,uint32_t esr_iss,struct vm_exit * vme_ret)597 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
598 struct vm_exit *vme_ret)
599 {
600 struct vm_guest_paging *paging;
601 struct vie *vie;
602 uint32_t esr_sas, reg_num;
603
604 /*
605 * Get the page address from HPFAR_EL2.
606 */
607 vme_ret->u.inst_emul.gpa =
608 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
609 /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
610 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
611 FAR_EL2_HPFAR_PAGE_MASK;
612
613 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
614 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
615
616 vie = &vme_ret->u.inst_emul.vie;
617 vie->access_size = 1 << esr_sas;
618 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
619 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
620 vie->reg = reg_num;
621
622 paging = &vme_ret->u.inst_emul.paging;
623 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
624 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
625 paging->tcr_el1 = hypctx->tcr_el1;
626 paging->tcr2_el1 = hypctx->tcr2_el1;
627 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
628 if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
629 paging->flags |= VM_GP_MMU_ENABLED;
630 }
631
632 static void
arm64_gen_reg_emul_data(uint32_t esr_iss,struct vm_exit * vme_ret)633 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
634 {
635 uint32_t reg_num;
636 struct vre *vre;
637
638 /* u.hyp member will be replaced by u.reg_emul */
639 vre = &vme_ret->u.reg_emul.vre;
640
641 vre->inst_syndrome = esr_iss;
642 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
643 vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
644 reg_num = ISS_MSR_Rt(esr_iss);
645 vre->reg = reg_num;
646 }
647
648 void
raise_data_insn_abort(struct hypctx * hypctx,uint64_t far,bool dabort,int fsc)649 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
650 {
651 uint64_t esr;
652
653 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
654 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
655 else
656 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
657 /* Set the bit that changes from insn -> data abort */
658 if (dabort)
659 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
660 /* Set the IL bit if set by hardware */
661 esr |= hypctx->tf.tf_esr & ESR_ELx_IL;
662
663 vmmops_exception(hypctx, esr | fsc, far);
664 }
665
666 static int
handle_el1_sync_excp(struct hypctx * hypctx,struct vm_exit * vme_ret,pmap_t pmap)667 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
668 pmap_t pmap)
669 {
670 uint64_t gpa;
671 uint32_t esr_ec, esr_iss;
672
673 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
674 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
675
676 switch (esr_ec) {
677 case EXCP_UNKNOWN:
678 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
679 arm64_print_hyp_regs(vme_ret);
680 vme_ret->exitcode = VM_EXITCODE_HYP;
681 break;
682 case EXCP_TRAP_WFI_WFE:
683 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
684 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
685 vme_ret->exitcode = VM_EXITCODE_WFI;
686 } else {
687 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
688 vme_ret->exitcode = VM_EXITCODE_HYP;
689 }
690 break;
691 case EXCP_HVC:
692 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
693 vme_ret->exitcode = VM_EXITCODE_HVC;
694 break;
695 case EXCP_MSR:
696 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
697 arm64_gen_reg_emul_data(esr_iss, vme_ret);
698 vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
699 break;
700 case EXCP_BRK:
701 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
702 vme_ret->exitcode = VM_EXITCODE_BRK;
703 break;
704 case EXCP_SOFTSTP_EL0:
705 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
706 vme_ret->exitcode = VM_EXITCODE_SS;
707 break;
708 case EXCP_INSN_ABORT_L:
709 case EXCP_DATA_ABORT_L:
710 vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
711 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
712 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
713 case ISS_DATA_DFSC_TF_L0:
714 case ISS_DATA_DFSC_TF_L1:
715 case ISS_DATA_DFSC_TF_L2:
716 case ISS_DATA_DFSC_TF_L3:
717 case ISS_DATA_DFSC_AFF_L1:
718 case ISS_DATA_DFSC_AFF_L2:
719 case ISS_DATA_DFSC_AFF_L3:
720 case ISS_DATA_DFSC_PF_L1:
721 case ISS_DATA_DFSC_PF_L2:
722 case ISS_DATA_DFSC_PF_L3:
723 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
724 /* Check the IPA is valid */
725 if (gpa >= (1ul << vmm_max_ipa_bits)) {
726 raise_data_insn_abort(hypctx,
727 hypctx->exit_info.far_el2,
728 esr_ec == EXCP_DATA_ABORT_L,
729 ISS_DATA_DFSC_ASF_L0);
730 vme_ret->inst_length = 0;
731 return (HANDLED);
732 }
733
734 if (vm_mem_allocated(hypctx->vcpu, gpa)) {
735 vme_ret->exitcode = VM_EXITCODE_PAGING;
736 vme_ret->inst_length = 0;
737 vme_ret->u.paging.esr = hypctx->tf.tf_esr;
738 vme_ret->u.paging.gpa = gpa;
739 } else if (esr_ec == EXCP_INSN_ABORT_L) {
740 /*
741 * Raise an external abort. Device memory is
742 * not executable
743 */
744 raise_data_insn_abort(hypctx,
745 hypctx->exit_info.far_el2, false,
746 ISS_DATA_DFSC_EXT);
747 vme_ret->inst_length = 0;
748 return (HANDLED);
749 } else {
750 arm64_gen_inst_emul_data(hypctx, esr_iss,
751 vme_ret);
752 vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
753 }
754 break;
755 default:
756 arm64_print_hyp_regs(vme_ret);
757 vme_ret->exitcode = VM_EXITCODE_HYP;
758 break;
759 }
760
761 break;
762
763 default:
764 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
765 arm64_print_hyp_regs(vme_ret);
766 vme_ret->exitcode = VM_EXITCODE_HYP;
767 break;
768 }
769
770 /* We don't don't do any instruction emulation here */
771 return (UNHANDLED);
772 }
773
774 static int
arm64_handle_world_switch(struct hypctx * hypctx,int excp_type,struct vm_exit * vme,pmap_t pmap)775 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
776 struct vm_exit *vme, pmap_t pmap)
777 {
778 int handled;
779
780 switch (excp_type) {
781 case EXCP_TYPE_EL1_SYNC:
782 /* The exit code will be set by handle_el1_sync_excp(). */
783 handled = handle_el1_sync_excp(hypctx, vme, pmap);
784 break;
785
786 case EXCP_TYPE_EL1_IRQ:
787 case EXCP_TYPE_EL1_FIQ:
788 /* The host kernel will handle IRQs and FIQs. */
789 vmm_stat_incr(hypctx->vcpu,
790 excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,1);
791 vme->exitcode = VM_EXITCODE_BOGUS;
792 handled = UNHANDLED;
793 break;
794
795 case EXCP_TYPE_EL1_ERROR:
796 case EXCP_TYPE_EL2_SYNC:
797 case EXCP_TYPE_EL2_IRQ:
798 case EXCP_TYPE_EL2_FIQ:
799 case EXCP_TYPE_EL2_ERROR:
800 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
801 vme->exitcode = VM_EXITCODE_BOGUS;
802 handled = UNHANDLED;
803 break;
804
805 default:
806 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
807 vme->exitcode = VM_EXITCODE_BOGUS;
808 handled = UNHANDLED;
809 break;
810 }
811
812 return (handled);
813 }
814
/*
 * Release the guest page held through *cookie, if there is one, and reset
 * *cookie to NULL so a second call does nothing.
 */
static void
ptp_release(void **cookie)
{
	if (*cookie == NULL)
		return;

	vm_gpa_release(*cookie);
	*cookie = NULL;
}
823
824 static void *
ptp_hold(struct vcpu * vcpu,vm_paddr_t ptpphys,size_t len,void ** cookie)825 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
826 {
827 void *ptr;
828
829 ptp_release(cookie);
830 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
831 return (ptr);
832 }
833
834 /* log2 of the number of bytes in a page table entry */
835 #define PTE_SHIFT 3
836 int
vmmops_gla2gpa(void * vcpui,struct vm_guest_paging * paging,uint64_t gla,int prot,uint64_t * gpa,int * is_fault)837 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
838 int prot, uint64_t *gpa, int *is_fault)
839 {
840 struct hypctx *hypctx;
841 void *cookie;
842 uint64_t mask, *ptep, pte, pte_addr;
843 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
844 bool is_el0;
845
846 /* Check if the MMU is off */
847 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
848 *is_fault = 0;
849 *gpa = gla;
850 return (0);
851 }
852
853 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;
854
855 if (ADDR_IS_KERNEL(gla)) {
856 /* If address translation is disabled raise an exception */
857 if ((paging->tcr_el1 & TCR_EPD1) != 0) {
858 *is_fault = 1;
859 return (0);
860 }
861 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
862 *is_fault = 1;
863 return (0);
864 }
865 pte_addr = paging->ttbr1_addr;
866 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
867 /* Clear the top byte if TBI is on */
868 if ((paging->tcr_el1 & TCR_TBI1) != 0)
869 gla |= (0xfful << 56);
870 switch (paging->tcr_el1 & TCR_TG1_MASK) {
871 case TCR_TG1_4K:
872 granule_shift = PAGE_SHIFT_4K;
873 break;
874 case TCR_TG1_16K:
875 granule_shift = PAGE_SHIFT_16K;
876 break;
877 case TCR_TG1_64K:
878 granule_shift = PAGE_SHIFT_64K;
879 break;
880 default:
881 *is_fault = 1;
882 return (EINVAL);
883 }
884 } else {
885 /* If address translation is disabled raise an exception */
886 if ((paging->tcr_el1 & TCR_EPD0) != 0) {
887 *is_fault = 1;
888 return (0);
889 }
890 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
891 *is_fault = 1;
892 return (0);
893 }
894 pte_addr = paging->ttbr0_addr;
895 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
896 /* Clear the top byte if TBI is on */
897 if ((paging->tcr_el1 & TCR_TBI0) != 0)
898 gla &= ~(0xfful << 56);
899 switch (paging->tcr_el1 & TCR_TG0_MASK) {
900 case TCR_TG0_4K:
901 granule_shift = PAGE_SHIFT_4K;
902 break;
903 case TCR_TG0_16K:
904 granule_shift = PAGE_SHIFT_16K;
905 break;
906 case TCR_TG0_64K:
907 granule_shift = PAGE_SHIFT_64K;
908 break;
909 default:
910 *is_fault = 1;
911 return (EINVAL);
912 }
913 }
914
915 /*
916 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
917 * for larger values.
918 */
919 switch (granule_shift) {
920 case PAGE_SHIFT_4K:
921 case PAGE_SHIFT_16K:
922 /*
923 * See "Table D8-11 4KB granule, determining stage 1 initial
924 * lookup level" and "Table D8-21 16KB granule, determining
925 * stage 1 initial lookup level" from the "Arm Architecture
926 * Reference Manual for A-Profile architecture" revision I.a
927 * for the minimum and maximum values.
928 *
929 * TODO: Support less than 16 when FEAT_LPA2 is implemented
930 * and TCR_EL1.DS == 1
931 * TODO: Support more than 39 when FEAT_TTST is implemented
932 */
933 if (tsz < 16 || tsz > 39) {
934 *is_fault = 1;
935 return (EINVAL);
936 }
937 break;
938 case PAGE_SHIFT_64K:
939 /* TODO: Support 64k granule. It will probably work, but is untested */
940 default:
941 *is_fault = 1;
942 return (EINVAL);
943 }
944
945 /*
946 * Calculate the input address bits. These are 64 bit in an address
947 * with the top tsz bits being all 0 or all 1.
948 */
949 ia_bits = 64 - tsz;
950
951 /*
952 * Calculate the number of address bits used in the page table
953 * calculation. This is ia_bits minus the bottom granule_shift
954 * bits that are passed to the output address.
955 */
956 address_bits = ia_bits - granule_shift;
957
958 /*
959 * Calculate the number of levels. Each level uses
960 * granule_shift - PTE_SHIFT bits of the input address.
961 * This is because the table is 1 << granule_shift and each
962 * entry is 1 << PTE_SHIFT bytes.
963 */
964 levels = howmany(address_bits, granule_shift - PTE_SHIFT);
965
966 /* Mask of the upper unused bits in the virtual address */
967 gla &= (1ul << ia_bits) - 1;
968 hypctx = (struct hypctx *)vcpui;
969 cookie = NULL;
970 /* TODO: Check if the level supports block descriptors */
971 for (;levels > 0; levels--) {
972 int idx;
973
974 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
975 granule_shift;
976 idx = (gla >> pte_shift) &
977 ((1ul << (granule_shift - PTE_SHIFT)) - 1);
978 while (idx > PAGE_SIZE / sizeof(pte)) {
979 idx -= PAGE_SIZE / sizeof(pte);
980 pte_addr += PAGE_SIZE;
981 }
982
983 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
984 if (ptep == NULL)
985 goto error;
986 pte = ptep[idx];
987
988 /* Calculate the level we are looking at */
989 switch (levels) {
990 default:
991 goto fault;
992 /* TODO: Level -1 when FEAT_LPA2 is implemented */
993 case 4: /* Level 0 */
994 if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
995 goto fault;
996 /* FALLTHROUGH */
997 case 3: /* Level 1 */
998 case 2: /* Level 2 */
999 switch (pte & ATTR_DESCR_MASK) {
1000 /* Use L1 macro as all levels are the same */
1001 case L1_TABLE:
1002 /* Check if EL0 can access this address space */
1003 if (is_el0 &&
1004 (pte & TATTR_AP_TABLE_NO_EL0) != 0)
1005 goto fault;
1006 /* Check if the address space is writable */
1007 if ((prot & PROT_WRITE) != 0 &&
1008 (pte & TATTR_AP_TABLE_RO) != 0)
1009 goto fault;
1010 if ((prot & PROT_EXEC) != 0) {
1011 /* Check the table exec attribute */
1012 if ((is_el0 &&
1013 (pte & TATTR_UXN_TABLE) != 0) ||
1014 (!is_el0 &&
1015 (pte & TATTR_PXN_TABLE) != 0))
1016 goto fault;
1017 }
1018 pte_addr = pte & ~ATTR_MASK;
1019 break;
1020 case L1_BLOCK:
1021 goto done;
1022 default:
1023 goto fault;
1024 }
1025 break;
1026 case 1: /* Level 3 */
1027 if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
1028 goto done;
1029 goto fault;
1030 }
1031 }
1032
1033 done:
1034 /* Check if EL0 has access to the block/page */
1035 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
1036 goto fault;
1037 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
1038 goto fault;
1039 if ((prot & PROT_EXEC) != 0) {
1040 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
1041 (!is_el0 && (pte & ATTR_S1_PXN) != 0))
1042 goto fault;
1043 }
1044 mask = (1ul << pte_shift) - 1;
1045 *gpa = (pte & ~ATTR_MASK) | (gla & mask);
1046 *is_fault = 0;
1047 ptp_release(&cookie);
1048 return (0);
1049
1050 error:
1051 ptp_release(&cookie);
1052 return (EFAULT);
1053 fault:
1054 *is_fault = 1;
1055 ptp_release(&cookie);
1056 return (0);
1057 }
1058
1059 int
vmmops_run(void * vcpui,register_t pc,pmap_t pmap,struct vm_eventinfo * evinfo)1060 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
1061 {
1062 uint64_t excp_type;
1063 int handled;
1064 register_t daif;
1065 struct hyp *hyp;
1066 struct hypctx *hypctx;
1067 struct vcpu *vcpu;
1068 struct vm_exit *vme;
1069 int mode;
1070
1071 hypctx = (struct hypctx *)vcpui;
1072 hyp = hypctx->hyp;
1073 vcpu = hypctx->vcpu;
1074 vme = vm_exitinfo(vcpu);
1075
1076 hypctx->tf.tf_elr = (uint64_t)pc;
1077
1078 for (;;) {
1079 if (hypctx->has_exception) {
1080 hypctx->has_exception = false;
1081 hypctx->elr_el1 = hypctx->tf.tf_elr;
1082
1083 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
1084
1085 if (mode == PSR_M_EL1t) {
1086 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
1087 } else if (mode == PSR_M_EL1h) {
1088 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
1089 } else if ((mode & PSR_M_32) == PSR_M_64) {
1090 /* 64-bit EL0 */
1091 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
1092 } else {
1093 /* 32-bit EL0 */
1094 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
1095 }
1096
1097 /* Set the new spsr */
1098 hypctx->spsr_el1 = hypctx->tf.tf_spsr;
1099
1100 /* Set the new cpsr */
1101 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
1102 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;
1103
1104 /*
1105 * Update fields that may change on exeption entry
1106 * based on how sctlr_el1 is configured.
1107 */
1108 if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
1109 hypctx->tf.tf_spsr |= PSR_PAN;
1110 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
1111 hypctx->tf.tf_spsr &= ~PSR_SSBS;
1112 else
1113 hypctx->tf.tf_spsr |= PSR_SSBS;
1114 }
1115
1116 daif = intr_disable();
1117
1118 /* Check if the vcpu is suspended */
1119 if (vcpu_suspended(evinfo)) {
1120 intr_restore(daif);
1121 vm_exit_suspended(vcpu, pc);
1122 break;
1123 }
1124
1125 if (vcpu_debugged(vcpu)) {
1126 intr_restore(daif);
1127 vm_exit_debug(vcpu, pc);
1128 break;
1129 }
1130
1131 /* Activate the stage2 pmap so the vmid is valid */
1132 pmap_activate_vm(pmap);
1133 hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
1134
1135 /*
1136 * TODO: What happens if a timer interrupt is asserted exactly
1137 * here, but for the previous VM?
1138 */
1139 arm64_set_active_vcpu(hypctx);
1140 vgic_flush_hwstate(hypctx);
1141
1142 /* Call into EL2 to switch to the guest */
1143 excp_type = vmm_enter_guest(hyp, hypctx);
1144
1145 vgic_sync_hwstate(hypctx);
1146 vtimer_sync_hwstate(hypctx);
1147
1148 /*
1149 * Deactivate the stage2 pmap.
1150 */
1151 PCPU_SET(curvmpmap, NULL);
1152 intr_restore(daif);
1153
1154 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
1155 if (excp_type == EXCP_TYPE_MAINT_IRQ)
1156 continue;
1157
1158 vme->pc = hypctx->tf.tf_elr;
1159 vme->inst_length = INSN_SIZE;
1160 vme->u.hyp.exception_nr = excp_type;
1161 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
1162 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
1163 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
1164
1165 handled = arm64_handle_world_switch(hypctx, excp_type, vme,
1166 pmap);
1167 if (handled == UNHANDLED)
1168 /* Exit loop to emulate instruction. */
1169 break;
1170 else
1171 /* Resume guest execution from the next instruction. */
1172 hypctx->tf.tf_elr += vme->inst_length;
1173 }
1174
1175 return (0);
1176 }
1177
1178 static void
arm_pcpu_vmcleanup(void * arg)1179 arm_pcpu_vmcleanup(void *arg)
1180 {
1181 struct hyp *hyp;
1182 int i, maxcpus;
1183
1184 hyp = arg;
1185 maxcpus = vm_get_maxcpus(hyp->vm);
1186 for (i = 0; i < maxcpus; i++) {
1187 if (arm64_get_active_vcpu() == hyp->ctx[i]) {
1188 arm64_set_active_vcpu(NULL);
1189 break;
1190 }
1191 }
1192 }
1193
1194 void
vmmops_vcpu_cleanup(void * vcpui)1195 vmmops_vcpu_cleanup(void *vcpui)
1196 {
1197 struct hypctx *hypctx = vcpui;
1198
1199 vtimer_cpucleanup(hypctx);
1200 vgic_cpucleanup(hypctx);
1201
1202 if (!in_vhe())
1203 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
1204
1205 free(hypctx, M_HYP);
1206 }
1207
1208 void
vmmops_cleanup(void * vmi)1209 vmmops_cleanup(void *vmi)
1210 {
1211 struct hyp *hyp = vmi;
1212
1213 vtimer_vmcleanup(hyp);
1214 vgic_vmcleanup(hyp);
1215
1216 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
1217
1218 if (!in_vhe())
1219 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
1220
1221 free(hyp, M_HYP);
1222 }
1223
1224 /*
1225 * Return register value. Registers have different sizes and an explicit cast
1226 * must be made to ensure proper conversion.
1227 */
1228 static uint64_t *
hypctx_regptr(struct hypctx * hypctx,int reg)1229 hypctx_regptr(struct hypctx *hypctx, int reg)
1230 {
1231 switch (reg) {
1232 case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
1233 return (&hypctx->tf.tf_x[reg]);
1234 case VM_REG_GUEST_LR:
1235 return (&hypctx->tf.tf_lr);
1236 case VM_REG_GUEST_SP:
1237 return (&hypctx->tf.tf_sp);
1238 case VM_REG_GUEST_CPSR:
1239 return (&hypctx->tf.tf_spsr);
1240 case VM_REG_GUEST_PC:
1241 return (&hypctx->tf.tf_elr);
1242 case VM_REG_GUEST_SCTLR_EL1:
1243 return (&hypctx->sctlr_el1);
1244 case VM_REG_GUEST_TTBR0_EL1:
1245 return (&hypctx->ttbr0_el1);
1246 case VM_REG_GUEST_TTBR1_EL1:
1247 return (&hypctx->ttbr1_el1);
1248 case VM_REG_GUEST_TCR_EL1:
1249 return (&hypctx->tcr_el1);
1250 case VM_REG_GUEST_TCR2_EL1:
1251 return (&hypctx->tcr2_el1);
1252 default:
1253 break;
1254 }
1255 return (NULL);
1256 }
1257
1258 int
vmmops_getreg(void * vcpui,int reg,uint64_t * retval)1259 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
1260 {
1261 uint64_t *regp;
1262 int running, hostcpu;
1263 struct hypctx *hypctx = vcpui;
1264
1265 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1266 if (running && hostcpu != curcpu)
1267 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
1268 vcpu_vcpuid(hypctx->vcpu));
1269
1270 regp = hypctx_regptr(hypctx, reg);
1271 if (regp == NULL)
1272 return (EINVAL);
1273
1274 *retval = *regp;
1275 return (0);
1276 }
1277
1278 int
vmmops_setreg(void * vcpui,int reg,uint64_t val)1279 vmmops_setreg(void *vcpui, int reg, uint64_t val)
1280 {
1281 uint64_t *regp;
1282 struct hypctx *hypctx = vcpui;
1283 int running, hostcpu;
1284
1285 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1286 if (running && hostcpu != curcpu)
1287 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
1288 vcpu_vcpuid(hypctx->vcpu));
1289
1290 regp = hypctx_regptr(hypctx, reg);
1291 if (regp == NULL)
1292 return (EINVAL);
1293
1294 *regp = val;
1295 return (0);
1296 }
1297
1298 int
vmmops_exception(void * vcpui,uint64_t esr,uint64_t far)1299 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
1300 {
1301 struct hypctx *hypctx = vcpui;
1302 int running, hostcpu;
1303
1304 running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1305 if (running && hostcpu != curcpu)
1306 panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
1307 vcpu_vcpuid(hypctx->vcpu));
1308
1309 hypctx->far_el1 = far;
1310 hypctx->esr_el1 = esr;
1311 hypctx->has_exception = true;
1312
1313 return (0);
1314 }
1315
1316 int
vmmops_getcap(void * vcpui,int num,int * retval)1317 vmmops_getcap(void *vcpui, int num, int *retval)
1318 {
1319 struct hypctx *hypctx = vcpui;
1320 int ret;
1321
1322 ret = ENOENT;
1323
1324 switch (num) {
1325 case VM_CAP_UNRESTRICTED_GUEST:
1326 *retval = 1;
1327 ret = 0;
1328 break;
1329 case VM_CAP_BRK_EXIT:
1330 case VM_CAP_SS_EXIT:
1331 case VM_CAP_MASK_HWINTR:
1332 *retval = (hypctx->setcaps & (1ul << num)) != 0;
1333 break;
1334 default:
1335 break;
1336 }
1337
1338 return (ret);
1339 }
1340
1341 int
vmmops_setcap(void * vcpui,int num,int val)1342 vmmops_setcap(void *vcpui, int num, int val)
1343 {
1344 struct hypctx *hypctx = vcpui;
1345 int ret;
1346
1347 ret = 0;
1348
1349 switch (num) {
1350 case VM_CAP_BRK_EXIT:
1351 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1352 break;
1353 if (val != 0)
1354 hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1355 else
1356 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1357 break;
1358 case VM_CAP_SS_EXIT:
1359 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1360 break;
1361
1362 if (val != 0) {
1363 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
1364 hypctx->debug_mdscr |= hypctx->mdscr_el1 &
1365 (MDSCR_SS | MDSCR_KDE);
1366
1367 hypctx->tf.tf_spsr |= PSR_SS;
1368 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
1369 hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1370 } else {
1371 hypctx->tf.tf_spsr &= ~PSR_SS;
1372 hypctx->tf.tf_spsr |= hypctx->debug_spsr;
1373 hypctx->debug_spsr &= ~PSR_SS;
1374 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
1375 hypctx->mdscr_el1 |= hypctx->debug_mdscr;
1376 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
1377 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1378 }
1379 break;
1380 case VM_CAP_MASK_HWINTR:
1381 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1382 break;
1383
1384 if (val != 0) {
1385 hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
1386 (PSR_I | PSR_F));
1387 hypctx->tf.tf_spsr |= PSR_I | PSR_F;
1388 } else {
1389 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
1390 hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
1391 (PSR_I | PSR_F));
1392 hypctx->debug_spsr &= ~(PSR_I | PSR_F);
1393 }
1394 break;
1395 default:
1396 ret = ENOENT;
1397 break;
1398 }
1399
1400 if (ret == 0) {
1401 if (val == 0)
1402 hypctx->setcaps &= ~(1ul << num);
1403 else
1404 hypctx->setcaps |= (1ul << num);
1405 }
1406
1407 return (ret);
1408 }
1409