/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/smp.h> 33 #include <sys/kernel.h> 34 #include <sys/malloc.h> 35 #include <sys/mman.h> 36 #include <sys/pcpu.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/vmem.h> 42 43 #include <vm/vm.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_extern.h> 46 #include <vm/vm_map.h> 47 #include <vm/vm_page.h> 48 #include <vm/vm_param.h> 49 50 #include <machine/armreg.h> 51 #include <machine/vm.h> 52 #include <machine/cpufunc.h> 53 #include <machine/cpu.h> 54 #include <machine/machdep.h> 55 #include <machine/vmm.h> 56 #include <machine/vmm_dev.h> 57 #include <machine/atomic.h> 58 #include <machine/hypervisor.h> 59 #include <machine/pmap.h> 60 61 #include "mmu.h" 62 #include "arm64.h" 63 #include "hyp.h" 64 #include "reset.h" 65 #include "io/vgic.h" 66 #include "io/vgic_v3.h" 67 #include "io/vtimer.h" 68 #include "vmm_stat.h" 69 70 #define HANDLED 1 71 #define UNHANDLED 0 72 73 /* Number of bits in an EL2 virtual address */ 74 #define EL2_VIRT_BITS 48 75 CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS); 76 77 /* TODO: Move the host hypctx off the stack */ 78 #define VMM_STACK_PAGES 4 79 #define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE) 80 81 static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits; 82 83 /* Register values passed to arm_setup_vectors to set in the hypervisor */ 84 struct vmm_init_regs { 85 uint64_t tcr_el2; 86 uint64_t vtcr_el2; 87 }; 88 89 MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); 90 91 extern char hyp_init_vectors[]; 92 extern char hyp_vectors[]; 93 extern char hyp_stub_vectors[]; 94 95 static vm_paddr_t hyp_code_base; 96 static size_t hyp_code_len; 97 98 static char *stack[MAXCPU]; 99 static vm_offset_t stack_hyp_va[MAXCPU]; 100 101 static vmem_t *el2_mem_alloc; 102 103 static void arm_setup_vectors(void *arg); 104 static void vmm_pmap_clean_stage2_tlbi(void); 105 
static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool); 106 static void vmm_pmap_invalidate_all(uint64_t); 107 108 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); 109 110 static inline void 111 arm64_set_active_vcpu(struct hypctx *hypctx) 112 { 113 DPCPU_SET(vcpu, hypctx); 114 } 115 116 struct hypctx * 117 arm64_get_active_vcpu(void) 118 { 119 return (DPCPU_GET(vcpu)); 120 } 121 122 static void 123 arm_setup_vectors(void *arg) 124 { 125 struct vmm_init_regs *el2_regs; 126 uintptr_t stack_top; 127 uint32_t sctlr_el2; 128 register_t daif; 129 130 el2_regs = arg; 131 arm64_set_active_vcpu(NULL); 132 133 daif = intr_disable(); 134 135 /* 136 * Install the temporary vectors which will be responsible for 137 * initializing the VMM when we next trap into EL2. 138 * 139 * x0: the exception vector table responsible for hypervisor 140 * initialization on the next call. 141 */ 142 vmm_call_hyp(vtophys(&vmm_hyp_code)); 143 144 /* Create and map the hypervisor stack */ 145 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; 146 147 /* 148 * Configure the system control register for EL2: 149 * 150 * SCTLR_EL2_M: MMU on 151 * SCTLR_EL2_C: Data cacheability not affected 152 * SCTLR_EL2_I: Instruction cacheability not affected 153 * SCTLR_EL2_A: Instruction alignment check 154 * SCTLR_EL2_SA: Stack pointer alignment check 155 * SCTLR_EL2_WXN: Treat writable memory as execute never 156 * ~SCTLR_EL2_EE: Data accesses are little-endian 157 */ 158 sctlr_el2 = SCTLR_EL2_RES1; 159 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; 160 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; 161 sctlr_el2 |= SCTLR_EL2_WXN; 162 sctlr_el2 &= ~SCTLR_EL2_EE; 163 164 /* Special call to initialize EL2 */ 165 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, 166 sctlr_el2, el2_regs->vtcr_el2); 167 168 intr_restore(daif); 169 } 170 171 static void 172 arm_teardown_vectors(void *arg) 173 { 174 register_t daif; 175 176 /* 177 * vmm_cleanup() will disable the MMU. 
For the next few instructions, 178 * before the hardware disables the MMU, one of the following is 179 * possible: 180 * 181 * a. The instruction addresses are fetched with the MMU disabled, 182 * and they must represent the actual physical addresses. This will work 183 * because we call the vmm_cleanup() function by its physical address. 184 * 185 * b. The instruction addresses are fetched using the old translation 186 * tables. This will work because we have an identity mapping in place 187 * in the translation tables and vmm_cleanup() is called by its physical 188 * address. 189 */ 190 daif = intr_disable(); 191 /* TODO: Invalidate the cache */ 192 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors)); 193 intr_restore(daif); 194 195 arm64_set_active_vcpu(NULL); 196 } 197 198 static uint64_t 199 vmm_vtcr_el2_sl(u_int levels) 200 { 201 #if PAGE_SIZE == PAGE_SIZE_4K 202 switch (levels) { 203 case 2: 204 return (VTCR_EL2_SL0_4K_LVL2); 205 case 3: 206 return (VTCR_EL2_SL0_4K_LVL1); 207 case 4: 208 return (VTCR_EL2_SL0_4K_LVL0); 209 default: 210 panic("%s: Invalid number of page table levels %u", __func__, 211 levels); 212 } 213 #elif PAGE_SIZE == PAGE_SIZE_16K 214 switch (levels) { 215 case 2: 216 return (VTCR_EL2_SL0_16K_LVL2); 217 case 3: 218 return (VTCR_EL2_SL0_16K_LVL1); 219 case 4: 220 return (VTCR_EL2_SL0_16K_LVL0); 221 default: 222 panic("%s: Invalid number of page table levels %u", __func__, 223 levels); 224 } 225 #else 226 #error Unsupported page size 227 #endif 228 } 229 230 int 231 vmmops_modinit(int ipinum) 232 { 233 struct vmm_init_regs el2_regs; 234 vm_offset_t next_hyp_va; 235 vm_paddr_t vmm_base; 236 uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; 237 uint64_t cnthctl_el2; 238 register_t daif; 239 int cpu, i; 240 bool rv __diagused; 241 242 if (!virt_enabled()) { 243 printf( 244 "vmm: Processor doesn't have support for virtualization\n"); 245 return (ENXIO); 246 } 247 248 /* TODO: Support VHE */ 249 if (in_vhe()) { 250 printf("vmm: VHE is 
unsupported\n"); 251 return (ENXIO); 252 } 253 254 if (!vgic_present()) { 255 printf("vmm: No vgic found\n"); 256 return (ENODEV); 257 } 258 259 if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) { 260 printf("vmm: Unable to read ID_AA64MMFR0_EL1\n"); 261 return (ENXIO); 262 } 263 pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); 264 /* 265 * Use 3 levels to give us up to 39 bits with 4k pages, or 266 * 47 bits with 16k pages. 267 */ 268 /* TODO: Check the number of levels for 64k pages */ 269 vmm_pmap_levels = 3; 270 switch (pa_range_field) { 271 case ID_AA64MMFR0_PARange_4G: 272 printf("vmm: Not enough physical address bits\n"); 273 return (ENXIO); 274 case ID_AA64MMFR0_PARange_64G: 275 vmm_virt_bits = 36; 276 #if PAGE_SIZE == PAGE_SIZE_16K 277 vmm_pmap_levels = 2; 278 #endif 279 break; 280 default: 281 vmm_virt_bits = 39; 282 break; 283 } 284 pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; 285 286 /* Initialise the EL2 MMU */ 287 if (!vmmpmap_init()) { 288 printf("vmm: Failed to init the EL2 MMU\n"); 289 return (ENOMEM); 290 } 291 292 /* Set up the stage 2 pmap callbacks */ 293 MPASS(pmap_clean_stage2_tlbi == NULL); 294 pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi; 295 pmap_stage2_invalidate_range = vmm_pmap_invalidate_range; 296 pmap_stage2_invalidate_all = vmm_pmap_invalidate_all; 297 298 /* 299 * Create an allocator for the virtual address space used by EL2. 300 * EL2 code is identity-mapped; the allocator is used to find space for 301 * VM structures. 302 */ 303 el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK); 304 305 /* Create the mappings for the hypervisor translation table. 
*/ 306 hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); 307 308 /* We need an physical identity mapping for when we activate the MMU */ 309 hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); 310 rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, 311 VM_PROT_READ | VM_PROT_EXECUTE); 312 MPASS(rv); 313 314 next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); 315 316 /* Create a per-CPU hypervisor stack */ 317 CPU_FOREACH(cpu) { 318 stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); 319 stack_hyp_va[cpu] = next_hyp_va; 320 321 for (i = 0; i < VMM_STACK_PAGES; i++) { 322 rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), 323 PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), 324 VM_PROT_READ | VM_PROT_WRITE); 325 MPASS(rv); 326 } 327 next_hyp_va += L2_SIZE; 328 } 329 330 el2_regs.tcr_el2 = TCR_EL2_RES1; 331 el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, 332 TCR_EL2_PS_52BITS); 333 el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); 334 el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; 335 #if PAGE_SIZE == PAGE_SIZE_4K 336 el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; 337 #elif PAGE_SIZE == PAGE_SIZE_16K 338 el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; 339 #else 340 #error Unsupported page size 341 #endif 342 #ifdef SMP 343 el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; 344 #endif 345 346 switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) { 347 case TCR_EL2_PS_32BITS: 348 vmm_max_ipa_bits = 32; 349 break; 350 case TCR_EL2_PS_36BITS: 351 vmm_max_ipa_bits = 36; 352 break; 353 case TCR_EL2_PS_40BITS: 354 vmm_max_ipa_bits = 40; 355 break; 356 case TCR_EL2_PS_42BITS: 357 vmm_max_ipa_bits = 42; 358 break; 359 case TCR_EL2_PS_44BITS: 360 vmm_max_ipa_bits = 44; 361 break; 362 case TCR_EL2_PS_48BITS: 363 vmm_max_ipa_bits = 48; 364 break; 365 case TCR_EL2_PS_52BITS: 366 default: 367 vmm_max_ipa_bits = 52; 368 break; 369 } 370 371 /* 372 * Configure the Stage 2 translation control register: 373 * 374 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable 375 * 
normal memory 376 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable 377 * normal memory 378 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel 379 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables 380 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner 381 * shareable 382 */ 383 el2_regs.vtcr_el2 = VTCR_EL2_RES1; 384 el2_regs.vtcr_el2 |= 385 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT); 386 el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; 387 el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits); 388 el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels); 389 #if PAGE_SIZE == PAGE_SIZE_4K 390 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K; 391 #elif PAGE_SIZE == PAGE_SIZE_16K 392 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K; 393 #else 394 #error Unsupported page size 395 #endif 396 #ifdef SMP 397 el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS; 398 #endif 399 400 smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); 401 402 /* Add memory to the vmem allocator (checking there is space) */ 403 if (vmm_base > (L2_SIZE + PAGE_SIZE)) { 404 /* 405 * Ensure there is an L2 block before the vmm code to check 406 * for buffer overflows on earlier data. Include the PAGE_SIZE 407 * of the minimum we can allocate. 408 */ 409 vmm_base -= L2_SIZE + PAGE_SIZE; 410 vmm_base = rounddown2(vmm_base, L2_SIZE); 411 412 /* 413 * Check there is memory before the vmm code to add. 414 * 415 * Reserve the L2 block at address 0 so NULL dereference will 416 * raise an exception. 417 */ 418 if (vmm_base > L2_SIZE) 419 vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE, 420 M_WAITOK); 421 } 422 423 /* 424 * Add the memory after the stacks. There is most of an L2 block 425 * between the last stack and the first allocation so this should 426 * be safe without adding more padding. 
427 */ 428 if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) 429 vmem_add(el2_mem_alloc, next_hyp_va, 430 HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); 431 432 daif = intr_disable(); 433 cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL); 434 intr_restore(daif); 435 436 vgic_init(); 437 vtimer_init(cnthctl_el2); 438 439 return (0); 440 } 441 442 int 443 vmmops_modcleanup(void) 444 { 445 int cpu; 446 447 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); 448 449 CPU_FOREACH(cpu) { 450 vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE, 451 false); 452 } 453 454 vmmpmap_remove(hyp_code_base, hyp_code_len, false); 455 456 vtimer_cleanup(); 457 458 vmmpmap_fini(); 459 460 CPU_FOREACH(cpu) 461 free(stack[cpu], M_HYP); 462 463 pmap_clean_stage2_tlbi = NULL; 464 pmap_stage2_invalidate_range = NULL; 465 pmap_stage2_invalidate_all = NULL; 466 467 return (0); 468 } 469 470 static vm_size_t 471 el2_hyp_size(struct vm *vm) 472 { 473 return (round_page(sizeof(struct hyp) + 474 sizeof(struct hypctx *) * vm_get_maxcpus(vm))); 475 } 476 477 static vm_size_t 478 el2_hypctx_size(void) 479 { 480 return (round_page(sizeof(struct hypctx))); 481 } 482 483 static vm_offset_t 484 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot) 485 { 486 vmem_addr_t addr; 487 int err __diagused; 488 bool rv __diagused; 489 490 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr); 491 MPASS(err == 0); 492 rv = vmmpmap_enter(addr, size, vtophys(data), prot); 493 MPASS(rv); 494 495 return (addr); 496 } 497 498 void * 499 vmmops_init(struct vm *vm, pmap_t pmap) 500 { 501 struct hyp *hyp; 502 vm_size_t size; 503 504 size = el2_hyp_size(vm); 505 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 506 507 hyp->vm = vm; 508 hyp->vgic_attached = false; 509 510 vtimer_vminit(hyp); 511 vgic_vminit(hyp); 512 513 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, 514 VM_PROT_READ | VM_PROT_WRITE); 515 516 return (hyp); 517 } 518 519 void * 520 
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) 521 { 522 struct hyp *hyp = vmi; 523 struct hypctx *hypctx; 524 vm_size_t size; 525 526 size = el2_hypctx_size(); 527 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 528 529 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), 530 ("%s: Invalid vcpuid %d", __func__, vcpuid)); 531 hyp->ctx[vcpuid] = hypctx; 532 533 hypctx->hyp = hyp; 534 hypctx->vcpu = vcpu1; 535 536 reset_vm_el01_regs(hypctx); 537 reset_vm_el2_regs(hypctx); 538 539 vtimer_cpuinit(hypctx); 540 vgic_cpuinit(hypctx); 541 542 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, 543 VM_PROT_READ | VM_PROT_WRITE); 544 545 return (hypctx); 546 } 547 548 static int 549 arm_vmm_pinit(pmap_t pmap) 550 { 551 552 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels); 553 return (1); 554 } 555 556 struct vmspace * 557 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) 558 { 559 return (vmspace_alloc(min, max, arm_vmm_pinit)); 560 } 561 562 void 563 vmmops_vmspace_free(struct vmspace *vmspace) 564 { 565 566 pmap_remove_pages(vmspace_pmap(vmspace)); 567 vmspace_free(vmspace); 568 } 569 570 static void 571 vmm_pmap_clean_stage2_tlbi(void) 572 { 573 vmm_call_hyp(HYP_CLEAN_S2_TLBI); 574 } 575 576 static void 577 vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, 578 bool final_only) 579 { 580 MPASS(eva > sva); 581 vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only); 582 } 583 584 static void 585 vmm_pmap_invalidate_all(uint64_t vttbr) 586 { 587 vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr); 588 } 589 590 static inline void 591 arm64_print_hyp_regs(struct vm_exit *vme) 592 { 593 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2); 594 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); 595 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); 596 printf("elr_el2: 0x%016lx\n", vme->pc); 597 } 598 599 static void 600 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss, 601 struct vm_exit 
*vme_ret) 602 { 603 struct vm_guest_paging *paging; 604 struct vie *vie; 605 uint32_t esr_sas, reg_num; 606 607 /* 608 * Get the page address from HPFAR_EL2. 609 */ 610 vme_ret->u.inst_emul.gpa = 611 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 612 /* Bits [11:0] are the same as bits [11:0] from the virtual address. */ 613 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 & 614 FAR_EL2_HPFAR_PAGE_MASK; 615 616 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; 617 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; 618 619 vie = &vme_ret->u.inst_emul.vie; 620 vie->access_size = 1 << esr_sas; 621 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; 622 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; 623 vie->reg = reg_num; 624 625 paging = &vme_ret->u.inst_emul.paging; 626 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 627 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 628 paging->tcr_el1 = hypctx->tcr_el1; 629 paging->tcr2_el1 = hypctx->tcr2_el1; 630 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 631 if ((hypctx->sctlr_el1 & SCTLR_M) != 0) 632 paging->flags |= VM_GP_MMU_ENABLED; 633 } 634 635 static void 636 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) 637 { 638 uint32_t reg_num; 639 struct vre *vre; 640 641 /* u.hyp member will be replaced by u.reg_emul */ 642 vre = &vme_ret->u.reg_emul.vre; 643 644 vre->inst_syndrome = esr_iss; 645 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ 646 vre->dir = (esr_iss & ISS_MSR_DIR) ? 
VM_DIR_READ : VM_DIR_WRITE; 647 reg_num = ISS_MSR_Rt(esr_iss); 648 vre->reg = reg_num; 649 } 650 651 void 652 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc) 653 { 654 uint64_t esr; 655 656 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t) 657 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT; 658 else 659 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT; 660 /* Set the bit that changes from insn -> data abort */ 661 if (dabort) 662 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT; 663 /* Set the IL bit if set by hardware */ 664 esr |= hypctx->tf.tf_esr & ESR_ELx_IL; 665 666 vmmops_exception(hypctx, esr | fsc, far); 667 } 668 669 static int 670 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret, 671 pmap_t pmap) 672 { 673 uint64_t gpa; 674 uint32_t esr_ec, esr_iss; 675 676 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr); 677 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK; 678 679 switch (esr_ec) { 680 case EXCP_UNKNOWN: 681 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1); 682 arm64_print_hyp_regs(vme_ret); 683 vme_ret->exitcode = VM_EXITCODE_HYP; 684 break; 685 case EXCP_TRAP_WFI_WFE: 686 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */ 687 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1); 688 vme_ret->exitcode = VM_EXITCODE_WFI; 689 } else { 690 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1); 691 vme_ret->exitcode = VM_EXITCODE_HYP; 692 } 693 break; 694 case EXCP_HVC: 695 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1); 696 vme_ret->exitcode = VM_EXITCODE_HVC; 697 break; 698 case EXCP_MSR: 699 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1); 700 arm64_gen_reg_emul_data(esr_iss, vme_ret); 701 vme_ret->exitcode = VM_EXITCODE_REG_EMUL; 702 break; 703 case EXCP_BRK: 704 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1); 705 vme_ret->exitcode = VM_EXITCODE_BRK; 706 break; 707 case EXCP_SOFTSTP_EL0: 708 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1); 709 vme_ret->exitcode = VM_EXITCODE_SS; 710 break; 711 case EXCP_INSN_ABORT_L: 712 case EXCP_DATA_ABORT_L: 713 
vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ? 714 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1); 715 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) { 716 case ISS_DATA_DFSC_TF_L0: 717 case ISS_DATA_DFSC_TF_L1: 718 case ISS_DATA_DFSC_TF_L2: 719 case ISS_DATA_DFSC_TF_L3: 720 case ISS_DATA_DFSC_AFF_L1: 721 case ISS_DATA_DFSC_AFF_L2: 722 case ISS_DATA_DFSC_AFF_L3: 723 case ISS_DATA_DFSC_PF_L1: 724 case ISS_DATA_DFSC_PF_L2: 725 case ISS_DATA_DFSC_PF_L3: 726 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 727 /* Check the IPA is valid */ 728 if (gpa >= (1ul << vmm_max_ipa_bits)) { 729 raise_data_insn_abort(hypctx, 730 hypctx->exit_info.far_el2, 731 esr_ec == EXCP_DATA_ABORT_L, 732 ISS_DATA_DFSC_ASF_L0); 733 vme_ret->inst_length = 0; 734 return (HANDLED); 735 } 736 737 if (vm_mem_allocated(hypctx->vcpu, gpa)) { 738 vme_ret->exitcode = VM_EXITCODE_PAGING; 739 vme_ret->inst_length = 0; 740 vme_ret->u.paging.esr = hypctx->tf.tf_esr; 741 vme_ret->u.paging.gpa = gpa; 742 } else if (esr_ec == EXCP_INSN_ABORT_L) { 743 /* 744 * Raise an external abort. 
Device memory is 745 * not executable 746 */ 747 raise_data_insn_abort(hypctx, 748 hypctx->exit_info.far_el2, false, 749 ISS_DATA_DFSC_EXT); 750 vme_ret->inst_length = 0; 751 return (HANDLED); 752 } else { 753 arm64_gen_inst_emul_data(hypctx, esr_iss, 754 vme_ret); 755 vme_ret->exitcode = VM_EXITCODE_INST_EMUL; 756 } 757 break; 758 default: 759 arm64_print_hyp_regs(vme_ret); 760 vme_ret->exitcode = VM_EXITCODE_HYP; 761 break; 762 } 763 764 break; 765 766 default: 767 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1); 768 arm64_print_hyp_regs(vme_ret); 769 vme_ret->exitcode = VM_EXITCODE_HYP; 770 break; 771 } 772 773 /* We don't don't do any instruction emulation here */ 774 return (UNHANDLED); 775 } 776 777 static int 778 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type, 779 struct vm_exit *vme, pmap_t pmap) 780 { 781 int handled; 782 783 switch (excp_type) { 784 case EXCP_TYPE_EL1_SYNC: 785 /* The exit code will be set by handle_el1_sync_excp(). */ 786 handled = handle_el1_sync_excp(hypctx, vme, pmap); 787 break; 788 789 case EXCP_TYPE_EL1_IRQ: 790 case EXCP_TYPE_EL1_FIQ: 791 /* The host kernel will handle IRQs and FIQs. */ 792 vmm_stat_incr(hypctx->vcpu, 793 excp_type == EXCP_TYPE_EL1_IRQ ? 
VMEXIT_IRQ : VMEXIT_FIQ,1); 794 vme->exitcode = VM_EXITCODE_BOGUS; 795 handled = UNHANDLED; 796 break; 797 798 case EXCP_TYPE_EL1_ERROR: 799 case EXCP_TYPE_EL2_SYNC: 800 case EXCP_TYPE_EL2_IRQ: 801 case EXCP_TYPE_EL2_FIQ: 802 case EXCP_TYPE_EL2_ERROR: 803 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1); 804 vme->exitcode = VM_EXITCODE_BOGUS; 805 handled = UNHANDLED; 806 break; 807 808 default: 809 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); 810 vme->exitcode = VM_EXITCODE_BOGUS; 811 handled = UNHANDLED; 812 break; 813 } 814 815 return (handled); 816 } 817 818 static void 819 ptp_release(void **cookie) 820 { 821 if (*cookie != NULL) { 822 vm_gpa_release(*cookie); 823 *cookie = NULL; 824 } 825 } 826 827 static void * 828 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) 829 { 830 void *ptr; 831 832 ptp_release(cookie); 833 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); 834 return (ptr); 835 } 836 837 /* log2 of the number of bytes in a page table entry */ 838 #define PTE_SHIFT 3 839 int 840 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, 841 int prot, uint64_t *gpa, int *is_fault) 842 { 843 struct hypctx *hypctx; 844 void *cookie; 845 uint64_t mask, *ptep, pte, pte_addr; 846 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz; 847 bool is_el0; 848 849 /* Check if the MMU is off */ 850 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) { 851 *is_fault = 0; 852 *gpa = gla; 853 return (0); 854 } 855 856 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t; 857 858 if (ADDR_IS_KERNEL(gla)) { 859 /* If address translation is disabled raise an exception */ 860 if ((paging->tcr_el1 & TCR_EPD1) != 0) { 861 *is_fault = 1; 862 return (0); 863 } 864 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) { 865 *is_fault = 1; 866 return (0); 867 } 868 pte_addr = paging->ttbr1_addr; 869 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT; 870 /* Clear the top byte if TBI is on */ 871 if 
((paging->tcr_el1 & TCR_TBI1) != 0) 872 gla |= (0xfful << 56); 873 switch (paging->tcr_el1 & TCR_TG1_MASK) { 874 case TCR_TG1_4K: 875 granule_shift = PAGE_SHIFT_4K; 876 break; 877 case TCR_TG1_16K: 878 granule_shift = PAGE_SHIFT_16K; 879 break; 880 case TCR_TG1_64K: 881 granule_shift = PAGE_SHIFT_64K; 882 break; 883 default: 884 *is_fault = 1; 885 return (EINVAL); 886 } 887 } else { 888 /* If address translation is disabled raise an exception */ 889 if ((paging->tcr_el1 & TCR_EPD0) != 0) { 890 *is_fault = 1; 891 return (0); 892 } 893 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) { 894 *is_fault = 1; 895 return (0); 896 } 897 pte_addr = paging->ttbr0_addr; 898 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT; 899 /* Clear the top byte if TBI is on */ 900 if ((paging->tcr_el1 & TCR_TBI0) != 0) 901 gla &= ~(0xfful << 56); 902 switch (paging->tcr_el1 & TCR_TG0_MASK) { 903 case TCR_TG0_4K: 904 granule_shift = PAGE_SHIFT_4K; 905 break; 906 case TCR_TG0_16K: 907 granule_shift = PAGE_SHIFT_16K; 908 break; 909 case TCR_TG0_64K: 910 granule_shift = PAGE_SHIFT_64K; 911 break; 912 default: 913 *is_fault = 1; 914 return (EINVAL); 915 } 916 } 917 918 /* 919 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2 920 * for larger values. 921 */ 922 switch (granule_shift) { 923 case PAGE_SHIFT_4K: 924 case PAGE_SHIFT_16K: 925 /* 926 * See "Table D8-11 4KB granule, determining stage 1 initial 927 * lookup level" and "Table D8-21 16KB granule, determining 928 * stage 1 initial lookup level" from the "Arm Architecture 929 * Reference Manual for A-Profile architecture" revision I.a 930 * for the minimum and maximum values. 931 * 932 * TODO: Support less than 16 when FEAT_LPA2 is implemented 933 * and TCR_EL1.DS == 1 934 * TODO: Support more than 39 when FEAT_TTST is implemented 935 */ 936 if (tsz < 16 || tsz > 39) { 937 *is_fault = 1; 938 return (EINVAL); 939 } 940 break; 941 case PAGE_SHIFT_64K: 942 /* TODO: Support 64k granule. 
It will probably work, but is untested */ 943 default: 944 *is_fault = 1; 945 return (EINVAL); 946 } 947 948 /* 949 * Calculate the input address bits. These are 64 bit in an address 950 * with the top tsz bits being all 0 or all 1. 951 */ 952 ia_bits = 64 - tsz; 953 954 /* 955 * Calculate the number of address bits used in the page table 956 * calculation. This is ia_bits minus the bottom granule_shift 957 * bits that are passed to the output address. 958 */ 959 address_bits = ia_bits - granule_shift; 960 961 /* 962 * Calculate the number of levels. Each level uses 963 * granule_shift - PTE_SHIFT bits of the input address. 964 * This is because the table is 1 << granule_shift and each 965 * entry is 1 << PTE_SHIFT bytes. 966 */ 967 levels = howmany(address_bits, granule_shift - PTE_SHIFT); 968 969 /* Mask of the upper unused bits in the virtual address */ 970 gla &= (1ul << ia_bits) - 1; 971 hypctx = (struct hypctx *)vcpui; 972 cookie = NULL; 973 /* TODO: Check if the level supports block descriptors */ 974 for (;levels > 0; levels--) { 975 int idx; 976 977 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) + 978 granule_shift; 979 idx = (gla >> pte_shift) & 980 ((1ul << (granule_shift - PTE_SHIFT)) - 1); 981 while (idx > PAGE_SIZE / sizeof(pte)) { 982 idx -= PAGE_SIZE / sizeof(pte); 983 pte_addr += PAGE_SIZE; 984 } 985 986 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie); 987 if (ptep == NULL) 988 goto error; 989 pte = ptep[idx]; 990 991 /* Calculate the level we are looking at */ 992 switch (levels) { 993 default: 994 goto fault; 995 /* TODO: Level -1 when FEAT_LPA2 is implemented */ 996 case 4: /* Level 0 */ 997 if ((pte & ATTR_DESCR_MASK) != L0_TABLE) 998 goto fault; 999 /* FALLTHROUGH */ 1000 case 3: /* Level 1 */ 1001 case 2: /* Level 2 */ 1002 switch (pte & ATTR_DESCR_MASK) { 1003 /* Use L1 macro as all levels are the same */ 1004 case L1_TABLE: 1005 /* Check if EL0 can access this address space */ 1006 if (is_el0 && 1007 (pte & 
TATTR_AP_TABLE_NO_EL0) != 0) 1008 goto fault; 1009 /* Check if the address space is writable */ 1010 if ((prot & PROT_WRITE) != 0 && 1011 (pte & TATTR_AP_TABLE_RO) != 0) 1012 goto fault; 1013 if ((prot & PROT_EXEC) != 0) { 1014 /* Check the table exec attribute */ 1015 if ((is_el0 && 1016 (pte & TATTR_UXN_TABLE) != 0) || 1017 (!is_el0 && 1018 (pte & TATTR_PXN_TABLE) != 0)) 1019 goto fault; 1020 } 1021 pte_addr = pte & ~ATTR_MASK; 1022 break; 1023 case L1_BLOCK: 1024 goto done; 1025 default: 1026 goto fault; 1027 } 1028 break; 1029 case 1: /* Level 3 */ 1030 if ((pte & ATTR_DESCR_MASK) == L3_PAGE) 1031 goto done; 1032 goto fault; 1033 } 1034 } 1035 1036 done: 1037 /* Check if EL0 has access to the block/page */ 1038 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0) 1039 goto fault; 1040 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0) 1041 goto fault; 1042 if ((prot & PROT_EXEC) != 0) { 1043 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) || 1044 (!is_el0 && (pte & ATTR_S1_PXN) != 0)) 1045 goto fault; 1046 } 1047 mask = (1ul << pte_shift) - 1; 1048 *gpa = (pte & ~ATTR_MASK) | (gla & mask); 1049 *is_fault = 0; 1050 ptp_release(&cookie); 1051 return (0); 1052 1053 error: 1054 ptp_release(&cookie); 1055 return (EFAULT); 1056 fault: 1057 *is_fault = 1; 1058 ptp_release(&cookie); 1059 return (0); 1060 } 1061 1062 int 1063 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) 1064 { 1065 uint64_t excp_type; 1066 int handled; 1067 register_t daif; 1068 struct hyp *hyp; 1069 struct hypctx *hypctx; 1070 struct vcpu *vcpu; 1071 struct vm_exit *vme; 1072 int mode; 1073 1074 hypctx = (struct hypctx *)vcpui; 1075 hyp = hypctx->hyp; 1076 vcpu = hypctx->vcpu; 1077 vme = vm_exitinfo(vcpu); 1078 1079 hypctx->tf.tf_elr = (uint64_t)pc; 1080 1081 for (;;) { 1082 if (hypctx->has_exception) { 1083 hypctx->has_exception = false; 1084 hypctx->elr_el1 = hypctx->tf.tf_elr; 1085 1086 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 1087 1088 if 
(mode == PSR_M_EL1t) { 1089 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0; 1090 } else if (mode == PSR_M_EL1h) { 1091 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200; 1092 } else if ((mode & PSR_M_32) == PSR_M_64) { 1093 /* 64-bit EL0 */ 1094 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400; 1095 } else { 1096 /* 32-bit EL0 */ 1097 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600; 1098 } 1099 1100 /* Set the new spsr */ 1101 hypctx->spsr_el1 = hypctx->tf.tf_spsr; 1102 1103 /* Set the new cpsr */ 1104 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS; 1105 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h; 1106 1107 /* 1108 * Update fields that may change on exeption entry 1109 * based on how sctlr_el1 is configured. 1110 */ 1111 if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0) 1112 hypctx->tf.tf_spsr |= PSR_PAN; 1113 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0) 1114 hypctx->tf.tf_spsr &= ~PSR_SSBS; 1115 else 1116 hypctx->tf.tf_spsr |= PSR_SSBS; 1117 } 1118 1119 daif = intr_disable(); 1120 1121 /* Check if the vcpu is suspended */ 1122 if (vcpu_suspended(evinfo)) { 1123 intr_restore(daif); 1124 vm_exit_suspended(vcpu, pc); 1125 break; 1126 } 1127 1128 if (vcpu_debugged(vcpu)) { 1129 intr_restore(daif); 1130 vm_exit_debug(vcpu, pc); 1131 break; 1132 } 1133 1134 /* Activate the stage2 pmap so the vmid is valid */ 1135 pmap_activate_vm(pmap); 1136 hyp->vttbr_el2 = pmap_to_ttbr0(pmap); 1137 1138 /* 1139 * TODO: What happens if a timer interrupt is asserted exactly 1140 * here, but for the previous VM? 1141 */ 1142 arm64_set_active_vcpu(hypctx); 1143 vgic_flush_hwstate(hypctx); 1144 1145 /* Call into EL2 to switch to the guest */ 1146 excp_type = vmm_call_hyp(HYP_ENTER_GUEST, 1147 hyp->el2_addr, hypctx->el2_addr); 1148 1149 vgic_sync_hwstate(hypctx); 1150 vtimer_sync_hwstate(hypctx); 1151 1152 /* 1153 * Deactivate the stage2 pmap. 
vmm_pmap_clean_stage2_tlbi 1154 * depends on this meaning we activate the VM before entering 1155 * the vm again 1156 */ 1157 PCPU_SET(curvmpmap, NULL); 1158 intr_restore(daif); 1159 1160 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); 1161 if (excp_type == EXCP_TYPE_MAINT_IRQ) 1162 continue; 1163 1164 vme->pc = hypctx->tf.tf_elr; 1165 vme->inst_length = INSN_SIZE; 1166 vme->u.hyp.exception_nr = excp_type; 1167 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr; 1168 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; 1169 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; 1170 1171 handled = arm64_handle_world_switch(hypctx, excp_type, vme, 1172 pmap); 1173 if (handled == UNHANDLED) 1174 /* Exit loop to emulate instruction. */ 1175 break; 1176 else 1177 /* Resume guest execution from the next instruction. */ 1178 hypctx->tf.tf_elr += vme->inst_length; 1179 } 1180 1181 return (0); 1182 } 1183 1184 static void 1185 arm_pcpu_vmcleanup(void *arg) 1186 { 1187 struct hyp *hyp; 1188 int i, maxcpus; 1189 1190 hyp = arg; 1191 maxcpus = vm_get_maxcpus(hyp->vm); 1192 for (i = 0; i < maxcpus; i++) { 1193 if (arm64_get_active_vcpu() == hyp->ctx[i]) { 1194 arm64_set_active_vcpu(NULL); 1195 break; 1196 } 1197 } 1198 } 1199 1200 void 1201 vmmops_vcpu_cleanup(void *vcpui) 1202 { 1203 struct hypctx *hypctx = vcpui; 1204 1205 vtimer_cpucleanup(hypctx); 1206 vgic_cpucleanup(hypctx); 1207 1208 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); 1209 1210 free(hypctx, M_HYP); 1211 } 1212 1213 void 1214 vmmops_cleanup(void *vmi) 1215 { 1216 struct hyp *hyp = vmi; 1217 1218 vtimer_vmcleanup(hyp); 1219 vgic_vmcleanup(hyp); 1220 1221 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); 1222 1223 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); 1224 1225 free(hyp, M_HYP); 1226 } 1227 1228 /* 1229 * Return register value. Registers have different sizes and an explicit cast 1230 * must be made to ensure proper conversion. 
1231 */ 1232 static uint64_t * 1233 hypctx_regptr(struct hypctx *hypctx, int reg) 1234 { 1235 switch (reg) { 1236 case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29: 1237 return (&hypctx->tf.tf_x[reg]); 1238 case VM_REG_GUEST_LR: 1239 return (&hypctx->tf.tf_lr); 1240 case VM_REG_GUEST_SP: 1241 return (&hypctx->tf.tf_sp); 1242 case VM_REG_GUEST_CPSR: 1243 return (&hypctx->tf.tf_spsr); 1244 case VM_REG_GUEST_PC: 1245 return (&hypctx->tf.tf_elr); 1246 case VM_REG_GUEST_SCTLR_EL1: 1247 return (&hypctx->sctlr_el1); 1248 case VM_REG_GUEST_TTBR0_EL1: 1249 return (&hypctx->ttbr0_el1); 1250 case VM_REG_GUEST_TTBR1_EL1: 1251 return (&hypctx->ttbr1_el1); 1252 case VM_REG_GUEST_TCR_EL1: 1253 return (&hypctx->tcr_el1); 1254 case VM_REG_GUEST_TCR2_EL1: 1255 return (&hypctx->tcr2_el1); 1256 default: 1257 break; 1258 } 1259 return (NULL); 1260 } 1261 1262 int 1263 vmmops_getreg(void *vcpui, int reg, uint64_t *retval) 1264 { 1265 uint64_t *regp; 1266 int running, hostcpu; 1267 struct hypctx *hypctx = vcpui; 1268 1269 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1270 if (running && hostcpu != curcpu) 1271 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm), 1272 vcpu_vcpuid(hypctx->vcpu)); 1273 1274 regp = hypctx_regptr(hypctx, reg); 1275 if (regp == NULL) 1276 return (EINVAL); 1277 1278 *retval = *regp; 1279 return (0); 1280 } 1281 1282 int 1283 vmmops_setreg(void *vcpui, int reg, uint64_t val) 1284 { 1285 uint64_t *regp; 1286 struct hypctx *hypctx = vcpui; 1287 int running, hostcpu; 1288 1289 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1290 if (running && hostcpu != curcpu) 1291 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm), 1292 vcpu_vcpuid(hypctx->vcpu)); 1293 1294 regp = hypctx_regptr(hypctx, reg); 1295 if (regp == NULL) 1296 return (EINVAL); 1297 1298 *regp = val; 1299 return (0); 1300 } 1301 1302 int 1303 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far) 1304 { 1305 struct hypctx *hypctx = vcpui; 1306 int running, hostcpu; 1307 1308 
running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1309 if (running && hostcpu != curcpu) 1310 panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), 1311 vcpu_vcpuid(hypctx->vcpu)); 1312 1313 hypctx->far_el1 = far; 1314 hypctx->esr_el1 = esr; 1315 hypctx->has_exception = true; 1316 1317 return (0); 1318 } 1319 1320 int 1321 vmmops_getcap(void *vcpui, int num, int *retval) 1322 { 1323 struct hypctx *hypctx = vcpui; 1324 int ret; 1325 1326 ret = ENOENT; 1327 1328 switch (num) { 1329 case VM_CAP_UNRESTRICTED_GUEST: 1330 *retval = 1; 1331 ret = 0; 1332 break; 1333 case VM_CAP_BRK_EXIT: 1334 case VM_CAP_SS_EXIT: 1335 case VM_CAP_MASK_HWINTR: 1336 *retval = (hypctx->setcaps & (1ul << num)) != 0; 1337 break; 1338 default: 1339 break; 1340 } 1341 1342 return (ret); 1343 } 1344 1345 int 1346 vmmops_setcap(void *vcpui, int num, int val) 1347 { 1348 struct hypctx *hypctx = vcpui; 1349 int ret; 1350 1351 ret = 0; 1352 1353 switch (num) { 1354 case VM_CAP_BRK_EXIT: 1355 if ((val != 0) == (hypctx->setcaps & (1ul << num)) != 0) 1356 break; 1357 if (val != 0) 1358 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1359 else 1360 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1361 break; 1362 case VM_CAP_SS_EXIT: 1363 if ((val != 0) == (hypctx->setcaps & (1ul << num)) != 0) 1364 break; 1365 1366 if (val != 0) { 1367 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); 1368 hypctx->debug_mdscr |= hypctx->mdscr_el1 & 1369 (MDSCR_SS | MDSCR_KDE); 1370 1371 hypctx->tf.tf_spsr |= PSR_SS; 1372 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; 1373 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1374 } else { 1375 hypctx->tf.tf_spsr &= ~PSR_SS; 1376 hypctx->tf.tf_spsr |= hypctx->debug_spsr; 1377 hypctx->debug_spsr &= ~PSR_SS; 1378 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); 1379 hypctx->mdscr_el1 |= hypctx->debug_mdscr; 1380 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); 1381 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1382 } 1383 break; 1384 case VM_CAP_MASK_HWINTR: 1385 if ((val != 0) == (hypctx->setcaps & (1ul << num)) != 0) 1386 
break; 1387 1388 if (val != 0) { 1389 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & 1390 (PSR_I | PSR_F)); 1391 hypctx->tf.tf_spsr |= PSR_I | PSR_F; 1392 } else { 1393 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F); 1394 hypctx->tf.tf_spsr |= (hypctx->debug_spsr & 1395 (PSR_I | PSR_F)); 1396 hypctx->debug_spsr &= ~(PSR_I | PSR_F); 1397 } 1398 break; 1399 default: 1400 ret = ENOENT; 1401 break; 1402 } 1403 1404 if (ret == 0) { 1405 if (val == 0) 1406 hypctx->setcaps &= ~(1ul << num); 1407 else 1408 hypctx->setcaps |= (1ul << num); 1409 } 1410 1411 return (ret); 1412 } 1413