1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/smp.h> 33 #include <sys/kernel.h> 34 #include <sys/malloc.h> 35 #include <sys/mman.h> 36 #include <sys/pcpu.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/vmem.h> 42 43 #include <vm/vm.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_extern.h> 46 #include <vm/vm_map.h> 47 #include <vm/vm_page.h> 48 #include <vm/vm_param.h> 49 50 #include <machine/armreg.h> 51 #include <machine/vm.h> 52 #include <machine/cpufunc.h> 53 #include <machine/cpu.h> 54 #include <machine/machdep.h> 55 #include <machine/vmm.h> 56 #include <machine/vmm_dev.h> 57 #include <machine/atomic.h> 58 #include <machine/hypervisor.h> 59 #include <machine/pmap.h> 60 61 #include <dev/vmm/vmm_mem.h> 62 63 #include "mmu.h" 64 #include "arm64.h" 65 #include "hyp.h" 66 #include "reset.h" 67 #include "io/vgic.h" 68 #include "io/vgic_v3.h" 69 #include "io/vtimer.h" 70 #include "vmm_handlers.h" 71 #include "vmm_stat.h" 72 73 #define HANDLED 1 74 #define UNHANDLED 0 75 76 /* Number of bits in an EL2 virtual address */ 77 #define EL2_VIRT_BITS 48 78 CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS); 79 80 /* TODO: Move the host hypctx off the stack */ 81 #define VMM_STACK_PAGES 4 82 #define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE) 83 84 static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits; 85 86 /* Register values passed to arm_setup_vectors to set in the hypervisor */ 87 struct vmm_init_regs { 88 uint64_t tcr_el2; 89 uint64_t vtcr_el2; 90 }; 91 92 MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); 93 94 extern char hyp_init_vectors[]; 95 extern char hyp_vectors[]; 96 extern char hyp_stub_vectors[]; 97 98 static vm_paddr_t hyp_code_base; 99 static size_t hyp_code_len; 100 101 static char *stack[MAXCPU]; 102 static vm_offset_t stack_hyp_va[MAXCPU]; 103 104 static vmem_t *el2_mem_alloc; 105 106 static void arm_setup_vectors(void *arg); 107 108 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); 109 110 static inline void 111 arm64_set_active_vcpu(struct hypctx *hypctx) 112 { 113 DPCPU_SET(vcpu, hypctx); 114 } 115 116 struct hypctx * 117 arm64_get_active_vcpu(void) 118 { 119 return (DPCPU_GET(vcpu)); 120 } 121 122 static void 123 arm_setup_vectors(void *arg) 124 { 125 struct vmm_init_regs *el2_regs; 126 uintptr_t stack_top; 127 uint32_t sctlr_el2; 128 register_t daif; 129 130 el2_regs = arg; 131 arm64_set_active_vcpu(NULL); 132 133 /* 134 * Configure the system control register for EL2: 135 * 136 * SCTLR_EL2_M: MMU on 137 * SCTLR_EL2_C: Data cacheability not affected 138 * SCTLR_EL2_I: Instruction cacheability not affected 139 * SCTLR_EL2_A: Instruction alignment check 140 * SCTLR_EL2_SA: Stack pointer alignment check 141 * SCTLR_EL2_WXN: Treat writable memory as execute never 142 * ~SCTLR_EL2_EE: Data accesses are little-endian 143 */ 144 sctlr_el2 = SCTLR_EL2_RES1; 145 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; 146 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; 147 sctlr_el2 |= SCTLR_EL2_WXN; 148 sctlr_el2 &= ~SCTLR_EL2_EE; 149 150 daif = intr_disable(); 151 152 if (in_vhe()) { 153 WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2); 154 } else { 155 /* 156 * Install the temporary vectors which will be responsible for 157 * initializing the VMM when we next trap into EL2. 158 * 159 * x0: the exception vector table responsible for hypervisor 160 * initialization on the next call. 161 */ 162 vmm_call_hyp(vtophys(&vmm_hyp_code)); 163 164 /* Create and map the hypervisor stack */ 165 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; 166 167 /* Special call to initialize EL2 */ 168 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, 169 sctlr_el2, el2_regs->vtcr_el2); 170 } 171 172 intr_restore(daif); 173 } 174 175 static void 176 arm_teardown_vectors(void *arg) 177 { 178 register_t daif; 179 180 /* 181 * vmm_cleanup() will disable the MMU. For the next few instructions, 182 * before the hardware disables the MMU, one of the following is 183 * possible: 184 * 185 * a. The instruction addresses are fetched with the MMU disabled, 186 * and they must represent the actual physical addresses. This will work 187 * because we call the vmm_cleanup() function by its physical address. 188 * 189 * b. The instruction addresses are fetched using the old translation 190 * tables. This will work because we have an identity mapping in place 191 * in the translation tables and vmm_cleanup() is called by its physical 192 * address. 193 */ 194 daif = intr_disable(); 195 /* TODO: Invalidate the cache */ 196 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors)); 197 intr_restore(daif); 198 199 arm64_set_active_vcpu(NULL); 200 } 201 202 static uint64_t 203 vmm_vtcr_el2_sl(u_int levels) 204 { 205 #if PAGE_SIZE == PAGE_SIZE_4K 206 switch (levels) { 207 case 2: 208 return (VTCR_EL2_SL0_4K_LVL2); 209 case 3: 210 return (VTCR_EL2_SL0_4K_LVL1); 211 case 4: 212 return (VTCR_EL2_SL0_4K_LVL0); 213 default: 214 panic("%s: Invalid number of page table levels %u", __func__, 215 levels); 216 } 217 #elif PAGE_SIZE == PAGE_SIZE_16K 218 switch (levels) { 219 case 2: 220 return (VTCR_EL2_SL0_16K_LVL2); 221 case 3: 222 return (VTCR_EL2_SL0_16K_LVL1); 223 case 4: 224 return (VTCR_EL2_SL0_16K_LVL0); 225 default: 226 panic("%s: Invalid number of page table levels %u", __func__, 227 levels); 228 } 229 #else 230 #error Unsupported page size 231 #endif 232 } 233 234 int 235 vmmops_modinit(int ipinum) 236 { 237 struct vmm_init_regs el2_regs; 238 vm_offset_t next_hyp_va; 239 vm_paddr_t vmm_base; 240 uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; 241 int cpu, i; 242 bool rv __diagused; 243 244 if (!has_hyp()) { 245 printf( 246 "vmm: Processor doesn't have support for virtualization\n"); 247 return (ENXIO); 248 } 249 250 if (!vgic_present()) { 251 printf("vmm: No vgic found\n"); 252 return (ENODEV); 253 } 254 255 if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) { 256 printf("vmm: Unable to read ID_AA64MMFR0_EL1\n"); 257 return (ENXIO); 258 } 259 pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); 260 /* 261 * Use 3 levels to give us up to 39 bits with 4k pages, or 262 * 47 bits with 16k pages. 263 */ 264 /* TODO: Check the number of levels for 64k pages */ 265 vmm_pmap_levels = 3; 266 switch (pa_range_field) { 267 case ID_AA64MMFR0_PARange_4G: 268 printf("vmm: Not enough physical address bits\n"); 269 return (ENXIO); 270 case ID_AA64MMFR0_PARange_64G: 271 vmm_virt_bits = 36; 272 #if PAGE_SIZE == PAGE_SIZE_16K 273 vmm_pmap_levels = 2; 274 #endif 275 break; 276 default: 277 vmm_virt_bits = 39; 278 break; 279 } 280 pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; 281 282 if (!in_vhe()) { 283 /* Initialise the EL2 MMU */ 284 if (!vmmpmap_init()) { 285 printf("vmm: Failed to init the EL2 MMU\n"); 286 return (ENOMEM); 287 } 288 } 289 290 /* Set up the stage 2 pmap callbacks */ 291 MPASS(pmap_clean_stage2_tlbi == NULL); 292 pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi; 293 pmap_stage2_invalidate_range = vmm_s2_tlbi_range; 294 pmap_stage2_invalidate_all = vmm_s2_tlbi_all; 295 296 if (!in_vhe()) { 297 /* 298 * Create an allocator for the virtual address space used by 299 * EL2. EL2 code is identity-mapped; the allocator is used to 300 * find space for VM structures. 301 */ 302 el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, 303 M_WAITOK); 304 305 /* Create the mappings for the hypervisor translation table. */ 306 hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); 307 308 /* We need an physical identity mapping for when we activate the MMU */ 309 hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); 310 rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, 311 VM_PROT_READ | VM_PROT_EXECUTE); 312 MPASS(rv); 313 314 next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); 315 316 /* Create a per-CPU hypervisor stack */ 317 CPU_FOREACH(cpu) { 318 stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); 319 stack_hyp_va[cpu] = next_hyp_va; 320 321 for (i = 0; i < VMM_STACK_PAGES; i++) { 322 rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), 323 PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), 324 VM_PROT_READ | VM_PROT_WRITE); 325 MPASS(rv); 326 } 327 next_hyp_va += L2_SIZE; 328 } 329 330 el2_regs.tcr_el2 = TCR_EL2_RES1; 331 el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, 332 TCR_EL2_PS_52BITS); 333 el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); 334 el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; 335 #if PAGE_SIZE == PAGE_SIZE_4K 336 el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; 337 #elif PAGE_SIZE == PAGE_SIZE_16K 338 el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; 339 #else 340 #error Unsupported page size 341 #endif 342 #ifdef SMP 343 el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; 344 #endif 345 } 346 347 switch (pa_range_bits << TCR_EL2_PS_SHIFT) { 348 case TCR_EL2_PS_32BITS: 349 vmm_max_ipa_bits = 32; 350 break; 351 case TCR_EL2_PS_36BITS: 352 vmm_max_ipa_bits = 36; 353 break; 354 case TCR_EL2_PS_40BITS: 355 vmm_max_ipa_bits = 40; 356 break; 357 case TCR_EL2_PS_42BITS: 358 vmm_max_ipa_bits = 42; 359 break; 360 case TCR_EL2_PS_44BITS: 361 vmm_max_ipa_bits = 44; 362 break; 363 case TCR_EL2_PS_48BITS: 364 vmm_max_ipa_bits = 48; 365 break; 366 case TCR_EL2_PS_52BITS: 367 default: 368 vmm_max_ipa_bits = 52; 369 break; 370 } 371 372 /* 373 * Configure the Stage 2 translation control register: 374 * 375 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable 376 * normal memory 377 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable 378 * normal memory 379 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel 380 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables 381 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner 382 * shareable 383 */ 384 el2_regs.vtcr_el2 = VTCR_EL2_RES1; 385 el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; 386 el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits); 387 el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels); 388 #if PAGE_SIZE == PAGE_SIZE_4K 389 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K; 390 #elif PAGE_SIZE == PAGE_SIZE_16K 391 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K; 392 #else 393 #error Unsupported page size 394 #endif 395 #ifdef SMP 396 el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS; 397 #endif 398 /* 399 * If FEAT_LPA2 is enabled in the host then we need to enable it here 400 * so the page tables created by pmap.c are correct. The meaning of 401 * the shareability field changes to become address bits when this 402 * is set. 403 */ 404 if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) { 405 el2_regs.vtcr_el2 |= VTCR_EL2_DS; 406 el2_regs.vtcr_el2 |= 407 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT); 408 } else { 409 el2_regs.vtcr_el2 |= 410 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT); 411 } 412 413 smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); 414 415 if (!in_vhe()) { 416 /* Add memory to the vmem allocator (checking there is space) */ 417 if (vmm_base > (L2_SIZE + PAGE_SIZE)) { 418 /* 419 * Ensure there is an L2 block before the vmm code to check 420 * for buffer overflows on earlier data. Include the PAGE_SIZE 421 * of the minimum we can allocate. 422 */ 423 vmm_base -= L2_SIZE + PAGE_SIZE; 424 vmm_base = rounddown2(vmm_base, L2_SIZE); 425 426 /* 427 * Check there is memory before the vmm code to add. 428 * 429 * Reserve the L2 block at address 0 so NULL dereference will 430 * raise an exception. 431 */ 432 if (vmm_base > L2_SIZE) 433 vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE, 434 M_WAITOK); 435 } 436 437 /* 438 * Add the memory after the stacks. There is most of an L2 block 439 * between the last stack and the first allocation so this should 440 * be safe without adding more padding. 441 */ 442 if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) 443 vmem_add(el2_mem_alloc, next_hyp_va, 444 HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); 445 } 446 447 vgic_init(); 448 vtimer_init(); 449 450 return (0); 451 } 452 453 int 454 vmmops_modcleanup(void) 455 { 456 int cpu; 457 458 if (!in_vhe()) { 459 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); 460 461 CPU_FOREACH(cpu) { 462 vmmpmap_remove(stack_hyp_va[cpu], 463 VMM_STACK_PAGES * PAGE_SIZE, false); 464 } 465 466 vmmpmap_remove(hyp_code_base, hyp_code_len, false); 467 } 468 469 vtimer_cleanup(); 470 471 if (!in_vhe()) { 472 vmmpmap_fini(); 473 474 CPU_FOREACH(cpu) 475 free(stack[cpu], M_HYP); 476 } 477 478 pmap_clean_stage2_tlbi = NULL; 479 pmap_stage2_invalidate_range = NULL; 480 pmap_stage2_invalidate_all = NULL; 481 482 return (0); 483 } 484 485 static vm_size_t 486 el2_hyp_size(struct vm *vm) 487 { 488 return (round_page(sizeof(struct hyp) + 489 sizeof(struct hypctx *) * vm_get_maxcpus(vm))); 490 } 491 492 static vm_size_t 493 el2_hypctx_size(void) 494 { 495 return (round_page(sizeof(struct hypctx))); 496 } 497 498 static vm_offset_t 499 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot) 500 { 501 vmem_addr_t addr; 502 int err __diagused; 503 bool rv __diagused; 504 505 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr); 506 MPASS(err == 0); 507 rv = vmmpmap_enter(addr, size, vtophys(data), prot); 508 MPASS(rv); 509 510 return (addr); 511 } 512 513 void * 514 vmmops_init(struct vm *vm, pmap_t pmap) 515 { 516 struct hyp *hyp; 517 vm_size_t size; 518 uint64_t idreg; 519 520 size = el2_hyp_size(vm); 521 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 522 523 hyp->vm = vm; 524 hyp->vgic_attached = false; 525 526 if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) { 527 if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF) 528 hyp->feats |= HYP_FEAT_ECV_POFF; 529 } 530 531 if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) { 532 if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL) 533 hyp->feats |= HYP_FEAT_HCX; 534 } 535 536 vtimer_vminit(hyp); 537 vgic_vminit(hyp); 538 539 if (!in_vhe()) 540 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, 541 VM_PROT_READ | VM_PROT_WRITE); 542 543 return (hyp); 544 } 545 546 void * 547 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) 548 { 549 struct hyp *hyp = vmi; 550 struct hypctx *hypctx; 551 vm_size_t size; 552 553 size = el2_hypctx_size(); 554 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 555 556 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), 557 ("%s: Invalid vcpuid %d", __func__, vcpuid)); 558 hyp->ctx[vcpuid] = hypctx; 559 560 hypctx->hyp = hyp; 561 hypctx->vcpu = vcpu1; 562 563 reset_vm_el01_regs(hypctx); 564 reset_vm_el2_regs(hypctx); 565 566 vtimer_cpuinit(hypctx); 567 vgic_cpuinit(hypctx); 568 569 if (!in_vhe()) 570 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, 571 VM_PROT_READ | VM_PROT_WRITE); 572 573 return (hypctx); 574 } 575 576 static int 577 arm_vmm_pinit(pmap_t pmap) 578 { 579 580 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels); 581 return (1); 582 } 583 584 struct vmspace * 585 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) 586 { 587 return (vmspace_alloc(min, max, arm_vmm_pinit)); 588 } 589 590 void 591 vmmops_vmspace_free(struct vmspace *vmspace) 592 { 593 594 pmap_remove_pages(vmspace_pmap(vmspace)); 595 vmspace_free(vmspace); 596 } 597 598 static inline void 599 arm64_print_hyp_regs(struct vm_exit *vme) 600 { 601 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2); 602 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); 603 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); 604 printf("elr_el2: 0x%016lx\n", vme->pc); 605 } 606 607 static void 608 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss, 609 struct vm_exit *vme_ret) 610 { 611 struct vm_guest_paging *paging; 612 struct vie *vie; 613 uint32_t esr_sas, reg_num; 614 615 /* 616 * Get the page address from HPFAR_EL2. 617 */ 618 vme_ret->u.inst_emul.gpa = 619 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 620 /* Bits [11:0] are the same as bits [11:0] from the virtual address. */ 621 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 & 622 FAR_EL2_HPFAR_PAGE_MASK; 623 624 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; 625 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; 626 627 vie = &vme_ret->u.inst_emul.vie; 628 vie->access_size = 1 << esr_sas; 629 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; 630 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; 631 vie->reg = reg_num; 632 633 paging = &vme_ret->u.inst_emul.paging; 634 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 635 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 636 paging->tcr_el1 = hypctx->tcr_el1; 637 paging->tcr2_el1 = hypctx->tcr2_el1; 638 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 639 if ((hypctx->sctlr_el1 & SCTLR_M) != 0) 640 paging->flags |= VM_GP_MMU_ENABLED; 641 } 642 643 static void 644 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) 645 { 646 uint32_t reg_num; 647 struct vre *vre; 648 649 /* u.hyp member will be replaced by u.reg_emul */ 650 vre = &vme_ret->u.reg_emul.vre; 651 652 vre->inst_syndrome = esr_iss; 653 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ 654 vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE; 655 reg_num = ISS_MSR_Rt(esr_iss); 656 vre->reg = reg_num; 657 } 658 659 void 660 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc) 661 { 662 uint64_t esr; 663 664 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t) 665 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT; 666 else 667 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT; 668 /* Set the bit that changes from insn -> data abort */ 669 if (dabort) 670 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT; 671 /* Set the IL bit if set by hardware */ 672 esr |= hypctx->tf.tf_esr & ESR_ELx_IL; 673 674 vmmops_exception(hypctx, esr | fsc, far); 675 } 676 677 static int 678 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret, 679 pmap_t pmap) 680 { 681 uint64_t gpa; 682 uint32_t esr_ec, esr_iss; 683 684 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr); 685 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK; 686 687 switch (esr_ec) { 688 case EXCP_UNKNOWN: 689 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1); 690 arm64_print_hyp_regs(vme_ret); 691 vme_ret->exitcode = VM_EXITCODE_HYP; 692 break; 693 case EXCP_TRAP_WFI_WFE: 694 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */ 695 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1); 696 vme_ret->exitcode = VM_EXITCODE_WFI; 697 } else { 698 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1); 699 vme_ret->exitcode = VM_EXITCODE_HYP; 700 } 701 break; 702 case EXCP_HVC: 703 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1); 704 vme_ret->exitcode = VM_EXITCODE_HVC; 705 break; 706 case EXCP_MSR: 707 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1); 708 arm64_gen_reg_emul_data(esr_iss, vme_ret); 709 vme_ret->exitcode = VM_EXITCODE_REG_EMUL; 710 break; 711 case EXCP_BRK: 712 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1); 713 vme_ret->exitcode = VM_EXITCODE_BRK; 714 break; 715 case EXCP_SOFTSTP_EL0: 716 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1); 717 vme_ret->exitcode = VM_EXITCODE_SS; 718 break; 719 case EXCP_INSN_ABORT_L: 720 case EXCP_DATA_ABORT_L: 721 vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ? 722 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1); 723 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) { 724 case ISS_DATA_DFSC_TF_L0: 725 case ISS_DATA_DFSC_TF_L1: 726 case ISS_DATA_DFSC_TF_L2: 727 case ISS_DATA_DFSC_TF_L3: 728 case ISS_DATA_DFSC_AFF_L1: 729 case ISS_DATA_DFSC_AFF_L2: 730 case ISS_DATA_DFSC_AFF_L3: 731 case ISS_DATA_DFSC_PF_L1: 732 case ISS_DATA_DFSC_PF_L2: 733 case ISS_DATA_DFSC_PF_L3: 734 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 735 /* Check the IPA is valid */ 736 if (gpa >= (1ul << vmm_max_ipa_bits)) { 737 raise_data_insn_abort(hypctx, 738 hypctx->exit_info.far_el2, 739 esr_ec == EXCP_DATA_ABORT_L, 740 ISS_DATA_DFSC_ASF_L0); 741 vme_ret->inst_length = 0; 742 return (HANDLED); 743 } 744 745 if (vm_mem_allocated(hypctx->vcpu, gpa)) { 746 vme_ret->exitcode = VM_EXITCODE_PAGING; 747 vme_ret->inst_length = 0; 748 vme_ret->u.paging.esr = hypctx->tf.tf_esr; 749 vme_ret->u.paging.gpa = gpa; 750 } else if (esr_ec == EXCP_INSN_ABORT_L) { 751 /* 752 * Raise an external abort. Device memory is 753 * not executable 754 */ 755 raise_data_insn_abort(hypctx, 756 hypctx->exit_info.far_el2, false, 757 ISS_DATA_DFSC_EXT); 758 vme_ret->inst_length = 0; 759 return (HANDLED); 760 } else { 761 arm64_gen_inst_emul_data(hypctx, esr_iss, 762 vme_ret); 763 vme_ret->exitcode = VM_EXITCODE_INST_EMUL; 764 } 765 break; 766 default: 767 arm64_print_hyp_regs(vme_ret); 768 vme_ret->exitcode = VM_EXITCODE_HYP; 769 break; 770 } 771 772 break; 773 774 default: 775 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1); 776 arm64_print_hyp_regs(vme_ret); 777 vme_ret->exitcode = VM_EXITCODE_HYP; 778 break; 779 } 780 781 /* We don't don't do any instruction emulation here */ 782 return (UNHANDLED); 783 } 784 785 static int 786 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type, 787 struct vm_exit *vme, pmap_t pmap) 788 { 789 int handled; 790 791 switch (excp_type) { 792 case EXCP_TYPE_EL1_SYNC: 793 /* The exit code will be set by handle_el1_sync_excp(). */ 794 handled = handle_el1_sync_excp(hypctx, vme, pmap); 795 break; 796 797 case EXCP_TYPE_EL1_IRQ: 798 case EXCP_TYPE_EL1_FIQ: 799 /* The host kernel will handle IRQs and FIQs. */ 800 vmm_stat_incr(hypctx->vcpu, 801 excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,1); 802 vme->exitcode = VM_EXITCODE_BOGUS; 803 handled = UNHANDLED; 804 break; 805 806 case EXCP_TYPE_EL1_ERROR: 807 case EXCP_TYPE_EL2_SYNC: 808 case EXCP_TYPE_EL2_IRQ: 809 case EXCP_TYPE_EL2_FIQ: 810 case EXCP_TYPE_EL2_ERROR: 811 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1); 812 vme->exitcode = VM_EXITCODE_BOGUS; 813 handled = UNHANDLED; 814 break; 815 816 default: 817 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); 818 vme->exitcode = VM_EXITCODE_BOGUS; 819 handled = UNHANDLED; 820 break; 821 } 822 823 return (handled); 824 } 825 826 static void 827 ptp_release(void **cookie) 828 { 829 if (*cookie != NULL) { 830 vm_gpa_release(*cookie); 831 *cookie = NULL; 832 } 833 } 834 835 static void * 836 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) 837 { 838 void *ptr; 839 840 ptp_release(cookie); 841 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); 842 return (ptr); 843 } 844 845 /* log2 of the number of bytes in a page table entry */ 846 #define PTE_SHIFT 3 847 int 848 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, 849 int prot, uint64_t *gpa, int *is_fault) 850 { 851 struct hypctx *hypctx; 852 void *cookie; 853 uint64_t mask, *ptep, pte, pte_addr; 854 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz; 855 bool is_el0; 856 857 /* Check if the MMU is off */ 858 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) { 859 *is_fault = 0; 860 *gpa = gla; 861 return (0); 862 } 863 864 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t; 865 866 if (ADDR_IS_KERNEL(gla)) { 867 /* If address translation is disabled raise an exception */ 868 if ((paging->tcr_el1 & TCR_EPD1) != 0) { 869 *is_fault = 1; 870 return (0); 871 } 872 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) { 873 *is_fault = 1; 874 return (0); 875 } 876 pte_addr = paging->ttbr1_addr; 877 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT; 878 /* Clear the top byte if TBI is on */ 879 if ((paging->tcr_el1 & TCR_TBI1) != 0) 880 gla |= (0xfful << 56); 881 switch (paging->tcr_el1 & TCR_TG1_MASK) { 882 case TCR_TG1_4K: 883 granule_shift = PAGE_SHIFT_4K; 884 break; 885 case TCR_TG1_16K: 886 granule_shift = PAGE_SHIFT_16K; 887 break; 888 case TCR_TG1_64K: 889 granule_shift = PAGE_SHIFT_64K; 890 break; 891 default: 892 *is_fault = 1; 893 return (EINVAL); 894 } 895 } else { 896 /* If address translation is disabled raise an exception */ 897 if ((paging->tcr_el1 & TCR_EPD0) != 0) { 898 *is_fault = 1; 899 return (0); 900 } 901 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) { 902 *is_fault = 1; 903 return (0); 904 } 905 pte_addr = paging->ttbr0_addr; 906 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT; 907 /* Clear the top byte if TBI is on */ 908 if ((paging->tcr_el1 & TCR_TBI0) != 0) 909 gla &= ~(0xfful << 56); 910 switch (paging->tcr_el1 & TCR_TG0_MASK) { 911 case TCR_TG0_4K: 912 granule_shift = PAGE_SHIFT_4K; 913 break; 914 case TCR_TG0_16K: 915 granule_shift = PAGE_SHIFT_16K; 916 break; 917 case TCR_TG0_64K: 918 granule_shift = PAGE_SHIFT_64K; 919 break; 920 default: 921 *is_fault = 1; 922 return (EINVAL); 923 } 924 } 925 926 /* 927 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2 928 * for larger values. 929 */ 930 switch (granule_shift) { 931 case PAGE_SHIFT_4K: 932 case PAGE_SHIFT_16K: 933 /* 934 * See "Table D8-11 4KB granule, determining stage 1 initial 935 * lookup level" and "Table D8-21 16KB granule, determining 936 * stage 1 initial lookup level" from the "Arm Architecture 937 * Reference Manual for A-Profile architecture" revision I.a 938 * for the minimum and maximum values. 939 * 940 * TODO: Support less than 16 when FEAT_LPA2 is implemented 941 * and TCR_EL1.DS == 1 942 * TODO: Support more than 39 when FEAT_TTST is implemented 943 */ 944 if (tsz < 16 || tsz > 39) { 945 *is_fault = 1; 946 return (EINVAL); 947 } 948 break; 949 case PAGE_SHIFT_64K: 950 /* TODO: Support 64k granule. It will probably work, but is untested */ 951 default: 952 *is_fault = 1; 953 return (EINVAL); 954 } 955 956 /* 957 * Calculate the input address bits. These are 64 bit in an address 958 * with the top tsz bits being all 0 or all 1. 959 */ 960 ia_bits = 64 - tsz; 961 962 /* 963 * Calculate the number of address bits used in the page table 964 * calculation. This is ia_bits minus the bottom granule_shift 965 * bits that are passed to the output address. 966 */ 967 address_bits = ia_bits - granule_shift; 968 969 /* 970 * Calculate the number of levels. Each level uses 971 * granule_shift - PTE_SHIFT bits of the input address. 972 * This is because the table is 1 << granule_shift and each 973 * entry is 1 << PTE_SHIFT bytes. 974 */ 975 levels = howmany(address_bits, granule_shift - PTE_SHIFT); 976 977 /* Mask of the upper unused bits in the virtual address */ 978 gla &= (1ul << ia_bits) - 1; 979 hypctx = (struct hypctx *)vcpui; 980 cookie = NULL; 981 /* TODO: Check if the level supports block descriptors */ 982 for (;levels > 0; levels--) { 983 int idx; 984 985 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) + 986 granule_shift; 987 idx = (gla >> pte_shift) & 988 ((1ul << (granule_shift - PTE_SHIFT)) - 1); 989 while (idx > PAGE_SIZE / sizeof(pte)) { 990 idx -= PAGE_SIZE / sizeof(pte); 991 pte_addr += PAGE_SIZE; 992 } 993 994 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie); 995 if (ptep == NULL) 996 goto error; 997 pte = ptep[idx]; 998 999 /* Calculate the level we are looking at */ 1000 switch (levels) { 1001 default: 1002 goto fault; 1003 /* TODO: Level -1 when FEAT_LPA2 is implemented */ 1004 case 4: /* Level 0 */ 1005 if ((pte & ATTR_DESCR_MASK) != L0_TABLE) 1006 goto fault; 1007 /* FALLTHROUGH */ 1008 case 3: /* Level 1 */ 1009 case 2: /* Level 2 */ 1010 switch (pte & ATTR_DESCR_MASK) { 1011 /* Use L1 macro as all levels are the same */ 1012 case L1_TABLE: 1013 /* Check if EL0 can access this address space */ 1014 if (is_el0 && 1015 (pte & TATTR_AP_TABLE_NO_EL0) != 0) 1016 goto fault; 1017 /* Check if the address space is writable */ 1018 if ((prot & PROT_WRITE) != 0 && 1019 (pte & TATTR_AP_TABLE_RO) != 0) 1020 goto fault; 1021 if ((prot & PROT_EXEC) != 0) { 1022 /* Check the table exec attribute */ 1023 if ((is_el0 && 1024 (pte & TATTR_UXN_TABLE) != 0) || 1025 (!is_el0 && 1026 (pte & TATTR_PXN_TABLE) != 0)) 1027 goto fault; 1028 } 1029 pte_addr = pte & ~ATTR_MASK; 1030 break; 1031 case L1_BLOCK: 1032 goto done; 1033 default: 1034 goto fault; 1035 } 1036 break; 1037 case 1: /* Level 3 */ 1038 if ((pte & ATTR_DESCR_MASK) == L3_PAGE) 1039 goto done; 1040 goto fault; 1041 } 1042 } 1043 1044 done: 1045 /* Check if EL0 has access to the block/page */ 1046 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0) 1047 goto fault; 1048 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0) 1049 goto fault; 1050 if ((prot & PROT_EXEC) != 0) { 1051 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) || 1052 (!is_el0 && (pte & ATTR_S1_PXN) != 0)) 1053 goto fault; 1054 } 1055 mask = (1ul << pte_shift) - 1; 1056 *gpa = (pte & ~ATTR_MASK) | (gla & mask); 1057 *is_fault = 0; 1058 ptp_release(&cookie); 1059 return (0); 1060 1061 error: 1062 ptp_release(&cookie); 1063 return (EFAULT); 1064 fault: 1065 *is_fault = 1; 1066 ptp_release(&cookie); 1067 return (0); 1068 } 1069 1070 int 1071 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) 1072 { 1073 uint64_t excp_type; 1074 int handled; 1075 register_t daif; 1076 struct hyp *hyp; 1077 struct hypctx *hypctx; 1078 struct vcpu *vcpu; 1079 struct vm_exit *vme; 1080 int mode; 1081 1082 hypctx = (struct hypctx *)vcpui; 1083 hyp = hypctx->hyp; 1084 vcpu = hypctx->vcpu; 1085 vme = vm_exitinfo(vcpu); 1086 1087 hypctx->tf.tf_elr = (uint64_t)pc; 1088 1089 for (;;) { 1090 if (hypctx->has_exception) { 1091 hypctx->has_exception = false; 1092 hypctx->elr_el1 = hypctx->tf.tf_elr; 1093 1094 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 1095 1096 if (mode == PSR_M_EL1t) { 1097 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0; 1098 } else if (mode == PSR_M_EL1h) { 1099 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200; 1100 } else if ((mode & PSR_M_32) == PSR_M_64) { 1101 /* 64-bit EL0 */ 1102 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400; 1103 } else { 1104 /* 32-bit EL0 */ 1105 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600; 1106 } 1107 1108 /* Set the new spsr */ 1109 hypctx->spsr_el1 = hypctx->tf.tf_spsr; 1110 1111 /* Set the new cpsr */ 1112 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS; 1113 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h; 1114 1115 /* 1116 * Update fields that may change on exeption entry 1117 * based on how sctlr_el1 is configured. 1118 */ 1119 if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0) 1120 hypctx->tf.tf_spsr |= PSR_PAN; 1121 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0) 1122 hypctx->tf.tf_spsr &= ~PSR_SSBS; 1123 else 1124 hypctx->tf.tf_spsr |= PSR_SSBS; 1125 } 1126 1127 daif = intr_disable(); 1128 1129 /* Check if the vcpu is suspended */ 1130 if (vcpu_suspended(evinfo)) { 1131 intr_restore(daif); 1132 vm_exit_suspended(vcpu, pc); 1133 break; 1134 } 1135 1136 if (vcpu_debugged(vcpu)) { 1137 intr_restore(daif); 1138 vm_exit_debug(vcpu, pc); 1139 break; 1140 } 1141 1142 /* Activate the stage2 pmap so the vmid is valid */ 1143 pmap_activate_vm(pmap); 1144 hyp->vttbr_el2 = pmap_to_ttbr0(pmap); 1145 1146 /* 1147 * TODO: What happens if a timer interrupt is asserted exactly 1148 * here, but for the previous VM? 1149 */ 1150 arm64_set_active_vcpu(hypctx); 1151 vgic_flush_hwstate(hypctx); 1152 1153 /* Call into EL2 to switch to the guest */ 1154 excp_type = vmm_enter_guest(hyp, hypctx); 1155 1156 vgic_sync_hwstate(hypctx); 1157 vtimer_sync_hwstate(hypctx); 1158 1159 /* 1160 * Deactivate the stage2 pmap. 1161 */ 1162 PCPU_SET(curvmpmap, NULL); 1163 intr_restore(daif); 1164 1165 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); 1166 if (excp_type == EXCP_TYPE_MAINT_IRQ) 1167 continue; 1168 1169 vme->pc = hypctx->tf.tf_elr; 1170 vme->inst_length = INSN_SIZE; 1171 vme->u.hyp.exception_nr = excp_type; 1172 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr; 1173 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; 1174 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; 1175 1176 handled = arm64_handle_world_switch(hypctx, excp_type, vme, 1177 pmap); 1178 if (handled == UNHANDLED) 1179 /* Exit loop to emulate instruction. */ 1180 break; 1181 else 1182 /* Resume guest execution from the next instruction. */ 1183 hypctx->tf.tf_elr += vme->inst_length; 1184 } 1185 1186 return (0); 1187 } 1188 1189 static void 1190 arm_pcpu_vmcleanup(void *arg) 1191 { 1192 struct hyp *hyp; 1193 int i, maxcpus; 1194 1195 hyp = arg; 1196 maxcpus = vm_get_maxcpus(hyp->vm); 1197 for (i = 0; i < maxcpus; i++) { 1198 if (arm64_get_active_vcpu() == hyp->ctx[i]) { 1199 arm64_set_active_vcpu(NULL); 1200 break; 1201 } 1202 } 1203 } 1204 1205 void 1206 vmmops_vcpu_cleanup(void *vcpui) 1207 { 1208 struct hypctx *hypctx = vcpui; 1209 1210 vtimer_cpucleanup(hypctx); 1211 vgic_cpucleanup(hypctx); 1212 1213 if (!in_vhe()) 1214 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); 1215 1216 free(hypctx, M_HYP); 1217 } 1218 1219 void 1220 vmmops_cleanup(void *vmi) 1221 { 1222 struct hyp *hyp = vmi; 1223 1224 vtimer_vmcleanup(hyp); 1225 vgic_vmcleanup(hyp); 1226 1227 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); 1228 1229 if (!in_vhe()) 1230 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); 1231 1232 free(hyp, M_HYP); 1233 } 1234 1235 /* 1236 * Return register value. Registers have different sizes and an explicit cast 1237 * must be made to ensure proper conversion. 1238 */ 1239 static uint64_t * 1240 hypctx_regptr(struct hypctx *hypctx, int reg) 1241 { 1242 switch (reg) { 1243 case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29: 1244 return (&hypctx->tf.tf_x[reg]); 1245 case VM_REG_GUEST_LR: 1246 return (&hypctx->tf.tf_lr); 1247 case VM_REG_GUEST_SP: 1248 return (&hypctx->tf.tf_sp); 1249 case VM_REG_GUEST_CPSR: 1250 return (&hypctx->tf.tf_spsr); 1251 case VM_REG_GUEST_PC: 1252 return (&hypctx->tf.tf_elr); 1253 case VM_REG_GUEST_SCTLR_EL1: 1254 return (&hypctx->sctlr_el1); 1255 case VM_REG_GUEST_TTBR0_EL1: 1256 return (&hypctx->ttbr0_el1); 1257 case VM_REG_GUEST_TTBR1_EL1: 1258 return (&hypctx->ttbr1_el1); 1259 case VM_REG_GUEST_TCR_EL1: 1260 return (&hypctx->tcr_el1); 1261 case VM_REG_GUEST_TCR2_EL1: 1262 return (&hypctx->tcr2_el1); 1263 case VM_REG_GUEST_MPIDR_EL1: 1264 return (&hypctx->vmpidr_el2); 1265 default: 1266 break; 1267 } 1268 return (NULL); 1269 } 1270 1271 int 1272 vmmops_getreg(void *vcpui, int reg, uint64_t *retval) 1273 { 1274 uint64_t *regp; 1275 int running, hostcpu; 1276 struct hypctx *hypctx = vcpui; 1277 1278 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1279 if (running && hostcpu != curcpu) 1280 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm), 1281 vcpu_vcpuid(hypctx->vcpu)); 1282 1283 regp = hypctx_regptr(hypctx, reg); 1284 if (regp == NULL) 1285 return (EINVAL); 1286 1287 *retval = *regp; 1288 return (0); 1289 } 1290 1291 int 1292 vmmops_setreg(void *vcpui, int reg, uint64_t val) 1293 { 1294 uint64_t *regp; 1295 struct hypctx *hypctx = vcpui; 1296 int running, hostcpu; 1297 1298 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1299 if (running && hostcpu != curcpu) 1300 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm), 1301 vcpu_vcpuid(hypctx->vcpu)); 1302 1303 regp = hypctx_regptr(hypctx, reg); 1304 if (regp == NULL) 1305 return (EINVAL); 1306 1307 *regp = val; 1308 return (0); 1309 } 1310 1311 int 1312 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far) 1313 { 1314 struct hypctx *hypctx = vcpui; 1315 int running, hostcpu; 1316 1317 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1318 if (running && hostcpu != curcpu) 1319 panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), 1320 vcpu_vcpuid(hypctx->vcpu)); 1321 1322 hypctx->far_el1 = far; 1323 hypctx->esr_el1 = esr; 1324 hypctx->has_exception = true; 1325 1326 return (0); 1327 } 1328 1329 int 1330 vmmops_getcap(void *vcpui, int num, int *retval) 1331 { 1332 struct hypctx *hypctx = vcpui; 1333 int ret; 1334 1335 ret = ENOENT; 1336 1337 switch (num) { 1338 case VM_CAP_UNRESTRICTED_GUEST: 1339 *retval = 1; 1340 ret = 0; 1341 break; 1342 case VM_CAP_BRK_EXIT: 1343 case VM_CAP_SS_EXIT: 1344 case VM_CAP_MASK_HWINTR: 1345 *retval = (hypctx->setcaps & (1ul << num)) != 0; 1346 break; 1347 default: 1348 break; 1349 } 1350 1351 return (ret); 1352 } 1353 1354 int 1355 vmmops_setcap(void *vcpui, int num, int val) 1356 { 1357 struct hypctx *hypctx = vcpui; 1358 int ret; 1359 1360 ret = 0; 1361 1362 switch (num) { 1363 case VM_CAP_BRK_EXIT: 1364 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1365 break; 1366 if (val != 0) 1367 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1368 else 1369 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1370 break; 1371 case VM_CAP_SS_EXIT: 1372 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1373 break; 1374 1375 if (val != 0) { 1376 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); 1377 hypctx->debug_mdscr |= hypctx->mdscr_el1 & 1378 (MDSCR_SS | MDSCR_KDE); 1379 1380 hypctx->tf.tf_spsr |= PSR_SS; 1381 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; 1382 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1383 } else { 1384 hypctx->tf.tf_spsr &= ~PSR_SS; 1385 hypctx->tf.tf_spsr |= hypctx->debug_spsr; 1386 hypctx->debug_spsr &= ~PSR_SS; 1387 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); 1388 hypctx->mdscr_el1 |= hypctx->debug_mdscr; 1389 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); 1390 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1391 } 1392 break; 1393 case VM_CAP_MASK_HWINTR: 1394 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1395 break; 1396 1397 if (val != 0) { 1398 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & 1399 (PSR_I | PSR_F)); 1400 hypctx->tf.tf_spsr |= PSR_I | PSR_F; 1401 } else { 1402 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F); 1403 hypctx->tf.tf_spsr |= (hypctx->debug_spsr & 1404 (PSR_I | PSR_F)); 1405 hypctx->debug_spsr &= ~(PSR_I | PSR_F); 1406 } 1407 break; 1408 default: 1409 ret = ENOENT; 1410 break; 1411 } 1412 1413 if (ret == 0) { 1414 if (val == 0) 1415 hypctx->setcaps &= ~(1ul << num); 1416 else 1417 hypctx->setcaps |= (1ul << num); 1418 } 1419 1420 return (ret); 1421 } 1422