1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/smp.h> 33 #include <sys/kernel.h> 34 #include <sys/malloc.h> 35 #include <sys/mman.h> 36 #include <sys/pcpu.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/vmem.h> 42 43 #include <vm/vm.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_extern.h> 46 #include <vm/vm_map.h> 47 #include <vm/vm_page.h> 48 #include <vm/vm_param.h> 49 50 #include <machine/vm.h> 51 #include <machine/cpufunc.h> 52 #include <machine/cpu.h> 53 #include <machine/machdep.h> 54 #include <machine/vmm.h> 55 #include <machine/vmm_dev.h> 56 #include <machine/atomic.h> 57 #include <machine/hypervisor.h> 58 #include <machine/pmap.h> 59 60 #include <dev/vmm/vmm_mem.h> 61 62 #include "mmu.h" 63 #include "arm64.h" 64 #include "hyp.h" 65 #include "reset.h" 66 #include "io/vgic.h" 67 #include "io/vgic_v3.h" 68 #include "io/vtimer.h" 69 #include "vmm_handlers.h" 70 #include "vmm_stat.h" 71 72 #define HANDLED 1 73 #define UNHANDLED 0 74 75 /* Number of bits in an EL2 virtual address */ 76 #define EL2_VIRT_BITS 48 77 CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS); 78 79 /* TODO: Move the host hypctx off the stack */ 80 #define VMM_STACK_PAGES 4 81 #define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE) 82 83 static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits; 84 85 /* Register values passed to arm_setup_vectors to set in the hypervisor */ 86 struct vmm_init_regs { 87 uint64_t tcr_el2; 88 uint64_t vtcr_el2; 89 }; 90 91 MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); 92 93 extern char hyp_init_vectors[]; 94 extern char hyp_vectors[]; 95 extern char hyp_stub_vectors[]; 96 97 static vm_paddr_t hyp_code_base; 98 static size_t hyp_code_len; 99 100 static char *stack[MAXCPU]; 101 static vm_offset_t stack_hyp_va[MAXCPU]; 102 103 static vmem_t *el2_mem_alloc; 104 105 static void arm_setup_vectors(void *arg); 106 107 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); 108 109 static inline void 110 arm64_set_active_vcpu(struct hypctx *hypctx) 111 { 112 DPCPU_SET(vcpu, hypctx); 113 } 114 115 struct hypctx * 116 arm64_get_active_vcpu(void) 117 { 118 return (DPCPU_GET(vcpu)); 119 } 120 121 static void 122 arm_setup_vectors(void *arg) 123 { 124 struct vmm_init_regs *el2_regs; 125 uintptr_t stack_top; 126 uint32_t sctlr_el2; 127 register_t daif; 128 129 el2_regs = arg; 130 arm64_set_active_vcpu(NULL); 131 132 /* 133 * Configure the system control register for EL2: 134 * 135 * SCTLR_EL2_M: MMU on 136 * SCTLR_EL2_C: Data cacheability not affected 137 * SCTLR_EL2_I: Instruction cacheability not affected 138 * SCTLR_EL2_A: Instruction alignment check 139 * SCTLR_EL2_SA: Stack pointer alignment check 140 * SCTLR_EL2_WXN: Treat writable memory as execute never 141 * ~SCTLR_EL2_EE: Data accesses are little-endian 142 */ 143 sctlr_el2 = SCTLR_EL2_RES1; 144 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; 145 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; 146 sctlr_el2 |= SCTLR_EL2_WXN; 147 sctlr_el2 &= ~SCTLR_EL2_EE; 148 149 daif = intr_disable(); 150 151 if (in_vhe()) { 152 WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2); 153 } else { 154 /* 155 * Install the temporary vectors which will be responsible for 156 * initializing the VMM when we next trap into EL2. 157 * 158 * x0: the exception vector table responsible for hypervisor 159 * initialization on the next call. 160 */ 161 vmm_call_hyp(vtophys(&vmm_hyp_code)); 162 163 /* Create and map the hypervisor stack */ 164 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; 165 166 /* Special call to initialize EL2 */ 167 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, 168 sctlr_el2, el2_regs->vtcr_el2); 169 } 170 171 intr_restore(daif); 172 } 173 174 static void 175 arm_teardown_vectors(void *arg) 176 { 177 register_t daif; 178 179 /* 180 * vmm_cleanup() will disable the MMU. For the next few instructions, 181 * before the hardware disables the MMU, one of the following is 182 * possible: 183 * 184 * a. The instruction addresses are fetched with the MMU disabled, 185 * and they must represent the actual physical addresses. This will work 186 * because we call the vmm_cleanup() function by its physical address. 187 * 188 * b. The instruction addresses are fetched using the old translation 189 * tables. This will work because we have an identity mapping in place 190 * in the translation tables and vmm_cleanup() is called by its physical 191 * address. 192 */ 193 daif = intr_disable(); 194 /* TODO: Invalidate the cache */ 195 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors)); 196 intr_restore(daif); 197 198 arm64_set_active_vcpu(NULL); 199 } 200 201 static uint64_t 202 vmm_vtcr_el2_sl(u_int levels) 203 { 204 #if PAGE_SIZE == PAGE_SIZE_4K 205 switch (levels) { 206 case 2: 207 return (VTCR_EL2_SL0_4K_LVL2); 208 case 3: 209 return (VTCR_EL2_SL0_4K_LVL1); 210 case 4: 211 return (VTCR_EL2_SL0_4K_LVL0); 212 default: 213 panic("%s: Invalid number of page table levels %u", __func__, 214 levels); 215 } 216 #elif PAGE_SIZE == PAGE_SIZE_16K 217 switch (levels) { 218 case 2: 219 return (VTCR_EL2_SL0_16K_LVL2); 220 case 3: 221 return (VTCR_EL2_SL0_16K_LVL1); 222 case 4: 223 return (VTCR_EL2_SL0_16K_LVL0); 224 default: 225 panic("%s: Invalid number of page table levels %u", __func__, 226 levels); 227 } 228 #else 229 #error Unsupported page size 230 #endif 231 } 232 233 int 234 vmmops_modinit(int ipinum) 235 { 236 struct vmm_init_regs el2_regs; 237 vm_offset_t next_hyp_va; 238 vm_paddr_t vmm_base; 239 uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; 240 int cpu, i; 241 bool rv __diagused; 242 243 if (!has_hyp()) { 244 printf( 245 "vmm: Processor doesn't have support for virtualization\n"); 246 return (ENXIO); 247 } 248 249 if (!vgic_present()) { 250 printf("vmm: No vgic found\n"); 251 return (ENODEV); 252 } 253 254 if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) { 255 printf("vmm: Unable to read ID_AA64MMFR0_EL1\n"); 256 return (ENXIO); 257 } 258 pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); 259 /* 260 * Use 3 levels to give us up to 39 bits with 4k pages, or 261 * 47 bits with 16k pages. 262 */ 263 /* TODO: Check the number of levels for 64k pages */ 264 vmm_pmap_levels = 3; 265 switch (pa_range_field) { 266 case ID_AA64MMFR0_PARange_4G: 267 printf("vmm: Not enough physical address bits\n"); 268 return (ENXIO); 269 case ID_AA64MMFR0_PARange_64G: 270 vmm_virt_bits = 36; 271 #if PAGE_SIZE == PAGE_SIZE_16K 272 vmm_pmap_levels = 2; 273 #endif 274 break; 275 default: 276 vmm_virt_bits = 39; 277 break; 278 } 279 pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; 280 281 if (!in_vhe()) { 282 /* Initialise the EL2 MMU */ 283 if (!vmmpmap_init()) { 284 printf("vmm: Failed to init the EL2 MMU\n"); 285 return (ENOMEM); 286 } 287 } 288 289 /* Set up the stage 2 pmap callbacks */ 290 MPASS(pmap_clean_stage2_tlbi == NULL); 291 pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi; 292 pmap_stage2_invalidate_range = vmm_s2_tlbi_range; 293 pmap_stage2_invalidate_all = vmm_s2_tlbi_all; 294 295 if (!in_vhe()) { 296 /* 297 * Create an allocator for the virtual address space used by 298 * EL2. EL2 code is identity-mapped; the allocator is used to 299 * find space for VM structures. 300 */ 301 el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, 302 M_WAITOK); 303 304 /* Create the mappings for the hypervisor translation table. */ 305 hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); 306 307 /* We need an physical identity mapping for when we activate the MMU */ 308 hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); 309 rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, 310 VM_PROT_READ | VM_PROT_EXECUTE); 311 MPASS(rv); 312 313 next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); 314 315 /* Create a per-CPU hypervisor stack */ 316 CPU_FOREACH(cpu) { 317 stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); 318 stack_hyp_va[cpu] = next_hyp_va; 319 320 for (i = 0; i < VMM_STACK_PAGES; i++) { 321 rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), 322 PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), 323 VM_PROT_READ | VM_PROT_WRITE); 324 MPASS(rv); 325 } 326 next_hyp_va += L2_SIZE; 327 } 328 329 el2_regs.tcr_el2 = TCR_EL2_RES1; 330 el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, 331 TCR_EL2_PS_52BITS); 332 el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); 333 el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; 334 #if PAGE_SIZE == PAGE_SIZE_4K 335 el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; 336 #elif PAGE_SIZE == PAGE_SIZE_16K 337 el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; 338 #else 339 #error Unsupported page size 340 #endif 341 #ifdef SMP 342 el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; 343 #endif 344 } 345 346 switch (pa_range_bits << TCR_EL2_PS_SHIFT) { 347 case TCR_EL2_PS_32BITS: 348 vmm_max_ipa_bits = 32; 349 break; 350 case TCR_EL2_PS_36BITS: 351 vmm_max_ipa_bits = 36; 352 break; 353 case TCR_EL2_PS_40BITS: 354 vmm_max_ipa_bits = 40; 355 break; 356 case TCR_EL2_PS_42BITS: 357 vmm_max_ipa_bits = 42; 358 break; 359 case TCR_EL2_PS_44BITS: 360 vmm_max_ipa_bits = 44; 361 break; 362 case TCR_EL2_PS_48BITS: 363 vmm_max_ipa_bits = 48; 364 break; 365 case TCR_EL2_PS_52BITS: 366 default: 367 vmm_max_ipa_bits = 52; 368 break; 369 } 370 371 /* 372 * Configure the Stage 2 translation control register: 373 * 374 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable 375 * normal memory 376 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable 377 * normal memory 378 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel 379 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables 380 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner 381 * shareable 382 */ 383 el2_regs.vtcr_el2 = VTCR_EL2_RES1; 384 el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; 385 el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits); 386 el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels); 387 #if PAGE_SIZE == PAGE_SIZE_4K 388 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K; 389 #elif PAGE_SIZE == PAGE_SIZE_16K 390 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K; 391 #else 392 #error Unsupported page size 393 #endif 394 #ifdef SMP 395 el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS; 396 #endif 397 /* 398 * If FEAT_LPA2 is enabled in the host then we need to enable it here 399 * so the page tables created by pmap.c are correct. The meaning of 400 * the shareability field changes to become address bits when this 401 * is set. 402 */ 403 if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) { 404 el2_regs.vtcr_el2 |= VTCR_EL2_DS; 405 el2_regs.vtcr_el2 |= 406 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT); 407 } else { 408 el2_regs.vtcr_el2 |= 409 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT); 410 } 411 412 smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); 413 414 if (!in_vhe()) { 415 /* Add memory to the vmem allocator (checking there is space) */ 416 if (vmm_base > (L2_SIZE + PAGE_SIZE)) { 417 /* 418 * Ensure there is an L2 block before the vmm code to check 419 * for buffer overflows on earlier data. Include the PAGE_SIZE 420 * of the minimum we can allocate. 421 */ 422 vmm_base -= L2_SIZE + PAGE_SIZE; 423 vmm_base = rounddown2(vmm_base, L2_SIZE); 424 425 /* 426 * Check there is memory before the vmm code to add. 427 * 428 * Reserve the L2 block at address 0 so NULL dereference will 429 * raise an exception. 430 */ 431 if (vmm_base > L2_SIZE) 432 vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE, 433 M_WAITOK); 434 } 435 436 /* 437 * Add the memory after the stacks. There is most of an L2 block 438 * between the last stack and the first allocation so this should 439 * be safe without adding more padding. 440 */ 441 if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) 442 vmem_add(el2_mem_alloc, next_hyp_va, 443 HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); 444 } 445 446 vgic_init(); 447 vtimer_init(); 448 449 return (0); 450 } 451 452 int 453 vmmops_modcleanup(void) 454 { 455 int cpu; 456 457 if (!in_vhe()) { 458 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); 459 460 CPU_FOREACH(cpu) { 461 vmmpmap_remove(stack_hyp_va[cpu], 462 VMM_STACK_PAGES * PAGE_SIZE, false); 463 } 464 465 vmmpmap_remove(hyp_code_base, hyp_code_len, false); 466 } 467 468 vtimer_cleanup(); 469 470 if (!in_vhe()) { 471 vmmpmap_fini(); 472 473 CPU_FOREACH(cpu) 474 free(stack[cpu], M_HYP); 475 } 476 477 pmap_clean_stage2_tlbi = NULL; 478 pmap_stage2_invalidate_range = NULL; 479 pmap_stage2_invalidate_all = NULL; 480 481 return (0); 482 } 483 484 static vm_size_t 485 el2_hyp_size(struct vm *vm) 486 { 487 return (round_page(sizeof(struct hyp) + 488 sizeof(struct hypctx *) * vm_get_maxcpus(vm))); 489 } 490 491 static vm_size_t 492 el2_hypctx_size(void) 493 { 494 return (round_page(sizeof(struct hypctx))); 495 } 496 497 static vm_offset_t 498 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot) 499 { 500 vmem_addr_t addr; 501 int err __diagused; 502 bool rv __diagused; 503 504 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr); 505 MPASS(err == 0); 506 rv = vmmpmap_enter(addr, size, vtophys(data), prot); 507 MPASS(rv); 508 509 return (addr); 510 } 511 512 void * 513 vmmops_init(struct vm *vm, pmap_t pmap) 514 { 515 struct hyp *hyp; 516 vm_size_t size; 517 uint64_t idreg; 518 519 size = el2_hyp_size(vm); 520 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 521 522 hyp->vm = vm; 523 hyp->vgic_attached = false; 524 525 if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) { 526 if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF) 527 hyp->feats |= HYP_FEAT_ECV_POFF; 528 } 529 530 if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) { 531 if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL) 532 hyp->feats |= HYP_FEAT_HCX; 533 } 534 535 vtimer_vminit(hyp); 536 vgic_vminit(hyp); 537 538 if (!in_vhe()) 539 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, 540 VM_PROT_READ | VM_PROT_WRITE); 541 542 return (hyp); 543 } 544 545 void * 546 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) 547 { 548 struct hyp *hyp = vmi; 549 struct hypctx *hypctx; 550 vm_size_t size; 551 552 size = el2_hypctx_size(); 553 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 554 555 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), 556 ("%s: Invalid vcpuid %d", __func__, vcpuid)); 557 hyp->ctx[vcpuid] = hypctx; 558 559 hypctx->hyp = hyp; 560 hypctx->vcpu = vcpu1; 561 562 reset_vm_el01_regs(hypctx); 563 reset_vm_el2_regs(hypctx); 564 565 vtimer_cpuinit(hypctx); 566 vgic_cpuinit(hypctx); 567 568 if (!in_vhe()) 569 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, 570 VM_PROT_READ | VM_PROT_WRITE); 571 572 return (hypctx); 573 } 574 575 static int 576 arm_vmm_pinit(pmap_t pmap) 577 { 578 579 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels); 580 return (1); 581 } 582 583 struct vmspace * 584 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) 585 { 586 return (vmspace_alloc(min, max, arm_vmm_pinit)); 587 } 588 589 void 590 vmmops_vmspace_free(struct vmspace *vmspace) 591 { 592 593 pmap_remove_pages(vmspace_pmap(vmspace)); 594 vmspace_free(vmspace); 595 } 596 597 static inline void 598 arm64_print_hyp_regs(struct vm_exit *vme) 599 { 600 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2); 601 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); 602 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); 603 printf("elr_el2: 0x%016lx\n", vme->pc); 604 } 605 606 static void 607 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss, 608 struct vm_exit *vme_ret) 609 { 610 struct vm_guest_paging *paging; 611 struct vie *vie; 612 uint32_t esr_sas, reg_num; 613 614 /* 615 * Get the page address from HPFAR_EL2. 616 */ 617 vme_ret->u.inst_emul.gpa = 618 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 619 /* Bits [11:0] are the same as bits [11:0] from the virtual address. */ 620 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 & 621 FAR_EL2_HPFAR_PAGE_MASK; 622 623 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; 624 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; 625 626 vie = &vme_ret->u.inst_emul.vie; 627 vie->access_size = 1 << esr_sas; 628 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; 629 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; 630 vie->reg = reg_num; 631 632 paging = &vme_ret->u.inst_emul.paging; 633 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 634 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 635 paging->tcr_el1 = hypctx->tcr_el1; 636 paging->tcr2_el1 = hypctx->tcr2_el1; 637 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 638 if ((hypctx->sctlr_el1 & SCTLR_M) != 0) 639 paging->flags |= VM_GP_MMU_ENABLED; 640 } 641 642 static void 643 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) 644 { 645 uint32_t reg_num; 646 struct vre *vre; 647 648 /* u.hyp member will be replaced by u.reg_emul */ 649 vre = &vme_ret->u.reg_emul.vre; 650 651 vre->inst_syndrome = esr_iss; 652 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ 653 vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE; 654 reg_num = ISS_MSR_Rt(esr_iss); 655 vre->reg = reg_num; 656 } 657 658 void 659 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc) 660 { 661 uint64_t esr; 662 663 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t) 664 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT; 665 else 666 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT; 667 /* Set the bit that changes from insn -> data abort */ 668 if (dabort) 669 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT; 670 /* Set the IL bit if set by hardware */ 671 esr |= hypctx->tf.tf_esr & ESR_ELx_IL; 672 673 vmmops_exception(hypctx, esr | fsc, far); 674 } 675 676 static int 677 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret, 678 pmap_t pmap) 679 { 680 uint64_t gpa; 681 uint32_t esr_ec, esr_iss; 682 683 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr); 684 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK; 685 686 switch (esr_ec) { 687 case EXCP_UNKNOWN: 688 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1); 689 arm64_print_hyp_regs(vme_ret); 690 vme_ret->exitcode = VM_EXITCODE_HYP; 691 break; 692 case EXCP_TRAP_WFI_WFE: 693 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */ 694 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1); 695 vme_ret->exitcode = VM_EXITCODE_WFI; 696 } else { 697 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1); 698 vme_ret->exitcode = VM_EXITCODE_HYP; 699 } 700 break; 701 case EXCP_HVC: 702 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1); 703 vme_ret->exitcode = VM_EXITCODE_HVC; 704 break; 705 case EXCP_MSR: 706 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1); 707 arm64_gen_reg_emul_data(esr_iss, vme_ret); 708 vme_ret->exitcode = VM_EXITCODE_REG_EMUL; 709 break; 710 case EXCP_BRK: 711 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1); 712 vme_ret->exitcode = VM_EXITCODE_BRK; 713 break; 714 case EXCP_SOFTSTP_EL0: 715 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1); 716 vme_ret->exitcode = VM_EXITCODE_SS; 717 break; 718 case EXCP_INSN_ABORT_L: 719 case EXCP_DATA_ABORT_L: 720 vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ? 721 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1); 722 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) { 723 case ISS_DATA_DFSC_TF_L0: 724 case ISS_DATA_DFSC_TF_L1: 725 case ISS_DATA_DFSC_TF_L2: 726 case ISS_DATA_DFSC_TF_L3: 727 case ISS_DATA_DFSC_AFF_L1: 728 case ISS_DATA_DFSC_AFF_L2: 729 case ISS_DATA_DFSC_AFF_L3: 730 case ISS_DATA_DFSC_PF_L1: 731 case ISS_DATA_DFSC_PF_L2: 732 case ISS_DATA_DFSC_PF_L3: 733 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 734 /* Check the IPA is valid */ 735 if (gpa >= (1ul << vmm_max_ipa_bits)) { 736 raise_data_insn_abort(hypctx, 737 hypctx->exit_info.far_el2, 738 esr_ec == EXCP_DATA_ABORT_L, 739 ISS_DATA_DFSC_ASF_L0); 740 vme_ret->inst_length = 0; 741 return (HANDLED); 742 } 743 744 if (vm_mem_allocated(hypctx->vcpu, gpa)) { 745 vme_ret->exitcode = VM_EXITCODE_PAGING; 746 vme_ret->inst_length = 0; 747 vme_ret->u.paging.esr = hypctx->tf.tf_esr; 748 vme_ret->u.paging.gpa = gpa; 749 } else if (esr_ec == EXCP_INSN_ABORT_L) { 750 /* 751 * Raise an external abort. Device memory is 752 * not executable 753 */ 754 raise_data_insn_abort(hypctx, 755 hypctx->exit_info.far_el2, false, 756 ISS_DATA_DFSC_EXT); 757 vme_ret->inst_length = 0; 758 return (HANDLED); 759 } else { 760 arm64_gen_inst_emul_data(hypctx, esr_iss, 761 vme_ret); 762 vme_ret->exitcode = VM_EXITCODE_INST_EMUL; 763 } 764 break; 765 default: 766 arm64_print_hyp_regs(vme_ret); 767 vme_ret->exitcode = VM_EXITCODE_HYP; 768 break; 769 } 770 771 break; 772 773 default: 774 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1); 775 arm64_print_hyp_regs(vme_ret); 776 vme_ret->exitcode = VM_EXITCODE_HYP; 777 break; 778 } 779 780 /* We don't don't do any instruction emulation here */ 781 return (UNHANDLED); 782 } 783 784 static int 785 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type, 786 struct vm_exit *vme, pmap_t pmap) 787 { 788 int handled; 789 790 switch (excp_type) { 791 case EXCP_TYPE_EL1_SYNC: 792 /* The exit code will be set by handle_el1_sync_excp(). */ 793 handled = handle_el1_sync_excp(hypctx, vme, pmap); 794 break; 795 796 case EXCP_TYPE_EL1_IRQ: 797 case EXCP_TYPE_EL1_FIQ: 798 /* The host kernel will handle IRQs and FIQs. */ 799 vmm_stat_incr(hypctx->vcpu, 800 excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,1); 801 vme->exitcode = VM_EXITCODE_BOGUS; 802 handled = UNHANDLED; 803 break; 804 805 case EXCP_TYPE_EL1_ERROR: 806 case EXCP_TYPE_EL2_SYNC: 807 case EXCP_TYPE_EL2_IRQ: 808 case EXCP_TYPE_EL2_FIQ: 809 case EXCP_TYPE_EL2_ERROR: 810 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1); 811 vme->exitcode = VM_EXITCODE_BOGUS; 812 handled = UNHANDLED; 813 break; 814 815 default: 816 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); 817 vme->exitcode = VM_EXITCODE_BOGUS; 818 handled = UNHANDLED; 819 break; 820 } 821 822 return (handled); 823 } 824 825 static void 826 ptp_release(void **cookie) 827 { 828 if (*cookie != NULL) { 829 vm_gpa_release(*cookie); 830 *cookie = NULL; 831 } 832 } 833 834 static void * 835 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) 836 { 837 void *ptr; 838 839 ptp_release(cookie); 840 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); 841 return (ptr); 842 } 843 844 /* log2 of the number of bytes in a page table entry */ 845 #define PTE_SHIFT 3 846 int 847 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, 848 int prot, uint64_t *gpa, int *is_fault) 849 { 850 struct hypctx *hypctx; 851 void *cookie; 852 uint64_t mask, *ptep, pte, pte_addr; 853 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz; 854 bool is_el0; 855 856 /* Check if the MMU is off */ 857 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) { 858 *is_fault = 0; 859 *gpa = gla; 860 return (0); 861 } 862 863 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t; 864 865 if (ADDR_IS_KERNEL(gla)) { 866 /* If address translation is disabled raise an exception */ 867 if ((paging->tcr_el1 & TCR_EPD1) != 0) { 868 *is_fault = 1; 869 return (0); 870 } 871 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) { 872 *is_fault = 1; 873 return (0); 874 } 875 pte_addr = paging->ttbr1_addr; 876 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT; 877 /* Clear the top byte if TBI is on */ 878 if ((paging->tcr_el1 & TCR_TBI1) != 0) 879 gla |= (0xfful << 56); 880 switch (paging->tcr_el1 & TCR_TG1_MASK) { 881 case TCR_TG1_4K: 882 granule_shift = PAGE_SHIFT_4K; 883 break; 884 case TCR_TG1_16K: 885 granule_shift = PAGE_SHIFT_16K; 886 break; 887 case TCR_TG1_64K: 888 granule_shift = PAGE_SHIFT_64K; 889 break; 890 default: 891 *is_fault = 1; 892 return (EINVAL); 893 } 894 } else { 895 /* If address translation is disabled raise an exception */ 896 if ((paging->tcr_el1 & TCR_EPD0) != 0) { 897 *is_fault = 1; 898 return (0); 899 } 900 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) { 901 *is_fault = 1; 902 return (0); 903 } 904 pte_addr = paging->ttbr0_addr; 905 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT; 906 /* Clear the top byte if TBI is on */ 907 if ((paging->tcr_el1 & TCR_TBI0) != 0) 908 gla &= ~(0xfful << 56); 909 switch (paging->tcr_el1 & TCR_TG0_MASK) { 910 case TCR_TG0_4K: 911 granule_shift = PAGE_SHIFT_4K; 912 break; 913 case TCR_TG0_16K: 914 granule_shift = PAGE_SHIFT_16K; 915 break; 916 case TCR_TG0_64K: 917 granule_shift = PAGE_SHIFT_64K; 918 break; 919 default: 920 *is_fault = 1; 921 return (EINVAL); 922 } 923 } 924 925 /* 926 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2 927 * for larger values. 928 */ 929 switch (granule_shift) { 930 case PAGE_SHIFT_4K: 931 case PAGE_SHIFT_16K: 932 /* 933 * See "Table D8-11 4KB granule, determining stage 1 initial 934 * lookup level" and "Table D8-21 16KB granule, determining 935 * stage 1 initial lookup level" from the "Arm Architecture 936 * Reference Manual for A-Profile architecture" revision I.a 937 * for the minimum and maximum values. 938 * 939 * TODO: Support less than 16 when FEAT_LPA2 is implemented 940 * and TCR_EL1.DS == 1 941 * TODO: Support more than 39 when FEAT_TTST is implemented 942 */ 943 if (tsz < 16 || tsz > 39) { 944 *is_fault = 1; 945 return (EINVAL); 946 } 947 break; 948 case PAGE_SHIFT_64K: 949 /* TODO: Support 64k granule. It will probably work, but is untested */ 950 default: 951 *is_fault = 1; 952 return (EINVAL); 953 } 954 955 /* 956 * Calculate the input address bits. These are 64 bit in an address 957 * with the top tsz bits being all 0 or all 1. 958 */ 959 ia_bits = 64 - tsz; 960 961 /* 962 * Calculate the number of address bits used in the page table 963 * calculation. This is ia_bits minus the bottom granule_shift 964 * bits that are passed to the output address. 965 */ 966 address_bits = ia_bits - granule_shift; 967 968 /* 969 * Calculate the number of levels. Each level uses 970 * granule_shift - PTE_SHIFT bits of the input address. 971 * This is because the table is 1 << granule_shift and each 972 * entry is 1 << PTE_SHIFT bytes. 973 */ 974 levels = howmany(address_bits, granule_shift - PTE_SHIFT); 975 976 /* Mask of the upper unused bits in the virtual address */ 977 gla &= (1ul << ia_bits) - 1; 978 hypctx = (struct hypctx *)vcpui; 979 cookie = NULL; 980 /* TODO: Check if the level supports block descriptors */ 981 for (;levels > 0; levels--) { 982 int idx; 983 984 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) + 985 granule_shift; 986 idx = (gla >> pte_shift) & 987 ((1ul << (granule_shift - PTE_SHIFT)) - 1); 988 while (idx > PAGE_SIZE / sizeof(pte)) { 989 idx -= PAGE_SIZE / sizeof(pte); 990 pte_addr += PAGE_SIZE; 991 } 992 993 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie); 994 if (ptep == NULL) 995 goto error; 996 pte = ptep[idx]; 997 998 /* Calculate the level we are looking at */ 999 switch (levels) { 1000 default: 1001 goto fault; 1002 /* TODO: Level -1 when FEAT_LPA2 is implemented */ 1003 case 4: /* Level 0 */ 1004 if ((pte & ATTR_DESCR_MASK) != L0_TABLE) 1005 goto fault; 1006 /* FALLTHROUGH */ 1007 case 3: /* Level 1 */ 1008 case 2: /* Level 2 */ 1009 switch (pte & ATTR_DESCR_MASK) { 1010 /* Use L1 macro as all levels are the same */ 1011 case L1_TABLE: 1012 /* Check if EL0 can access this address space */ 1013 if (is_el0 && 1014 (pte & TATTR_AP_TABLE_NO_EL0) != 0) 1015 goto fault; 1016 /* Check if the address space is writable */ 1017 if ((prot & PROT_WRITE) != 0 && 1018 (pte & TATTR_AP_TABLE_RO) != 0) 1019 goto fault; 1020 if ((prot & PROT_EXEC) != 0) { 1021 /* Check the table exec attribute */ 1022 if ((is_el0 && 1023 (pte & TATTR_UXN_TABLE) != 0) || 1024 (!is_el0 && 1025 (pte & TATTR_PXN_TABLE) != 0)) 1026 goto fault; 1027 } 1028 pte_addr = pte & ~ATTR_MASK; 1029 break; 1030 case L1_BLOCK: 1031 goto done; 1032 default: 1033 goto fault; 1034 } 1035 break; 1036 case 1: /* Level 3 */ 1037 if ((pte & ATTR_DESCR_MASK) == L3_PAGE) 1038 goto done; 1039 goto fault; 1040 } 1041 } 1042 1043 done: 1044 /* Check if EL0 has access to the block/page */ 1045 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0) 1046 goto fault; 1047 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0) 1048 goto fault; 1049 if ((prot & PROT_EXEC) != 0) { 1050 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) || 1051 (!is_el0 && (pte & ATTR_S1_PXN) != 0)) 1052 goto fault; 1053 } 1054 mask = (1ul << pte_shift) - 1; 1055 *gpa = (pte & ~ATTR_MASK) | (gla & mask); 1056 *is_fault = 0; 1057 ptp_release(&cookie); 1058 return (0); 1059 1060 error: 1061 ptp_release(&cookie); 1062 return (EFAULT); 1063 fault: 1064 *is_fault = 1; 1065 ptp_release(&cookie); 1066 return (0); 1067 } 1068 1069 int 1070 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) 1071 { 1072 uint64_t excp_type; 1073 int handled; 1074 register_t daif; 1075 struct hyp *hyp; 1076 struct hypctx *hypctx; 1077 struct vcpu *vcpu; 1078 struct vm_exit *vme; 1079 int mode; 1080 1081 hypctx = (struct hypctx *)vcpui; 1082 hyp = hypctx->hyp; 1083 vcpu = hypctx->vcpu; 1084 vme = vm_exitinfo(vcpu); 1085 1086 hypctx->tf.tf_elr = (uint64_t)pc; 1087 1088 for (;;) { 1089 if (hypctx->has_exception) { 1090 hypctx->has_exception = false; 1091 hypctx->elr_el1 = hypctx->tf.tf_elr; 1092 1093 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 1094 1095 if (mode == PSR_M_EL1t) { 1096 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0; 1097 } else if (mode == PSR_M_EL1h) { 1098 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200; 1099 } else if ((mode & PSR_M_32) == PSR_M_64) { 1100 /* 64-bit EL0 */ 1101 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400; 1102 } else { 1103 /* 32-bit EL0 */ 1104 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600; 1105 } 1106 1107 /* Set the new spsr */ 1108 hypctx->spsr_el1 = hypctx->tf.tf_spsr; 1109 1110 /* Set the new cpsr */ 1111 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS; 1112 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h; 1113 1114 /* 1115 * Update fields that may change on exeption entry 1116 * based on how sctlr_el1 is configured. 1117 */ 1118 if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0) 1119 hypctx->tf.tf_spsr |= PSR_PAN; 1120 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0) 1121 hypctx->tf.tf_spsr &= ~PSR_SSBS; 1122 else 1123 hypctx->tf.tf_spsr |= PSR_SSBS; 1124 } 1125 1126 daif = intr_disable(); 1127 1128 /* Check if the vcpu is suspended */ 1129 if (vcpu_suspended(evinfo)) { 1130 intr_restore(daif); 1131 vm_exit_suspended(vcpu, pc); 1132 break; 1133 } 1134 1135 if (vcpu_debugged(vcpu)) { 1136 intr_restore(daif); 1137 vm_exit_debug(vcpu, pc); 1138 break; 1139 } 1140 1141 /* Activate the stage2 pmap so the vmid is valid */ 1142 pmap_activate_vm(pmap); 1143 hyp->vttbr_el2 = pmap_to_ttbr0(pmap); 1144 1145 /* 1146 * TODO: What happens if a timer interrupt is asserted exactly 1147 * here, but for the previous VM? 1148 */ 1149 arm64_set_active_vcpu(hypctx); 1150 vgic_flush_hwstate(hypctx); 1151 1152 /* Call into EL2 to switch to the guest */ 1153 excp_type = vmm_enter_guest(hyp, hypctx); 1154 1155 vgic_sync_hwstate(hypctx); 1156 vtimer_sync_hwstate(hypctx); 1157 1158 /* 1159 * Deactivate the stage2 pmap. 1160 */ 1161 PCPU_SET(curvmpmap, NULL); 1162 intr_restore(daif); 1163 1164 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); 1165 if (excp_type == EXCP_TYPE_MAINT_IRQ) 1166 continue; 1167 1168 vme->pc = hypctx->tf.tf_elr; 1169 vme->inst_length = INSN_SIZE; 1170 vme->u.hyp.exception_nr = excp_type; 1171 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr; 1172 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; 1173 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; 1174 1175 handled = arm64_handle_world_switch(hypctx, excp_type, vme, 1176 pmap); 1177 if (handled == UNHANDLED) 1178 /* Exit loop to emulate instruction. */ 1179 break; 1180 else 1181 /* Resume guest execution from the next instruction. */ 1182 hypctx->tf.tf_elr += vme->inst_length; 1183 } 1184 1185 return (0); 1186 } 1187 1188 static void 1189 arm_pcpu_vmcleanup(void *arg) 1190 { 1191 struct hyp *hyp; 1192 int i, maxcpus; 1193 1194 hyp = arg; 1195 maxcpus = vm_get_maxcpus(hyp->vm); 1196 for (i = 0; i < maxcpus; i++) { 1197 if (arm64_get_active_vcpu() == hyp->ctx[i]) { 1198 arm64_set_active_vcpu(NULL); 1199 break; 1200 } 1201 } 1202 } 1203 1204 void 1205 vmmops_vcpu_cleanup(void *vcpui) 1206 { 1207 struct hypctx *hypctx = vcpui; 1208 1209 vtimer_cpucleanup(hypctx); 1210 vgic_cpucleanup(hypctx); 1211 1212 if (!in_vhe()) 1213 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); 1214 1215 free(hypctx, M_HYP); 1216 } 1217 1218 void 1219 vmmops_cleanup(void *vmi) 1220 { 1221 struct hyp *hyp = vmi; 1222 1223 vtimer_vmcleanup(hyp); 1224 vgic_vmcleanup(hyp); 1225 1226 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); 1227 1228 if (!in_vhe()) 1229 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); 1230 1231 free(hyp, M_HYP); 1232 } 1233 1234 /* 1235 * Return register value. Registers have different sizes and an explicit cast 1236 * must be made to ensure proper conversion. 1237 */ 1238 static uint64_t * 1239 hypctx_regptr(struct hypctx *hypctx, int reg) 1240 { 1241 switch (reg) { 1242 case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29: 1243 return (&hypctx->tf.tf_x[reg]); 1244 case VM_REG_GUEST_LR: 1245 return (&hypctx->tf.tf_lr); 1246 case VM_REG_GUEST_SP: 1247 return (&hypctx->tf.tf_sp); 1248 case VM_REG_GUEST_CPSR: 1249 return (&hypctx->tf.tf_spsr); 1250 case VM_REG_GUEST_PC: 1251 return (&hypctx->tf.tf_elr); 1252 case VM_REG_GUEST_SCTLR_EL1: 1253 return (&hypctx->sctlr_el1); 1254 case VM_REG_GUEST_TTBR0_EL1: 1255 return (&hypctx->ttbr0_el1); 1256 case VM_REG_GUEST_TTBR1_EL1: 1257 return (&hypctx->ttbr1_el1); 1258 case VM_REG_GUEST_TCR_EL1: 1259 return (&hypctx->tcr_el1); 1260 case VM_REG_GUEST_TCR2_EL1: 1261 return (&hypctx->tcr2_el1); 1262 case VM_REG_GUEST_MPIDR_EL1: 1263 return (&hypctx->vmpidr_el2); 1264 default: 1265 break; 1266 } 1267 return (NULL); 1268 } 1269 1270 int 1271 vmmops_getreg(void *vcpui, int reg, uint64_t *retval) 1272 { 1273 uint64_t *regp; 1274 int running, hostcpu; 1275 struct hypctx *hypctx = vcpui; 1276 1277 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1278 if (running && hostcpu != curcpu) 1279 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm), 1280 vcpu_vcpuid(hypctx->vcpu)); 1281 1282 regp = hypctx_regptr(hypctx, reg); 1283 if (regp == NULL) 1284 return (EINVAL); 1285 1286 *retval = *regp; 1287 return (0); 1288 } 1289 1290 int 1291 vmmops_setreg(void *vcpui, int reg, uint64_t val) 1292 { 1293 uint64_t *regp; 1294 struct hypctx *hypctx = vcpui; 1295 int running, hostcpu; 1296 1297 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1298 if (running && hostcpu != curcpu) 1299 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm), 1300 vcpu_vcpuid(hypctx->vcpu)); 1301 1302 regp = hypctx_regptr(hypctx, reg); 1303 if (regp == NULL) 1304 return (EINVAL); 1305 1306 *regp = val; 1307 return (0); 1308 } 1309 1310 int 1311 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far) 1312 { 1313 struct hypctx *hypctx = vcpui; 1314 int running, hostcpu; 1315 1316 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1317 if (running && hostcpu != curcpu) 1318 panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), 1319 vcpu_vcpuid(hypctx->vcpu)); 1320 1321 hypctx->far_el1 = far; 1322 hypctx->esr_el1 = esr; 1323 hypctx->has_exception = true; 1324 1325 return (0); 1326 } 1327 1328 int 1329 vmmops_getcap(void *vcpui, int num, int *retval) 1330 { 1331 struct hypctx *hypctx = vcpui; 1332 int ret; 1333 1334 ret = ENOENT; 1335 1336 switch (num) { 1337 case VM_CAP_UNRESTRICTED_GUEST: 1338 *retval = 1; 1339 ret = 0; 1340 break; 1341 case VM_CAP_BRK_EXIT: 1342 case VM_CAP_SS_EXIT: 1343 case VM_CAP_MASK_HWINTR: 1344 *retval = (hypctx->setcaps & (1ul << num)) != 0; 1345 break; 1346 default: 1347 break; 1348 } 1349 1350 return (ret); 1351 } 1352 1353 int 1354 vmmops_setcap(void *vcpui, int num, int val) 1355 { 1356 struct hypctx *hypctx = vcpui; 1357 int ret; 1358 1359 ret = 0; 1360 1361 switch (num) { 1362 case VM_CAP_BRK_EXIT: 1363 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1364 break; 1365 if (val != 0) 1366 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1367 else 1368 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1369 break; 1370 case VM_CAP_SS_EXIT: 1371 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1372 break; 1373 1374 if (val != 0) { 1375 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); 1376 hypctx->debug_mdscr |= hypctx->mdscr_el1 & 1377 (MDSCR_SS | MDSCR_KDE); 1378 1379 hypctx->tf.tf_spsr |= PSR_SS; 1380 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; 1381 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1382 } else { 1383 hypctx->tf.tf_spsr &= ~PSR_SS; 1384 hypctx->tf.tf_spsr |= hypctx->debug_spsr; 1385 hypctx->debug_spsr &= ~PSR_SS; 1386 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); 1387 hypctx->mdscr_el1 |= hypctx->debug_mdscr; 1388 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); 1389 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1390 } 1391 break; 1392 case VM_CAP_MASK_HWINTR: 1393 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1394 break; 1395 1396 if (val != 0) { 1397 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & 1398 (PSR_I | PSR_F)); 1399 hypctx->tf.tf_spsr |= PSR_I | PSR_F; 1400 } else { 1401 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F); 1402 hypctx->tf.tf_spsr |= (hypctx->debug_spsr & 1403 (PSR_I | PSR_F)); 1404 hypctx->debug_spsr &= ~(PSR_I | PSR_F); 1405 } 1406 break; 1407 default: 1408 ret = ENOENT; 1409 break; 1410 } 1411 1412 if (ret == 0) { 1413 if (val == 0) 1414 hypctx->setcaps &= ~(1ul << num); 1415 else 1416 hypctx->setcaps |= (1ul << num); 1417 } 1418 1419 return (ret); 1420 } 1421