/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/atomic.h>
#include <machine/hypervisor.h>
#include <machine/pmap.h>

#include "mmu.h"
#include "arm64.h"
#include "hyp.h"
#include "reset.h"
#include "io/vgic.h"
#include "io/vgic_v3.h"
#include "io/vtimer.h"
#include "vmm_handlers.h"
#include "vmm_stat.h"

/*
 * Return values of the exit handlers: HANDLED means the guest can be
 * resumed directly, UNHANDLED means vmmops_run() must return to its caller.
 */
#define	HANDLED		1
#define	UNHANDLED	0

/* Number of bits in an EL2 virtual address */
#define	EL2_VIRT_BITS	48
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);

/* TODO: Move the host hypctx off the stack */
#define	VMM_STACK_PAGES	4
#define	VMM_STACK_SIZE	(VMM_STACK_PAGES * PAGE_SIZE)

/*
 * Stage 2 configuration computed once in vmmops_modinit():
 *  vmm_pmap_levels:  number of stage 2 page table levels
 *  vmm_virt_bits:    stage 2 input address size programmed into VTCR_EL2.T0SZ
 *  vmm_max_ipa_bits: upper bound used to validate guest physical addresses
 */
static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;

/* Register values passed to arm_setup_vectors to set in the hypervisor */
struct vmm_init_regs {
	uint64_t tcr_el2;
	uint64_t vtcr_el2;
};

MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");

extern char hyp_init_vectors[];
extern char hyp_vectors[];
extern char hyp_stub_vectors[];

/*
 * Physical base and page-rounded length of the identity-mapped EL2 code,
 * recorded by vmmops_modinit() so vmmops_modcleanup() can unmap it.
 */
static vm_paddr_t hyp_code_base;
static size_t hyp_code_len;

/*
 * Per-CPU hypervisor stacks: the kernel allocation and the EL2 virtual
 * address it is mapped at.  Both are filled in by vmmops_modinit().
 */
static char *stack[MAXCPU];
static vm_offset_t stack_hyp_va[MAXCPU];

/* Allocator for EL2 virtual address space, created in vmmops_modinit() */
static vmem_t *el2_mem_alloc;

static void arm_setup_vectors(void *arg);

DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); 107 108 static inline void 109 arm64_set_active_vcpu(struct hypctx *hypctx) 110 { 111 DPCPU_SET(vcpu, hypctx); 112 } 113 114 struct hypctx * 115 arm64_get_active_vcpu(void) 116 { 117 return (DPCPU_GET(vcpu)); 118 } 119 120 static void 121 arm_setup_vectors(void *arg) 122 { 123 struct vmm_init_regs *el2_regs; 124 uintptr_t stack_top; 125 uint32_t sctlr_el2; 126 register_t daif; 127 128 el2_regs = arg; 129 arm64_set_active_vcpu(NULL); 130 131 /* 132 * Configure the system control register for EL2: 133 * 134 * SCTLR_EL2_M: MMU on 135 * SCTLR_EL2_C: Data cacheability not affected 136 * SCTLR_EL2_I: Instruction cacheability not affected 137 * SCTLR_EL2_A: Instruction alignment check 138 * SCTLR_EL2_SA: Stack pointer alignment check 139 * SCTLR_EL2_WXN: Treat writable memory as execute never 140 * ~SCTLR_EL2_EE: Data accesses are little-endian 141 */ 142 sctlr_el2 = SCTLR_EL2_RES1; 143 sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; 144 sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; 145 sctlr_el2 |= SCTLR_EL2_WXN; 146 sctlr_el2 &= ~SCTLR_EL2_EE; 147 148 daif = intr_disable(); 149 150 if (in_vhe()) { 151 WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2); 152 } else { 153 /* 154 * Install the temporary vectors which will be responsible for 155 * initializing the VMM when we next trap into EL2. 156 * 157 * x0: the exception vector table responsible for hypervisor 158 * initialization on the next call. 159 */ 160 vmm_call_hyp(vtophys(&vmm_hyp_code)); 161 162 /* Create and map the hypervisor stack */ 163 stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; 164 165 /* Special call to initialize EL2 */ 166 vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, 167 sctlr_el2, el2_regs->vtcr_el2); 168 } 169 170 intr_restore(daif); 171 } 172 173 static void 174 arm_teardown_vectors(void *arg) 175 { 176 register_t daif; 177 178 /* 179 * vmm_cleanup() will disable the MMU. 
For the next few instructions, 180 * before the hardware disables the MMU, one of the following is 181 * possible: 182 * 183 * a. The instruction addresses are fetched with the MMU disabled, 184 * and they must represent the actual physical addresses. This will work 185 * because we call the vmm_cleanup() function by its physical address. 186 * 187 * b. The instruction addresses are fetched using the old translation 188 * tables. This will work because we have an identity mapping in place 189 * in the translation tables and vmm_cleanup() is called by its physical 190 * address. 191 */ 192 daif = intr_disable(); 193 /* TODO: Invalidate the cache */ 194 vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors)); 195 intr_restore(daif); 196 197 arm64_set_active_vcpu(NULL); 198 } 199 200 static uint64_t 201 vmm_vtcr_el2_sl(u_int levels) 202 { 203 #if PAGE_SIZE == PAGE_SIZE_4K 204 switch (levels) { 205 case 2: 206 return (VTCR_EL2_SL0_4K_LVL2); 207 case 3: 208 return (VTCR_EL2_SL0_4K_LVL1); 209 case 4: 210 return (VTCR_EL2_SL0_4K_LVL0); 211 default: 212 panic("%s: Invalid number of page table levels %u", __func__, 213 levels); 214 } 215 #elif PAGE_SIZE == PAGE_SIZE_16K 216 switch (levels) { 217 case 2: 218 return (VTCR_EL2_SL0_16K_LVL2); 219 case 3: 220 return (VTCR_EL2_SL0_16K_LVL1); 221 case 4: 222 return (VTCR_EL2_SL0_16K_LVL0); 223 default: 224 panic("%s: Invalid number of page table levels %u", __func__, 225 levels); 226 } 227 #else 228 #error Unsupported page size 229 #endif 230 } 231 232 int 233 vmmops_modinit(int ipinum) 234 { 235 struct vmm_init_regs el2_regs; 236 vm_offset_t next_hyp_va; 237 vm_paddr_t vmm_base; 238 uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; 239 uint64_t cnthctl_el2; 240 int cpu, i; 241 bool rv __diagused; 242 243 if (!has_hyp()) { 244 printf( 245 "vmm: Processor doesn't have support for virtualization\n"); 246 return (ENXIO); 247 } 248 249 if (!vgic_present()) { 250 printf("vmm: No vgic found\n"); 251 return (ENODEV); 252 } 253 254 
if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) { 255 printf("vmm: Unable to read ID_AA64MMFR0_EL1\n"); 256 return (ENXIO); 257 } 258 pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); 259 /* 260 * Use 3 levels to give us up to 39 bits with 4k pages, or 261 * 47 bits with 16k pages. 262 */ 263 /* TODO: Check the number of levels for 64k pages */ 264 vmm_pmap_levels = 3; 265 switch (pa_range_field) { 266 case ID_AA64MMFR0_PARange_4G: 267 printf("vmm: Not enough physical address bits\n"); 268 return (ENXIO); 269 case ID_AA64MMFR0_PARange_64G: 270 vmm_virt_bits = 36; 271 #if PAGE_SIZE == PAGE_SIZE_16K 272 vmm_pmap_levels = 2; 273 #endif 274 break; 275 default: 276 vmm_virt_bits = 39; 277 break; 278 } 279 pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; 280 281 if (!in_vhe()) { 282 /* Initialise the EL2 MMU */ 283 if (!vmmpmap_init()) { 284 printf("vmm: Failed to init the EL2 MMU\n"); 285 return (ENOMEM); 286 } 287 } 288 289 /* Set up the stage 2 pmap callbacks */ 290 MPASS(pmap_clean_stage2_tlbi == NULL); 291 pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi; 292 pmap_stage2_invalidate_range = vmm_s2_tlbi_range; 293 pmap_stage2_invalidate_all = vmm_s2_tlbi_all; 294 295 if (!in_vhe()) { 296 /* 297 * Create an allocator for the virtual address space used by 298 * EL2. EL2 code is identity-mapped; the allocator is used to 299 * find space for VM structures. 300 */ 301 el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, 302 M_WAITOK); 303 304 /* Create the mappings for the hypervisor translation table. 
*/ 305 hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); 306 307 /* We need an physical identity mapping for when we activate the MMU */ 308 hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); 309 rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, 310 VM_PROT_READ | VM_PROT_EXECUTE); 311 MPASS(rv); 312 313 next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); 314 315 /* Create a per-CPU hypervisor stack */ 316 CPU_FOREACH(cpu) { 317 stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); 318 stack_hyp_va[cpu] = next_hyp_va; 319 320 for (i = 0; i < VMM_STACK_PAGES; i++) { 321 rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), 322 PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), 323 VM_PROT_READ | VM_PROT_WRITE); 324 MPASS(rv); 325 } 326 next_hyp_va += L2_SIZE; 327 } 328 329 el2_regs.tcr_el2 = TCR_EL2_RES1; 330 el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, 331 TCR_EL2_PS_52BITS); 332 el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); 333 el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; 334 #if PAGE_SIZE == PAGE_SIZE_4K 335 el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; 336 #elif PAGE_SIZE == PAGE_SIZE_16K 337 el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; 338 #else 339 #error Unsupported page size 340 #endif 341 #ifdef SMP 342 el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; 343 #endif 344 } 345 346 switch (pa_range_bits << TCR_EL2_PS_SHIFT) { 347 case TCR_EL2_PS_32BITS: 348 vmm_max_ipa_bits = 32; 349 break; 350 case TCR_EL2_PS_36BITS: 351 vmm_max_ipa_bits = 36; 352 break; 353 case TCR_EL2_PS_40BITS: 354 vmm_max_ipa_bits = 40; 355 break; 356 case TCR_EL2_PS_42BITS: 357 vmm_max_ipa_bits = 42; 358 break; 359 case TCR_EL2_PS_44BITS: 360 vmm_max_ipa_bits = 44; 361 break; 362 case TCR_EL2_PS_48BITS: 363 vmm_max_ipa_bits = 48; 364 break; 365 case TCR_EL2_PS_52BITS: 366 default: 367 vmm_max_ipa_bits = 52; 368 break; 369 } 370 371 /* 372 * Configure the Stage 2 translation control register: 373 * 374 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable 
375 * normal memory 376 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable 377 * normal memory 378 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel 379 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables 380 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner 381 * shareable 382 */ 383 el2_regs.vtcr_el2 = VTCR_EL2_RES1; 384 el2_regs.vtcr_el2 |= 385 min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT); 386 el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; 387 el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits); 388 el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels); 389 #if PAGE_SIZE == PAGE_SIZE_4K 390 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K; 391 #elif PAGE_SIZE == PAGE_SIZE_16K 392 el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K; 393 #else 394 #error Unsupported page size 395 #endif 396 #ifdef SMP 397 el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS; 398 #endif 399 400 smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); 401 402 if (!in_vhe()) { 403 /* Add memory to the vmem allocator (checking there is space) */ 404 if (vmm_base > (L2_SIZE + PAGE_SIZE)) { 405 /* 406 * Ensure there is an L2 block before the vmm code to check 407 * for buffer overflows on earlier data. Include the PAGE_SIZE 408 * of the minimum we can allocate. 409 */ 410 vmm_base -= L2_SIZE + PAGE_SIZE; 411 vmm_base = rounddown2(vmm_base, L2_SIZE); 412 413 /* 414 * Check there is memory before the vmm code to add. 415 * 416 * Reserve the L2 block at address 0 so NULL dereference will 417 * raise an exception. 418 */ 419 if (vmm_base > L2_SIZE) 420 vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE, 421 M_WAITOK); 422 } 423 424 /* 425 * Add the memory after the stacks. There is most of an L2 block 426 * between the last stack and the first allocation so this should 427 * be safe without adding more padding. 
428 */ 429 if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) 430 vmem_add(el2_mem_alloc, next_hyp_va, 431 HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); 432 } 433 cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL); 434 435 vgic_init(); 436 vtimer_init(cnthctl_el2); 437 438 return (0); 439 } 440 441 int 442 vmmops_modcleanup(void) 443 { 444 int cpu; 445 446 if (!in_vhe()) { 447 smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); 448 449 CPU_FOREACH(cpu) { 450 vmmpmap_remove(stack_hyp_va[cpu], 451 VMM_STACK_PAGES * PAGE_SIZE, false); 452 } 453 454 vmmpmap_remove(hyp_code_base, hyp_code_len, false); 455 } 456 457 vtimer_cleanup(); 458 459 if (!in_vhe()) { 460 vmmpmap_fini(); 461 462 CPU_FOREACH(cpu) 463 free(stack[cpu], M_HYP); 464 } 465 466 pmap_clean_stage2_tlbi = NULL; 467 pmap_stage2_invalidate_range = NULL; 468 pmap_stage2_invalidate_all = NULL; 469 470 return (0); 471 } 472 473 static vm_size_t 474 el2_hyp_size(struct vm *vm) 475 { 476 return (round_page(sizeof(struct hyp) + 477 sizeof(struct hypctx *) * vm_get_maxcpus(vm))); 478 } 479 480 static vm_size_t 481 el2_hypctx_size(void) 482 { 483 return (round_page(sizeof(struct hypctx))); 484 } 485 486 static vm_offset_t 487 el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot) 488 { 489 vmem_addr_t addr; 490 int err __diagused; 491 bool rv __diagused; 492 493 err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr); 494 MPASS(err == 0); 495 rv = vmmpmap_enter(addr, size, vtophys(data), prot); 496 MPASS(rv); 497 498 return (addr); 499 } 500 501 void * 502 vmmops_init(struct vm *vm, pmap_t pmap) 503 { 504 struct hyp *hyp; 505 vm_size_t size; 506 507 size = el2_hyp_size(vm); 508 hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 509 510 hyp->vm = vm; 511 hyp->vgic_attached = false; 512 513 vtimer_vminit(hyp); 514 vgic_vminit(hyp); 515 516 if (!in_vhe()) 517 hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, 518 VM_PROT_READ | VM_PROT_WRITE); 519 520 return (hyp); 521 } 522 523 void * 
524 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) 525 { 526 struct hyp *hyp = vmi; 527 struct hypctx *hypctx; 528 vm_size_t size; 529 530 size = el2_hypctx_size(); 531 hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); 532 533 KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), 534 ("%s: Invalid vcpuid %d", __func__, vcpuid)); 535 hyp->ctx[vcpuid] = hypctx; 536 537 hypctx->hyp = hyp; 538 hypctx->vcpu = vcpu1; 539 540 reset_vm_el01_regs(hypctx); 541 reset_vm_el2_regs(hypctx); 542 543 vtimer_cpuinit(hypctx); 544 vgic_cpuinit(hypctx); 545 546 if (!in_vhe()) 547 hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, 548 VM_PROT_READ | VM_PROT_WRITE); 549 550 return (hypctx); 551 } 552 553 static int 554 arm_vmm_pinit(pmap_t pmap) 555 { 556 557 pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels); 558 return (1); 559 } 560 561 struct vmspace * 562 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) 563 { 564 return (vmspace_alloc(min, max, arm_vmm_pinit)); 565 } 566 567 void 568 vmmops_vmspace_free(struct vmspace *vmspace) 569 { 570 571 pmap_remove_pages(vmspace_pmap(vmspace)); 572 vmspace_free(vmspace); 573 } 574 575 static inline void 576 arm64_print_hyp_regs(struct vm_exit *vme) 577 { 578 printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2); 579 printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); 580 printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); 581 printf("elr_el2: 0x%016lx\n", vme->pc); 582 } 583 584 static void 585 arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss, 586 struct vm_exit *vme_ret) 587 { 588 struct vm_guest_paging *paging; 589 struct vie *vie; 590 uint32_t esr_sas, reg_num; 591 592 /* 593 * Get the page address from HPFAR_EL2. 594 */ 595 vme_ret->u.inst_emul.gpa = 596 HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 597 /* Bits [11:0] are the same as bits [11:0] from the virtual address. 
*/ 598 vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 & 599 FAR_EL2_HPFAR_PAGE_MASK; 600 601 esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; 602 reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; 603 604 vie = &vme_ret->u.inst_emul.vie; 605 vie->access_size = 1 << esr_sas; 606 vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; 607 vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; 608 vie->reg = reg_num; 609 610 paging = &vme_ret->u.inst_emul.paging; 611 paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 612 paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); 613 paging->tcr_el1 = hypctx->tcr_el1; 614 paging->tcr2_el1 = hypctx->tcr2_el1; 615 paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 616 if ((hypctx->sctlr_el1 & SCTLR_M) != 0) 617 paging->flags |= VM_GP_MMU_ENABLED; 618 } 619 620 static void 621 arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) 622 { 623 uint32_t reg_num; 624 struct vre *vre; 625 626 /* u.hyp member will be replaced by u.reg_emul */ 627 vre = &vme_ret->u.reg_emul.vre; 628 629 vre->inst_syndrome = esr_iss; 630 /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ 631 vre->dir = (esr_iss & ISS_MSR_DIR) ? 
VM_DIR_READ : VM_DIR_WRITE; 632 reg_num = ISS_MSR_Rt(esr_iss); 633 vre->reg = reg_num; 634 } 635 636 void 637 raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc) 638 { 639 uint64_t esr; 640 641 if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t) 642 esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT; 643 else 644 esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT; 645 /* Set the bit that changes from insn -> data abort */ 646 if (dabort) 647 esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT; 648 /* Set the IL bit if set by hardware */ 649 esr |= hypctx->tf.tf_esr & ESR_ELx_IL; 650 651 vmmops_exception(hypctx, esr | fsc, far); 652 } 653 654 static int 655 handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret, 656 pmap_t pmap) 657 { 658 uint64_t gpa; 659 uint32_t esr_ec, esr_iss; 660 661 esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr); 662 esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK; 663 664 switch (esr_ec) { 665 case EXCP_UNKNOWN: 666 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1); 667 arm64_print_hyp_regs(vme_ret); 668 vme_ret->exitcode = VM_EXITCODE_HYP; 669 break; 670 case EXCP_TRAP_WFI_WFE: 671 if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */ 672 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1); 673 vme_ret->exitcode = VM_EXITCODE_WFI; 674 } else { 675 vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1); 676 vme_ret->exitcode = VM_EXITCODE_HYP; 677 } 678 break; 679 case EXCP_HVC: 680 vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1); 681 vme_ret->exitcode = VM_EXITCODE_HVC; 682 break; 683 case EXCP_MSR: 684 vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1); 685 arm64_gen_reg_emul_data(esr_iss, vme_ret); 686 vme_ret->exitcode = VM_EXITCODE_REG_EMUL; 687 break; 688 case EXCP_BRK: 689 vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1); 690 vme_ret->exitcode = VM_EXITCODE_BRK; 691 break; 692 case EXCP_SOFTSTP_EL0: 693 vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1); 694 vme_ret->exitcode = VM_EXITCODE_SS; 695 break; 696 case EXCP_INSN_ABORT_L: 697 case EXCP_DATA_ABORT_L: 698 
vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ? 699 VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1); 700 switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) { 701 case ISS_DATA_DFSC_TF_L0: 702 case ISS_DATA_DFSC_TF_L1: 703 case ISS_DATA_DFSC_TF_L2: 704 case ISS_DATA_DFSC_TF_L3: 705 case ISS_DATA_DFSC_AFF_L1: 706 case ISS_DATA_DFSC_AFF_L2: 707 case ISS_DATA_DFSC_AFF_L3: 708 case ISS_DATA_DFSC_PF_L1: 709 case ISS_DATA_DFSC_PF_L2: 710 case ISS_DATA_DFSC_PF_L3: 711 gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); 712 /* Check the IPA is valid */ 713 if (gpa >= (1ul << vmm_max_ipa_bits)) { 714 raise_data_insn_abort(hypctx, 715 hypctx->exit_info.far_el2, 716 esr_ec == EXCP_DATA_ABORT_L, 717 ISS_DATA_DFSC_ASF_L0); 718 vme_ret->inst_length = 0; 719 return (HANDLED); 720 } 721 722 if (vm_mem_allocated(hypctx->vcpu, gpa)) { 723 vme_ret->exitcode = VM_EXITCODE_PAGING; 724 vme_ret->inst_length = 0; 725 vme_ret->u.paging.esr = hypctx->tf.tf_esr; 726 vme_ret->u.paging.gpa = gpa; 727 } else if (esr_ec == EXCP_INSN_ABORT_L) { 728 /* 729 * Raise an external abort. 
Device memory is 730 * not executable 731 */ 732 raise_data_insn_abort(hypctx, 733 hypctx->exit_info.far_el2, false, 734 ISS_DATA_DFSC_EXT); 735 vme_ret->inst_length = 0; 736 return (HANDLED); 737 } else { 738 arm64_gen_inst_emul_data(hypctx, esr_iss, 739 vme_ret); 740 vme_ret->exitcode = VM_EXITCODE_INST_EMUL; 741 } 742 break; 743 default: 744 arm64_print_hyp_regs(vme_ret); 745 vme_ret->exitcode = VM_EXITCODE_HYP; 746 break; 747 } 748 749 break; 750 751 default: 752 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1); 753 arm64_print_hyp_regs(vme_ret); 754 vme_ret->exitcode = VM_EXITCODE_HYP; 755 break; 756 } 757 758 /* We don't don't do any instruction emulation here */ 759 return (UNHANDLED); 760 } 761 762 static int 763 arm64_handle_world_switch(struct hypctx *hypctx, int excp_type, 764 struct vm_exit *vme, pmap_t pmap) 765 { 766 int handled; 767 768 switch (excp_type) { 769 case EXCP_TYPE_EL1_SYNC: 770 /* The exit code will be set by handle_el1_sync_excp(). */ 771 handled = handle_el1_sync_excp(hypctx, vme, pmap); 772 break; 773 774 case EXCP_TYPE_EL1_IRQ: 775 case EXCP_TYPE_EL1_FIQ: 776 /* The host kernel will handle IRQs and FIQs. */ 777 vmm_stat_incr(hypctx->vcpu, 778 excp_type == EXCP_TYPE_EL1_IRQ ? 
VMEXIT_IRQ : VMEXIT_FIQ,1); 779 vme->exitcode = VM_EXITCODE_BOGUS; 780 handled = UNHANDLED; 781 break; 782 783 case EXCP_TYPE_EL1_ERROR: 784 case EXCP_TYPE_EL2_SYNC: 785 case EXCP_TYPE_EL2_IRQ: 786 case EXCP_TYPE_EL2_FIQ: 787 case EXCP_TYPE_EL2_ERROR: 788 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1); 789 vme->exitcode = VM_EXITCODE_BOGUS; 790 handled = UNHANDLED; 791 break; 792 793 default: 794 vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); 795 vme->exitcode = VM_EXITCODE_BOGUS; 796 handled = UNHANDLED; 797 break; 798 } 799 800 return (handled); 801 } 802 803 static void 804 ptp_release(void **cookie) 805 { 806 if (*cookie != NULL) { 807 vm_gpa_release(*cookie); 808 *cookie = NULL; 809 } 810 } 811 812 static void * 813 ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) 814 { 815 void *ptr; 816 817 ptp_release(cookie); 818 ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); 819 return (ptr); 820 } 821 822 /* log2 of the number of bytes in a page table entry */ 823 #define PTE_SHIFT 3 824 int 825 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, 826 int prot, uint64_t *gpa, int *is_fault) 827 { 828 struct hypctx *hypctx; 829 void *cookie; 830 uint64_t mask, *ptep, pte, pte_addr; 831 int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz; 832 bool is_el0; 833 834 /* Check if the MMU is off */ 835 if ((paging->flags & VM_GP_MMU_ENABLED) == 0) { 836 *is_fault = 0; 837 *gpa = gla; 838 return (0); 839 } 840 841 is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t; 842 843 if (ADDR_IS_KERNEL(gla)) { 844 /* If address translation is disabled raise an exception */ 845 if ((paging->tcr_el1 & TCR_EPD1) != 0) { 846 *is_fault = 1; 847 return (0); 848 } 849 if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) { 850 *is_fault = 1; 851 return (0); 852 } 853 pte_addr = paging->ttbr1_addr; 854 tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT; 855 /* Clear the top byte if TBI is on */ 856 if 
((paging->tcr_el1 & TCR_TBI1) != 0) 857 gla |= (0xfful << 56); 858 switch (paging->tcr_el1 & TCR_TG1_MASK) { 859 case TCR_TG1_4K: 860 granule_shift = PAGE_SHIFT_4K; 861 break; 862 case TCR_TG1_16K: 863 granule_shift = PAGE_SHIFT_16K; 864 break; 865 case TCR_TG1_64K: 866 granule_shift = PAGE_SHIFT_64K; 867 break; 868 default: 869 *is_fault = 1; 870 return (EINVAL); 871 } 872 } else { 873 /* If address translation is disabled raise an exception */ 874 if ((paging->tcr_el1 & TCR_EPD0) != 0) { 875 *is_fault = 1; 876 return (0); 877 } 878 if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) { 879 *is_fault = 1; 880 return (0); 881 } 882 pte_addr = paging->ttbr0_addr; 883 tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT; 884 /* Clear the top byte if TBI is on */ 885 if ((paging->tcr_el1 & TCR_TBI0) != 0) 886 gla &= ~(0xfful << 56); 887 switch (paging->tcr_el1 & TCR_TG0_MASK) { 888 case TCR_TG0_4K: 889 granule_shift = PAGE_SHIFT_4K; 890 break; 891 case TCR_TG0_16K: 892 granule_shift = PAGE_SHIFT_16K; 893 break; 894 case TCR_TG0_64K: 895 granule_shift = PAGE_SHIFT_64K; 896 break; 897 default: 898 *is_fault = 1; 899 return (EINVAL); 900 } 901 } 902 903 /* 904 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2 905 * for larger values. 906 */ 907 switch (granule_shift) { 908 case PAGE_SHIFT_4K: 909 case PAGE_SHIFT_16K: 910 /* 911 * See "Table D8-11 4KB granule, determining stage 1 initial 912 * lookup level" and "Table D8-21 16KB granule, determining 913 * stage 1 initial lookup level" from the "Arm Architecture 914 * Reference Manual for A-Profile architecture" revision I.a 915 * for the minimum and maximum values. 916 * 917 * TODO: Support less than 16 when FEAT_LPA2 is implemented 918 * and TCR_EL1.DS == 1 919 * TODO: Support more than 39 when FEAT_TTST is implemented 920 */ 921 if (tsz < 16 || tsz > 39) { 922 *is_fault = 1; 923 return (EINVAL); 924 } 925 break; 926 case PAGE_SHIFT_64K: 927 /* TODO: Support 64k granule. 
It will probably work, but is untested */ 928 default: 929 *is_fault = 1; 930 return (EINVAL); 931 } 932 933 /* 934 * Calculate the input address bits. These are 64 bit in an address 935 * with the top tsz bits being all 0 or all 1. 936 */ 937 ia_bits = 64 - tsz; 938 939 /* 940 * Calculate the number of address bits used in the page table 941 * calculation. This is ia_bits minus the bottom granule_shift 942 * bits that are passed to the output address. 943 */ 944 address_bits = ia_bits - granule_shift; 945 946 /* 947 * Calculate the number of levels. Each level uses 948 * granule_shift - PTE_SHIFT bits of the input address. 949 * This is because the table is 1 << granule_shift and each 950 * entry is 1 << PTE_SHIFT bytes. 951 */ 952 levels = howmany(address_bits, granule_shift - PTE_SHIFT); 953 954 /* Mask of the upper unused bits in the virtual address */ 955 gla &= (1ul << ia_bits) - 1; 956 hypctx = (struct hypctx *)vcpui; 957 cookie = NULL; 958 /* TODO: Check if the level supports block descriptors */ 959 for (;levels > 0; levels--) { 960 int idx; 961 962 pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) + 963 granule_shift; 964 idx = (gla >> pte_shift) & 965 ((1ul << (granule_shift - PTE_SHIFT)) - 1); 966 while (idx > PAGE_SIZE / sizeof(pte)) { 967 idx -= PAGE_SIZE / sizeof(pte); 968 pte_addr += PAGE_SIZE; 969 } 970 971 ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie); 972 if (ptep == NULL) 973 goto error; 974 pte = ptep[idx]; 975 976 /* Calculate the level we are looking at */ 977 switch (levels) { 978 default: 979 goto fault; 980 /* TODO: Level -1 when FEAT_LPA2 is implemented */ 981 case 4: /* Level 0 */ 982 if ((pte & ATTR_DESCR_MASK) != L0_TABLE) 983 goto fault; 984 /* FALLTHROUGH */ 985 case 3: /* Level 1 */ 986 case 2: /* Level 2 */ 987 switch (pte & ATTR_DESCR_MASK) { 988 /* Use L1 macro as all levels are the same */ 989 case L1_TABLE: 990 /* Check if EL0 can access this address space */ 991 if (is_el0 && 992 (pte & 
TATTR_AP_TABLE_NO_EL0) != 0) 993 goto fault; 994 /* Check if the address space is writable */ 995 if ((prot & PROT_WRITE) != 0 && 996 (pte & TATTR_AP_TABLE_RO) != 0) 997 goto fault; 998 if ((prot & PROT_EXEC) != 0) { 999 /* Check the table exec attribute */ 1000 if ((is_el0 && 1001 (pte & TATTR_UXN_TABLE) != 0) || 1002 (!is_el0 && 1003 (pte & TATTR_PXN_TABLE) != 0)) 1004 goto fault; 1005 } 1006 pte_addr = pte & ~ATTR_MASK; 1007 break; 1008 case L1_BLOCK: 1009 goto done; 1010 default: 1011 goto fault; 1012 } 1013 break; 1014 case 1: /* Level 3 */ 1015 if ((pte & ATTR_DESCR_MASK) == L3_PAGE) 1016 goto done; 1017 goto fault; 1018 } 1019 } 1020 1021 done: 1022 /* Check if EL0 has access to the block/page */ 1023 if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0) 1024 goto fault; 1025 if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0) 1026 goto fault; 1027 if ((prot & PROT_EXEC) != 0) { 1028 if ((is_el0 && (pte & ATTR_S1_UXN) != 0) || 1029 (!is_el0 && (pte & ATTR_S1_PXN) != 0)) 1030 goto fault; 1031 } 1032 mask = (1ul << pte_shift) - 1; 1033 *gpa = (pte & ~ATTR_MASK) | (gla & mask); 1034 *is_fault = 0; 1035 ptp_release(&cookie); 1036 return (0); 1037 1038 error: 1039 ptp_release(&cookie); 1040 return (EFAULT); 1041 fault: 1042 *is_fault = 1; 1043 ptp_release(&cookie); 1044 return (0); 1045 } 1046 1047 int 1048 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) 1049 { 1050 uint64_t excp_type; 1051 int handled; 1052 register_t daif; 1053 struct hyp *hyp; 1054 struct hypctx *hypctx; 1055 struct vcpu *vcpu; 1056 struct vm_exit *vme; 1057 int mode; 1058 1059 hypctx = (struct hypctx *)vcpui; 1060 hyp = hypctx->hyp; 1061 vcpu = hypctx->vcpu; 1062 vme = vm_exitinfo(vcpu); 1063 1064 hypctx->tf.tf_elr = (uint64_t)pc; 1065 1066 for (;;) { 1067 if (hypctx->has_exception) { 1068 hypctx->has_exception = false; 1069 hypctx->elr_el1 = hypctx->tf.tf_elr; 1070 1071 mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); 1072 1073 if (mode == 
PSR_M_EL1t) { 1074 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0; 1075 } else if (mode == PSR_M_EL1h) { 1076 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200; 1077 } else if ((mode & PSR_M_32) == PSR_M_64) { 1078 /* 64-bit EL0 */ 1079 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400; 1080 } else { 1081 /* 32-bit EL0 */ 1082 hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600; 1083 } 1084 1085 /* Set the new spsr */ 1086 hypctx->spsr_el1 = hypctx->tf.tf_spsr; 1087 1088 /* Set the new cpsr */ 1089 hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS; 1090 hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h; 1091 1092 /* 1093 * Update fields that may change on exeption entry 1094 * based on how sctlr_el1 is configured. 1095 */ 1096 if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0) 1097 hypctx->tf.tf_spsr |= PSR_PAN; 1098 if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0) 1099 hypctx->tf.tf_spsr &= ~PSR_SSBS; 1100 else 1101 hypctx->tf.tf_spsr |= PSR_SSBS; 1102 } 1103 1104 daif = intr_disable(); 1105 1106 /* Check if the vcpu is suspended */ 1107 if (vcpu_suspended(evinfo)) { 1108 intr_restore(daif); 1109 vm_exit_suspended(vcpu, pc); 1110 break; 1111 } 1112 1113 if (vcpu_debugged(vcpu)) { 1114 intr_restore(daif); 1115 vm_exit_debug(vcpu, pc); 1116 break; 1117 } 1118 1119 /* Activate the stage2 pmap so the vmid is valid */ 1120 pmap_activate_vm(pmap); 1121 hyp->vttbr_el2 = pmap_to_ttbr0(pmap); 1122 1123 /* 1124 * TODO: What happens if a timer interrupt is asserted exactly 1125 * here, but for the previous VM? 1126 */ 1127 arm64_set_active_vcpu(hypctx); 1128 vgic_flush_hwstate(hypctx); 1129 1130 /* Call into EL2 to switch to the guest */ 1131 excp_type = vmm_enter_guest(hyp, hypctx); 1132 1133 vgic_sync_hwstate(hypctx); 1134 vtimer_sync_hwstate(hypctx); 1135 1136 /* 1137 * Deactivate the stage2 pmap. 
1138 */ 1139 PCPU_SET(curvmpmap, NULL); 1140 intr_restore(daif); 1141 1142 vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); 1143 if (excp_type == EXCP_TYPE_MAINT_IRQ) 1144 continue; 1145 1146 vme->pc = hypctx->tf.tf_elr; 1147 vme->inst_length = INSN_SIZE; 1148 vme->u.hyp.exception_nr = excp_type; 1149 vme->u.hyp.esr_el2 = hypctx->tf.tf_esr; 1150 vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; 1151 vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; 1152 1153 handled = arm64_handle_world_switch(hypctx, excp_type, vme, 1154 pmap); 1155 if (handled == UNHANDLED) 1156 /* Exit loop to emulate instruction. */ 1157 break; 1158 else 1159 /* Resume guest execution from the next instruction. */ 1160 hypctx->tf.tf_elr += vme->inst_length; 1161 } 1162 1163 return (0); 1164 } 1165 1166 static void 1167 arm_pcpu_vmcleanup(void *arg) 1168 { 1169 struct hyp *hyp; 1170 int i, maxcpus; 1171 1172 hyp = arg; 1173 maxcpus = vm_get_maxcpus(hyp->vm); 1174 for (i = 0; i < maxcpus; i++) { 1175 if (arm64_get_active_vcpu() == hyp->ctx[i]) { 1176 arm64_set_active_vcpu(NULL); 1177 break; 1178 } 1179 } 1180 } 1181 1182 void 1183 vmmops_vcpu_cleanup(void *vcpui) 1184 { 1185 struct hypctx *hypctx = vcpui; 1186 1187 vtimer_cpucleanup(hypctx); 1188 vgic_cpucleanup(hypctx); 1189 1190 if (!in_vhe()) 1191 vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); 1192 1193 free(hypctx, M_HYP); 1194 } 1195 1196 void 1197 vmmops_cleanup(void *vmi) 1198 { 1199 struct hyp *hyp = vmi; 1200 1201 vtimer_vmcleanup(hyp); 1202 vgic_vmcleanup(hyp); 1203 1204 smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); 1205 1206 if (!in_vhe()) 1207 vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); 1208 1209 free(hyp, M_HYP); 1210 } 1211 1212 /* 1213 * Return register value. Registers have different sizes and an explicit cast 1214 * must be made to ensure proper conversion. 1215 */ 1216 static uint64_t * 1217 hypctx_regptr(struct hypctx *hypctx, int reg) 1218 { 1219 switch (reg) { 1220 case VM_REG_GUEST_X0 ... 
VM_REG_GUEST_X29: 1221 return (&hypctx->tf.tf_x[reg]); 1222 case VM_REG_GUEST_LR: 1223 return (&hypctx->tf.tf_lr); 1224 case VM_REG_GUEST_SP: 1225 return (&hypctx->tf.tf_sp); 1226 case VM_REG_GUEST_CPSR: 1227 return (&hypctx->tf.tf_spsr); 1228 case VM_REG_GUEST_PC: 1229 return (&hypctx->tf.tf_elr); 1230 case VM_REG_GUEST_SCTLR_EL1: 1231 return (&hypctx->sctlr_el1); 1232 case VM_REG_GUEST_TTBR0_EL1: 1233 return (&hypctx->ttbr0_el1); 1234 case VM_REG_GUEST_TTBR1_EL1: 1235 return (&hypctx->ttbr1_el1); 1236 case VM_REG_GUEST_TCR_EL1: 1237 return (&hypctx->tcr_el1); 1238 case VM_REG_GUEST_TCR2_EL1: 1239 return (&hypctx->tcr2_el1); 1240 default: 1241 break; 1242 } 1243 return (NULL); 1244 } 1245 1246 int 1247 vmmops_getreg(void *vcpui, int reg, uint64_t *retval) 1248 { 1249 uint64_t *regp; 1250 int running, hostcpu; 1251 struct hypctx *hypctx = vcpui; 1252 1253 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1254 if (running && hostcpu != curcpu) 1255 panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm), 1256 vcpu_vcpuid(hypctx->vcpu)); 1257 1258 regp = hypctx_regptr(hypctx, reg); 1259 if (regp == NULL) 1260 return (EINVAL); 1261 1262 *retval = *regp; 1263 return (0); 1264 } 1265 1266 int 1267 vmmops_setreg(void *vcpui, int reg, uint64_t val) 1268 { 1269 uint64_t *regp; 1270 struct hypctx *hypctx = vcpui; 1271 int running, hostcpu; 1272 1273 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1274 if (running && hostcpu != curcpu) 1275 panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm), 1276 vcpu_vcpuid(hypctx->vcpu)); 1277 1278 regp = hypctx_regptr(hypctx, reg); 1279 if (regp == NULL) 1280 return (EINVAL); 1281 1282 *regp = val; 1283 return (0); 1284 } 1285 1286 int 1287 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far) 1288 { 1289 struct hypctx *hypctx = vcpui; 1290 int running, hostcpu; 1291 1292 running = vcpu_is_running(hypctx->vcpu, &hostcpu); 1293 if (running && hostcpu != curcpu) 1294 panic("%s: %s%d is running", __func__, 
vm_name(hypctx->hyp->vm), 1295 vcpu_vcpuid(hypctx->vcpu)); 1296 1297 hypctx->far_el1 = far; 1298 hypctx->esr_el1 = esr; 1299 hypctx->has_exception = true; 1300 1301 return (0); 1302 } 1303 1304 int 1305 vmmops_getcap(void *vcpui, int num, int *retval) 1306 { 1307 struct hypctx *hypctx = vcpui; 1308 int ret; 1309 1310 ret = ENOENT; 1311 1312 switch (num) { 1313 case VM_CAP_UNRESTRICTED_GUEST: 1314 *retval = 1; 1315 ret = 0; 1316 break; 1317 case VM_CAP_BRK_EXIT: 1318 case VM_CAP_SS_EXIT: 1319 case VM_CAP_MASK_HWINTR: 1320 *retval = (hypctx->setcaps & (1ul << num)) != 0; 1321 break; 1322 default: 1323 break; 1324 } 1325 1326 return (ret); 1327 } 1328 1329 int 1330 vmmops_setcap(void *vcpui, int num, int val) 1331 { 1332 struct hypctx *hypctx = vcpui; 1333 int ret; 1334 1335 ret = 0; 1336 1337 switch (num) { 1338 case VM_CAP_BRK_EXIT: 1339 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1340 break; 1341 if (val != 0) 1342 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1343 else 1344 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1345 break; 1346 case VM_CAP_SS_EXIT: 1347 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1348 break; 1349 1350 if (val != 0) { 1351 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS); 1352 hypctx->debug_mdscr |= hypctx->mdscr_el1 & 1353 (MDSCR_SS | MDSCR_KDE); 1354 1355 hypctx->tf.tf_spsr |= PSR_SS; 1356 hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE; 1357 hypctx->mdcr_el2 |= MDCR_EL2_TDE; 1358 } else { 1359 hypctx->tf.tf_spsr &= ~PSR_SS; 1360 hypctx->tf.tf_spsr |= hypctx->debug_spsr; 1361 hypctx->debug_spsr &= ~PSR_SS; 1362 hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE); 1363 hypctx->mdscr_el1 |= hypctx->debug_mdscr; 1364 hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE); 1365 hypctx->mdcr_el2 &= ~MDCR_EL2_TDE; 1366 } 1367 break; 1368 case VM_CAP_MASK_HWINTR: 1369 if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0)) 1370 break; 1371 1372 if (val != 0) { 1373 hypctx->debug_spsr |= (hypctx->tf.tf_spsr & 1374 (PSR_I | PSR_F)); 1375 
hypctx->tf.tf_spsr |= PSR_I | PSR_F; 1376 } else { 1377 hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F); 1378 hypctx->tf.tf_spsr |= (hypctx->debug_spsr & 1379 (PSR_I | PSR_F)); 1380 hypctx->debug_spsr &= ~(PSR_I | PSR_F); 1381 } 1382 break; 1383 default: 1384 ret = ENOENT; 1385 break; 1386 } 1387 1388 if (ret == 0) { 1389 if (val == 0) 1390 hypctx->setcaps &= ~(1ul << num); 1391 else 1392 hypctx->setcaps |= (1ul << num); 1393 } 1394 1395 return (ret); 1396 } 1397