1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/cpuset.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mutex.h> 38 #include <sys/pcpu.h> 39 #include <sys/proc.h> 40 #include <sys/queue.h> 41 #include <sys/rwlock.h> 42 #include <sys/sched.h> 43 #include <sys/smp.h> 44 #include <sys/sysctl.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/pmap.h> 50 #include <vm/vm_map.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_param.h> 53 54 #include <machine/armreg.h> 55 #include <machine/cpu.h> 56 #include <machine/fpu.h> 57 #include <machine/machdep.h> 58 #include <machine/pcb.h> 59 #include <machine/smp.h> 60 #include <machine/vm.h> 61 #include <machine/vmparam.h> 62 #include <machine/vmm.h> 63 #include <machine/vmm_instruction_emul.h> 64 65 #include <dev/pci/pcireg.h> 66 #include <dev/vmm/vmm_dev.h> 67 #include <dev/vmm/vmm_ktr.h> 68 #include <dev/vmm/vmm_stat.h> 69 70 #include "arm64.h" 71 #include "mmu.h" 72 73 #include "io/vgic.h" 74 #include "io/vtimer.h" 75 76 struct vcpu { 77 int flags; 78 enum vcpu_state state; 79 struct mtx mtx; 80 int hostcpu; /* host cpuid this vcpu last ran on */ 81 int vcpuid; 82 void *stats; 83 struct vm_exit exitinfo; 84 uint64_t nextpc; /* (x) next instruction to execute */ 85 struct vm *vm; /* (o) */ 86 void *cookie; /* (i) cpu-specific data */ 87 struct vfpstate *guestfpu; /* (a,i) guest fpu state */ 88 }; 89 90 #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 91 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 92 #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 93 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 94 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 95 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 96 97 struct mem_seg { 98 uint64_t gpa; 99 size_t len; 100 bool wired; 101 bool sysmem; 102 vm_object_t object; 103 }; 104 #define VM_MAX_MEMSEGS 3 105 106 struct mem_map { 107 vm_paddr_t gpa; 108 size_t len; 109 vm_ooffset_t segoff; 110 int segid; 111 int prot; 112 int flags; 113 }; 114 #define VM_MAX_MEMMAPS 4 115 116 struct vmm_mmio_region { 117 uint64_t start; 118 uint64_t end; 119 mem_region_read_t read; 120 mem_region_write_t write; 121 }; 122 #define VM_MAX_MMIO_REGIONS 4 123 124 struct vmm_special_reg { 125 uint32_t esr_iss; 126 uint32_t esr_mask; 127 reg_read_t reg_read; 128 reg_write_t reg_write; 129 void *arg; 130 }; 131 #define VM_MAX_SPECIAL_REGS 16 132 133 /* 134 * Initialization: 135 * (o) initialized the first time the VM is created 136 * (i) initialized when VM is created and when it is reinitialized 137 * (x) initialized before use 138 */ 139 struct vm { 140 void *cookie; /* (i) cpu-specific data */ 141 volatile cpuset_t active_cpus; /* (i) active vcpus */ 142 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 143 int suspend; /* (i) stop VM execution */ 144 bool dying; /* (o) is dying */ 145 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 146 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 147 struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ 148 struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ 149 struct vmspace *vmspace; /* (o) guest's address space */ 150 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 151 struct vcpu **vcpu; /* (i) guest vcpus */ 152 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; 153 /* (o) guest MMIO regions */ 154 struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS]; 155 /* The following describe the vm cpu topology */ 156 uint16_t sockets; /* (o) num of sockets */ 157 uint16_t cores; /* (o) num of cores/socket */ 158 uint16_t threads; /* (o) num of threads/core */ 159 uint16_t maxcpus; /* (o) max pluggable cpus */ 160 struct sx mem_segs_lock; /* (o) */ 161 struct sx vcpus_init_lock; /* (o) */ 162 }; 163 164 static bool vmm_initialized = false; 165 166 static int vm_handle_wfi(struct vcpu *vcpu, 167 struct vm_exit *vme, bool *retu); 168 169 static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); 170 171 /* statistics */ 172 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 173 174 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 175 176 static int vmm_ipinum; 177 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 178 "IPI vector used for vcpu notifications"); 179 180 struct vmm_regs { 181 uint64_t id_aa64afr0; 182 uint64_t id_aa64afr1; 183 uint64_t id_aa64dfr0; 184 uint64_t id_aa64dfr1; 185 uint64_t id_aa64isar0; 186 uint64_t id_aa64isar1; 187 uint64_t id_aa64isar2; 188 uint64_t id_aa64mmfr0; 189 uint64_t id_aa64mmfr1; 190 uint64_t id_aa64mmfr2; 191 uint64_t id_aa64pfr0; 192 uint64_t id_aa64pfr1; 193 }; 194 195 static const struct vmm_regs vmm_arch_regs_masks = { 196 .id_aa64dfr0 = 197 ID_AA64DFR0_CTX_CMPs_MASK | 198 ID_AA64DFR0_WRPs_MASK | 199 ID_AA64DFR0_BRPs_MASK | 200 ID_AA64DFR0_PMUVer_3 | 201 ID_AA64DFR0_DebugVer_8, 202 .id_aa64isar0 = 203 ID_AA64ISAR0_TLB_TLBIOSR | 204 ID_AA64ISAR0_SHA3_IMPL | 205 ID_AA64ISAR0_RDM_IMPL | 206 ID_AA64ISAR0_Atomic_IMPL | 207 ID_AA64ISAR0_CRC32_BASE | 208 ID_AA64ISAR0_SHA2_512 | 209 ID_AA64ISAR0_SHA1_BASE | 210 ID_AA64ISAR0_AES_PMULL, 211 .id_aa64mmfr0 = 212 ID_AA64MMFR0_TGran4_IMPL | 213 ID_AA64MMFR0_TGran64_IMPL | 214 ID_AA64MMFR0_TGran16_IMPL | 215 ID_AA64MMFR0_ASIDBits_16 | 216 ID_AA64MMFR0_PARange_4P, 217 .id_aa64mmfr1 = 218 ID_AA64MMFR1_SpecSEI_IMPL | 219 ID_AA64MMFR1_PAN_ATS1E1 | 220 ID_AA64MMFR1_HAFDBS_AF, 221 .id_aa64pfr0 = 222 ID_AA64PFR0_GIC_CPUIF_NONE | 223 ID_AA64PFR0_AdvSIMD_HP | 224 ID_AA64PFR0_FP_HP | 225 ID_AA64PFR0_EL3_64 | 226 ID_AA64PFR0_EL2_64 | 227 ID_AA64PFR0_EL1_64 | 228 ID_AA64PFR0_EL0_64, 229 }; 230 231 /* Host registers masked by vmm_arch_regs_masks. */ 232 static struct vmm_regs vmm_arch_regs; 233 234 u_int vm_maxcpu; 235 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 236 &vm_maxcpu, 0, "Maximum number of vCPUs"); 237 238 static void vm_free_memmap(struct vm *vm, int ident); 239 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 240 static void vcpu_notify_event_locked(struct vcpu *vcpu); 241 242 /* global statistics */ 243 VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); 244 VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); 245 VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); 246 VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); 247 VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); 248 VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); 249 VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); 250 VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); 251 VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); 252 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); 253 VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); 254 VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); 255 VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); 256 VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); 257 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); 258 259 /* 260 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this 261 * is a safe value for now. 262 */ 263 #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) 264 265 static int 266 vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) 267 { 268 #define _FETCH_KERN_REG(reg, field) do { \ 269 regs->field = vmm_arch_regs_masks.field; \ 270 if (!get_kernel_reg_masked(reg, ®s->field, masks->field)) \ 271 regs->field = 0; \ 272 } while (0) 273 _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0); 274 _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1); 275 _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0); 276 _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1); 277 _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0); 278 _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1); 279 _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2); 280 _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0); 281 _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1); 282 _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2); 283 _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0); 284 _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1); 285 #undef _FETCH_KERN_REG 286 return (0); 287 } 288 289 static void 290 vcpu_cleanup(struct vcpu *vcpu, bool destroy) 291 { 292 vmmops_vcpu_cleanup(vcpu->cookie); 293 vcpu->cookie = NULL; 294 if (destroy) { 295 vmm_stat_free(vcpu->stats); 296 fpu_save_area_free(vcpu->guestfpu); 297 vcpu_lock_destroy(vcpu); 298 } 299 } 300 301 static struct vcpu * 302 vcpu_alloc(struct vm *vm, int vcpu_id) 303 { 304 struct vcpu *vcpu; 305 306 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 307 ("vcpu_alloc: invalid vcpu %d", vcpu_id)); 308 309 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); 310 vcpu_lock_init(vcpu); 311 vcpu->state = VCPU_IDLE; 312 vcpu->hostcpu = NOCPU; 313 vcpu->vcpuid = vcpu_id; 314 vcpu->vm = vm; 315 vcpu->guestfpu = fpu_save_area_alloc(); 316 vcpu->stats = vmm_stat_alloc(); 317 return (vcpu); 318 } 319 320 static void 321 vcpu_init(struct vcpu *vcpu) 322 { 323 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 324 MPASS(vcpu->cookie != NULL); 325 fpu_save_area_reset(vcpu->guestfpu); 326 vmm_stat_init(vcpu->stats); 327 } 328 329 struct vm_exit * 330 vm_exitinfo(struct vcpu *vcpu) 331 { 332 return (&vcpu->exitinfo); 333 } 334 335 static int 336 vmm_init(void) 337 { 338 int error; 339 340 vm_maxcpu = mp_ncpus; 341 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 342 343 if (vm_maxcpu > VM_MAXCPU) { 344 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 345 vm_maxcpu = VM_MAXCPU; 346 } 347 if (vm_maxcpu == 0) 348 vm_maxcpu = 1; 349 350 error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); 351 if (error != 0) 352 return (error); 353 354 return (vmmops_modinit(0)); 355 } 356 357 static int 358 vmm_handler(module_t mod, int what, void *arg) 359 { 360 int error; 361 362 switch (what) { 363 case MOD_LOAD: 364 /* TODO: if (vmm_is_hw_supported()) { */ 365 error = vmmdev_init(); 366 if (error != 0) 367 break; 368 error = vmm_init(); 369 if (error == 0) 370 vmm_initialized = true; 371 break; 372 case MOD_UNLOAD: 373 /* TODO: if (vmm_is_hw_supported()) { */ 374 error = vmmdev_cleanup(); 375 if (error == 0 && vmm_initialized) { 376 error = vmmops_modcleanup(); 377 if (error) 378 vmm_initialized = false; 379 } 380 break; 381 default: 382 error = 0; 383 break; 384 } 385 return (error); 386 } 387 388 static moduledata_t vmm_kmod = { 389 "vmm", 390 vmm_handler, 391 NULL 392 }; 393 394 /* 395 * vmm initialization has the following dependencies: 396 * 397 * - HYP initialization requires smp_rendezvous() and therefore must happen 398 * after SMP is fully functional (after SI_SUB_SMP). 399 */ 400 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 401 MODULE_VERSION(vmm, 1); 402 403 static void 404 vm_init(struct vm *vm, bool create) 405 { 406 int i; 407 408 vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 409 MPASS(vm->cookie != NULL); 410 411 CPU_ZERO(&vm->active_cpus); 412 CPU_ZERO(&vm->debug_cpus); 413 414 vm->suspend = 0; 415 CPU_ZERO(&vm->suspended_cpus); 416 417 memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); 418 memset(vm->special_reg, 0, sizeof(vm->special_reg)); 419 420 if (!create) { 421 for (i = 0; i < vm->maxcpus; i++) { 422 if (vm->vcpu[i] != NULL) 423 vcpu_init(vm->vcpu[i]); 424 } 425 } 426 } 427 428 void 429 vm_disable_vcpu_creation(struct vm *vm) 430 { 431 sx_xlock(&vm->vcpus_init_lock); 432 vm->dying = true; 433 sx_xunlock(&vm->vcpus_init_lock); 434 } 435 436 struct vcpu * 437 vm_alloc_vcpu(struct vm *vm, int vcpuid) 438 { 439 struct vcpu *vcpu; 440 441 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 442 return (NULL); 443 444 /* Some interrupt controllers may have a CPU limit */ 445 if (vcpuid >= vgic_max_cpu_count(vm->cookie)) 446 return (NULL); 447 448 vcpu = (struct vcpu *) 449 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); 450 if (__predict_true(vcpu != NULL)) 451 return (vcpu); 452 453 sx_xlock(&vm->vcpus_init_lock); 454 vcpu = vm->vcpu[vcpuid]; 455 if (vcpu == NULL && !vm->dying) { 456 vcpu = vcpu_alloc(vm, vcpuid); 457 vcpu_init(vcpu); 458 459 /* 460 * Ensure vCPU is fully created before updating pointer 461 * to permit unlocked reads above. 462 */ 463 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 464 (uintptr_t)vcpu); 465 } 466 sx_xunlock(&vm->vcpus_init_lock); 467 return (vcpu); 468 } 469 470 void 471 vm_slock_vcpus(struct vm *vm) 472 { 473 sx_slock(&vm->vcpus_init_lock); 474 } 475 476 void 477 vm_unlock_vcpus(struct vm *vm) 478 { 479 sx_unlock(&vm->vcpus_init_lock); 480 } 481 482 int 483 vm_create(const char *name, struct vm **retvm) 484 { 485 struct vm *vm; 486 struct vmspace *vmspace; 487 488 /* 489 * If vmm.ko could not be successfully initialized then don't attempt 490 * to create the virtual machine. 491 */ 492 if (!vmm_initialized) 493 return (ENXIO); 494 495 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 496 return (EINVAL); 497 498 vmspace = vmmops_vmspace_alloc(0, 1ul << 39); 499 if (vmspace == NULL) 500 return (ENOMEM); 501 502 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 503 strcpy(vm->name, name); 504 vm->vmspace = vmspace; 505 sx_init(&vm->mem_segs_lock, "vm mem_segs"); 506 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 507 508 vm->sockets = 1; 509 vm->cores = 1; /* XXX backwards compatibility */ 510 vm->threads = 1; /* XXX backwards compatibility */ 511 vm->maxcpus = vm_maxcpu; 512 513 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 514 M_WAITOK | M_ZERO); 515 516 vm_init(vm, true); 517 518 *retvm = vm; 519 return (0); 520 } 521 522 void 523 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 524 uint16_t *threads, uint16_t *maxcpus) 525 { 526 *sockets = vm->sockets; 527 *cores = vm->cores; 528 *threads = vm->threads; 529 *maxcpus = vm->maxcpus; 530 } 531 532 uint16_t 533 vm_get_maxcpus(struct vm *vm) 534 { 535 return (vm->maxcpus); 536 } 537 538 int 539 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 540 uint16_t threads, uint16_t maxcpus) 541 { 542 /* Ignore maxcpus. */ 543 if ((sockets * cores * threads) > vm->maxcpus) 544 return (EINVAL); 545 vm->sockets = sockets; 546 vm->cores = cores; 547 vm->threads = threads; 548 return(0); 549 } 550 551 static void 552 vm_cleanup(struct vm *vm, bool destroy) 553 { 554 struct mem_map *mm; 555 pmap_t pmap __diagused; 556 int i; 557 558 if (destroy) { 559 pmap = vmspace_pmap(vm->vmspace); 560 sched_pin(); 561 PCPU_SET(curvmpmap, NULL); 562 sched_unpin(); 563 CPU_FOREACH(i) { 564 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); 565 } 566 } 567 568 vgic_detach_from_vm(vm->cookie); 569 570 for (i = 0; i < vm->maxcpus; i++) { 571 if (vm->vcpu[i] != NULL) 572 vcpu_cleanup(vm->vcpu[i], destroy); 573 } 574 575 vmmops_cleanup(vm->cookie); 576 577 /* 578 * System memory is removed from the guest address space only when 579 * the VM is destroyed. This is because the mapping remains the same 580 * across VM reset. 581 * 582 * Device memory can be relocated by the guest (e.g. using PCI BARs) 583 * so those mappings are removed on a VM reset. 584 */ 585 if (!destroy) { 586 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 587 mm = &vm->mem_maps[i]; 588 if (destroy || !sysmem_mapping(vm, mm)) 589 vm_free_memmap(vm, i); 590 } 591 } 592 593 if (destroy) { 594 for (i = 0; i < VM_MAX_MEMSEGS; i++) 595 vm_free_memseg(vm, i); 596 597 vmmops_vmspace_free(vm->vmspace); 598 vm->vmspace = NULL; 599 600 for (i = 0; i < vm->maxcpus; i++) 601 free(vm->vcpu[i], M_VMM); 602 free(vm->vcpu, M_VMM); 603 sx_destroy(&vm->vcpus_init_lock); 604 sx_destroy(&vm->mem_segs_lock); 605 } 606 } 607 608 void 609 vm_destroy(struct vm *vm) 610 { 611 vm_cleanup(vm, true); 612 free(vm, M_VMM); 613 } 614 615 int 616 vm_reinit(struct vm *vm) 617 { 618 int error; 619 620 /* 621 * A virtual machine can be reset only if all vcpus are suspended. 622 */ 623 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 624 vm_cleanup(vm, false); 625 vm_init(vm, false); 626 error = 0; 627 } else { 628 error = EBUSY; 629 } 630 631 return (error); 632 } 633 634 const char * 635 vm_name(struct vm *vm) 636 { 637 return (vm->name); 638 } 639 640 void 641 vm_slock_memsegs(struct vm *vm) 642 { 643 sx_slock(&vm->mem_segs_lock); 644 } 645 646 void 647 vm_xlock_memsegs(struct vm *vm) 648 { 649 sx_xlock(&vm->mem_segs_lock); 650 } 651 652 void 653 vm_unlock_memsegs(struct vm *vm) 654 { 655 sx_unlock(&vm->mem_segs_lock); 656 } 657 658 /* 659 * Return 'true' if 'gpa' is allocated in the guest address space. 660 * 661 * This function is called in the context of a running vcpu which acts as 662 * an implicit lock on 'vm->mem_maps[]'. 663 */ 664 bool 665 vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) 666 { 667 struct vm *vm = vcpu->vm; 668 struct mem_map *mm; 669 int i; 670 671 #ifdef INVARIANTS 672 int hostcpu, state; 673 state = vcpu_get_state(vcpu, &hostcpu); 674 KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 675 ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 676 #endif 677 678 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 679 mm = &vm->mem_maps[i]; 680 if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 681 return (true); /* 'gpa' is sysmem or devmem */ 682 } 683 684 return (false); 685 } 686 687 int 688 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 689 { 690 struct mem_seg *seg; 691 vm_object_t obj; 692 693 sx_assert(&vm->mem_segs_lock, SX_XLOCKED); 694 695 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 696 return (EINVAL); 697 698 if (len == 0 || (len & PAGE_MASK)) 699 return (EINVAL); 700 701 seg = &vm->mem_segs[ident]; 702 if (seg->object != NULL) { 703 if (seg->len == len && seg->sysmem == sysmem) 704 return (EEXIST); 705 else 706 return (EINVAL); 707 } 708 709 obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); 710 if (obj == NULL) 711 return (ENOMEM); 712 713 seg->len = len; 714 seg->object = obj; 715 seg->sysmem = sysmem; 716 return (0); 717 } 718 719 int 720 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 721 vm_object_t *objptr) 722 { 723 struct mem_seg *seg; 724 725 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 726 727 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 728 return (EINVAL); 729 730 seg = &vm->mem_segs[ident]; 731 if (len) 732 *len = seg->len; 733 if (sysmem) 734 *sysmem = seg->sysmem; 735 if (objptr) 736 *objptr = seg->object; 737 return (0); 738 } 739 740 void 741 vm_free_memseg(struct vm *vm, int ident) 742 { 743 struct mem_seg *seg; 744 745 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 746 ("%s: invalid memseg ident %d", __func__, ident)); 747 748 seg = &vm->mem_segs[ident]; 749 if (seg->object != NULL) { 750 vm_object_deallocate(seg->object); 751 bzero(seg, sizeof(struct mem_seg)); 752 } 753 } 754 755 int 756 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 757 size_t len, int prot, int flags) 758 { 759 struct mem_seg *seg; 760 struct mem_map *m, *map; 761 vm_ooffset_t last; 762 int i, error; 763 764 if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 765 return (EINVAL); 766 767 if (flags & ~VM_MEMMAP_F_WIRED) 768 return (EINVAL); 769 770 if (segid < 0 || segid >= VM_MAX_MEMSEGS) 771 return (EINVAL); 772 773 seg = &vm->mem_segs[segid]; 774 if (seg->object == NULL) 775 return (EINVAL); 776 777 last = first + len; 778 if (first < 0 || first >= last || last > seg->len) 779 return (EINVAL); 780 781 if ((gpa | first | last) & PAGE_MASK) 782 return (EINVAL); 783 784 map = NULL; 785 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 786 m = &vm->mem_maps[i]; 787 if (m->len == 0) { 788 map = m; 789 break; 790 } 791 } 792 793 if (map == NULL) 794 return (ENOSPC); 795 796 error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 797 len, 0, VMFS_NO_SPACE, prot, prot, 0); 798 if (error != KERN_SUCCESS) 799 return (EFAULT); 800 801 vm_object_reference(seg->object); 802 803 if (flags & VM_MEMMAP_F_WIRED) { 804 error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 805 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 806 if (error != KERN_SUCCESS) { 807 vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 808 return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : 809 EFAULT); 810 } 811 } 812 813 map->gpa = gpa; 814 map->len = len; 815 map->segoff = first; 816 map->segid = segid; 817 map->prot = prot; 818 map->flags = flags; 819 return (0); 820 } 821 822 int 823 vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) 824 { 825 struct mem_map *m; 826 int i; 827 828 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 829 m = &vm->mem_maps[i]; 830 if (m->gpa == gpa && m->len == len) { 831 vm_free_memmap(vm, i); 832 return (0); 833 } 834 } 835 836 return (EINVAL); 837 } 838 839 int 840 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 841 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 842 { 843 struct mem_map *mm, *mmnext; 844 int i; 845 846 mmnext = NULL; 847 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 848 mm = &vm->mem_maps[i]; 849 if (mm->len == 0 || mm->gpa < *gpa) 850 continue; 851 if (mmnext == NULL || mm->gpa < mmnext->gpa) 852 mmnext = mm; 853 } 854 855 if (mmnext != NULL) { 856 *gpa = mmnext->gpa; 857 if (segid) 858 *segid = mmnext->segid; 859 if (segoff) 860 *segoff = mmnext->segoff; 861 if (len) 862 *len = mmnext->len; 863 if (prot) 864 *prot = mmnext->prot; 865 if (flags) 866 *flags = mmnext->flags; 867 return (0); 868 } else { 869 return (ENOENT); 870 } 871 } 872 873 static void 874 vm_free_memmap(struct vm *vm, int ident) 875 { 876 struct mem_map *mm; 877 int error __diagused; 878 879 mm = &vm->mem_maps[ident]; 880 if (mm->len) { 881 error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 882 mm->gpa + mm->len); 883 KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 884 __func__, error)); 885 bzero(mm, sizeof(struct mem_map)); 886 } 887 } 888 889 static __inline bool 890 sysmem_mapping(struct vm *vm, struct mem_map *mm) 891 { 892 893 if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 894 return (true); 895 else 896 return (false); 897 } 898 899 vm_paddr_t 900 vmm_sysmem_maxaddr(struct vm *vm) 901 { 902 struct mem_map *mm; 903 vm_paddr_t maxaddr; 904 int i; 905 906 maxaddr = 0; 907 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 908 mm = &vm->mem_maps[i]; 909 if (sysmem_mapping(vm, mm)) { 910 if (maxaddr < mm->gpa + mm->len) 911 maxaddr = mm->gpa + mm->len; 912 } 913 } 914 return (maxaddr); 915 } 916 917 int 918 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 919 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 920 { 921 922 vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); 923 return (0); 924 } 925 926 static int 927 vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) 928 { 929 *rval = 0; 930 return (0); 931 } 932 933 static int 934 vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) 935 { 936 *rval = *(uint64_t *)arg; 937 return (0); 938 } 939 940 static int 941 vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) 942 { 943 return (0); 944 } 945 946 static const struct vmm_special_reg vmm_special_regs[] = { 947 #define SPECIAL_REG(_reg, _read, _write) \ 948 { \ 949 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 950 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 951 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 952 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 953 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 954 .esr_mask = ISS_MSR_REG_MASK, \ 955 .reg_read = (_read), \ 956 .reg_write = (_write), \ 957 .arg = NULL, \ 958 } 959 #define ID_SPECIAL_REG(_reg, _name) \ 960 { \ 961 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 962 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 963 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 964 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 965 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 966 .esr_mask = ISS_MSR_REG_MASK, \ 967 .reg_read = vmm_reg_read_arg, \ 968 .reg_write = vmm_reg_wi, \ 969 .arg = &(vmm_arch_regs._name), \ 970 } 971 972 /* ID registers */ 973 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), 974 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), 975 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), 976 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), 977 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), 978 979 /* 980 * All other ID registers are read as zero. 981 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. 982 */ 983 { 984 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | 985 (0 << ISS_MSR_OP1_SHIFT) | 986 (0 << ISS_MSR_CRn_SHIFT) | 987 (0 << ISS_MSR_CRm_SHIFT), 988 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | 989 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), 990 .reg_read = vmm_reg_raz, 991 .reg_write = vmm_reg_wi, 992 .arg = NULL, 993 }, 994 995 /* Counter physical registers */ 996 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), 997 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, 998 vtimer_phys_cval_write), 999 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, 1000 vtimer_phys_tval_write), 1001 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), 1002 #undef SPECIAL_REG 1003 }; 1004 1005 void 1006 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, 1007 reg_read_t reg_read, reg_write_t reg_write, void *arg) 1008 { 1009 int i; 1010 1011 for (i = 0; i < nitems(vm->special_reg); i++) { 1012 if (vm->special_reg[i].esr_iss == 0 && 1013 vm->special_reg[i].esr_mask == 0) { 1014 vm->special_reg[i].esr_iss = iss; 1015 vm->special_reg[i].esr_mask = mask; 1016 vm->special_reg[i].reg_read = reg_read; 1017 vm->special_reg[i].reg_write = reg_write; 1018 vm->special_reg[i].arg = arg; 1019 return; 1020 } 1021 } 1022 1023 panic("%s: No free special register slot", __func__); 1024 } 1025 1026 void 1027 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) 1028 { 1029 int i; 1030 1031 for (i = 0; i < nitems(vm->special_reg); i++) { 1032 if (vm->special_reg[i].esr_iss == iss && 1033 vm->special_reg[i].esr_mask == mask) { 1034 memset(&vm->special_reg[i], 0, 1035 sizeof(vm->special_reg[i])); 1036 return; 1037 } 1038 } 1039 1040 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, 1041 mask); 1042 } 1043 1044 static int 1045 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) 1046 { 1047 struct vm *vm; 1048 struct vm_exit *vme; 1049 struct vre *vre; 1050 int i, rv; 1051 1052 vm = vcpu->vm; 1053 vme = &vcpu->exitinfo; 1054 vre = &vme->u.reg_emul.vre; 1055 1056 for (i = 0; i < nitems(vm->special_reg); i++) { 1057 if (vm->special_reg[i].esr_iss == 0 && 1058 vm->special_reg[i].esr_mask == 0) 1059 continue; 1060 1061 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == 1062 vm->special_reg[i].esr_iss) { 1063 rv = vmm_emulate_register(vcpu, vre, 1064 vm->special_reg[i].reg_read, 1065 vm->special_reg[i].reg_write, 1066 vm->special_reg[i].arg); 1067 if (rv == 0) { 1068 *retu = false; 1069 } 1070 return (rv); 1071 } 1072 } 1073 for (i = 0; i < nitems(vmm_special_regs); i++) { 1074 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == 1075 vmm_special_regs[i].esr_iss) { 1076 rv = vmm_emulate_register(vcpu, vre, 1077 vmm_special_regs[i].reg_read, 1078 vmm_special_regs[i].reg_write, 1079 vmm_special_regs[i].arg); 1080 if (rv == 0) { 1081 *retu = false; 1082 } 1083 return (rv); 1084 } 1085 } 1086 1087 1088 *retu = true; 1089 return (0); 1090 } 1091 1092 void 1093 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 1094 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 1095 { 1096 int i; 1097 1098 for (i = 0; i < nitems(vm->mmio_region); i++) { 1099 if (vm->mmio_region[i].start == 0 && 1100 vm->mmio_region[i].end == 0) { 1101 vm->mmio_region[i].start = start; 1102 vm->mmio_region[i].end = start + size; 1103 vm->mmio_region[i].read = mmio_read; 1104 vm->mmio_region[i].write = mmio_write; 1105 return; 1106 } 1107 } 1108 1109 panic("%s: No free MMIO region", __func__); 1110 } 1111 1112 void 1113 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 1114 { 1115 int i; 1116 1117 for (i = 0; i < nitems(vm->mmio_region); i++) { 1118 if (vm->mmio_region[i].start == start && 1119 vm->mmio_region[i].end == start + size) { 1120 memset(&vm->mmio_region[i], 0, 1121 sizeof(vm->mmio_region[i])); 1122 return; 1123 } 1124 } 1125 1126 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 1127 start + size); 1128 } 1129 1130 static int 1131 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 1132 { 1133 struct vm *vm; 1134 struct vm_exit *vme; 1135 struct vie *vie; 1136 struct hyp *hyp; 1137 uint64_t fault_ipa; 1138 struct vm_guest_paging *paging; 1139 struct vmm_mmio_region *vmr; 1140 int error, i; 1141 1142 vm = vcpu->vm; 1143 hyp = vm->cookie; 1144 if (!hyp->vgic_attached) 1145 goto out_user; 1146 1147 vme = &vcpu->exitinfo; 1148 vie = &vme->u.inst_emul.vie; 1149 paging = &vme->u.inst_emul.paging; 1150 1151 fault_ipa = vme->u.inst_emul.gpa; 1152 1153 vmr = NULL; 1154 for (i = 0; i < nitems(vm->mmio_region); i++) { 1155 if (vm->mmio_region[i].start <= fault_ipa && 1156 vm->mmio_region[i].end > fault_ipa) { 1157 vmr = &vm->mmio_region[i]; 1158 break; 1159 } 1160 } 1161 if (vmr == NULL) 1162 goto out_user; 1163 1164 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 1165 vmr->read, vmr->write, retu); 1166 return (error); 1167 1168 out_user: 1169 *retu = true; 1170 return (0); 1171 } 1172 1173 int 1174 vm_suspend(struct vm *vm, enum vm_suspend_how how) 1175 { 1176 int i; 1177 1178 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1179 return (EINVAL); 1180 1181 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1182 VM_CTR2(vm, "virtual machine already suspended %d/%d", 1183 vm->suspend, how); 1184 return (EALREADY); 1185 } 1186 1187 VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1188 1189 /* 1190 * Notify all active vcpus that they are now suspended. 1191 */ 1192 for (i = 0; i < vm->maxcpus; i++) { 1193 if (CPU_ISSET(i, &vm->active_cpus)) 1194 vcpu_notify_event(vm_vcpu(vm, i)); 1195 } 1196 1197 return (0); 1198 } 1199 1200 void 1201 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 1202 { 1203 struct vm *vm = vcpu->vm; 1204 struct vm_exit *vmexit; 1205 1206 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1207 ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1208 1209 vmexit = vm_exitinfo(vcpu); 1210 vmexit->pc = pc; 1211 vmexit->inst_length = 4; 1212 vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1213 vmexit->u.suspended.how = vm->suspend; 1214 } 1215 1216 void 1217 vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 1218 { 1219 struct vm_exit *vmexit; 1220 1221 vmexit = vm_exitinfo(vcpu); 1222 vmexit->pc = pc; 1223 vmexit->inst_length = 4; 1224 vmexit->exitcode = VM_EXITCODE_DEBUG; 1225 } 1226 1227 int 1228 vm_activate_cpu(struct vcpu *vcpu) 1229 { 1230 struct vm *vm = vcpu->vm; 1231 1232 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1233 return (EBUSY); 1234 1235 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 1236 return (0); 1237 1238 } 1239 1240 int 1241 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 1242 { 1243 if (vcpu == NULL) { 1244 vm->debug_cpus = vm->active_cpus; 1245 for (int i = 0; i < vm->maxcpus; i++) { 1246 if (CPU_ISSET(i, &vm->active_cpus)) 1247 vcpu_notify_event(vm_vcpu(vm, i)); 1248 } 1249 } else { 1250 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1251 return (EINVAL); 1252 1253 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1254 vcpu_notify_event(vcpu); 1255 } 1256 return (0); 1257 } 1258 1259 int 1260 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 1261 { 1262 1263 if (vcpu == NULL) { 1264 CPU_ZERO(&vm->debug_cpus); 1265 } else { 1266 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 1267 return (EINVAL); 1268 1269 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1270 } 1271 return (0); 1272 } 1273 1274 int 1275 vcpu_debugged(struct vcpu *vcpu) 1276 { 1277 1278 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1279 } 1280 1281 cpuset_t 1282 vm_active_cpus(struct vm *vm) 1283 { 1284 1285 return (vm->active_cpus); 1286 } 1287 1288 cpuset_t 1289 vm_debug_cpus(struct vm *vm) 1290 { 1291 1292 return (vm->debug_cpus); 1293 } 1294 1295 cpuset_t 1296 vm_suspended_cpus(struct vm *vm) 1297 { 1298 1299 return (vm->suspended_cpus); 1300 } 1301 1302 1303 void * 1304 vcpu_stats(struct vcpu *vcpu) 1305 { 1306 1307 return (vcpu->stats); 1308 } 1309 1310 /* 1311 * This function is called to ensure that a vcpu "sees" a pending event 1312 * as soon as possible: 1313 * - If the vcpu thread is sleeping then it is woken up. 1314 * - If the vcpu is running on a different host_cpu then an IPI will be directed 1315 * to the host_cpu to cause the vcpu to trap into the hypervisor. 1316 */ 1317 static void 1318 vcpu_notify_event_locked(struct vcpu *vcpu) 1319 { 1320 int hostcpu; 1321 1322 hostcpu = vcpu->hostcpu; 1323 if (vcpu->state == VCPU_RUNNING) { 1324 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1325 if (hostcpu != curcpu) { 1326 ipi_cpu(hostcpu, vmm_ipinum); 1327 } else { 1328 /* 1329 * If the 'vcpu' is running on 'curcpu' then it must 1330 * be sending a notification to itself (e.g. SELF_IPI). 1331 * The pending event will be picked up when the vcpu 1332 * transitions back to guest context. 1333 */ 1334 } 1335 } else { 1336 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1337 "with hostcpu %d", vcpu->state, hostcpu)); 1338 if (vcpu->state == VCPU_SLEEPING) 1339 wakeup_one(vcpu); 1340 } 1341 } 1342 1343 void 1344 vcpu_notify_event(struct vcpu *vcpu) 1345 { 1346 vcpu_lock(vcpu); 1347 vcpu_notify_event_locked(vcpu); 1348 vcpu_unlock(vcpu); 1349 } 1350 1351 static void 1352 restore_guest_fpustate(struct vcpu *vcpu) 1353 { 1354 1355 /* flush host state to the pcb */ 1356 vfp_save_state(curthread, curthread->td_pcb); 1357 /* Ensure the VFP state will be re-loaded when exiting the guest */ 1358 PCPU_SET(fpcurthread, NULL); 1359 1360 /* restore guest FPU state */ 1361 vfp_enable(); 1362 vfp_restore(vcpu->guestfpu); 1363 1364 /* 1365 * The FPU is now "dirty" with the guest's state so turn on emulation 1366 * to trap any access to the FPU by the host. 1367 */ 1368 vfp_disable(); 1369 } 1370 1371 static void 1372 save_guest_fpustate(struct vcpu *vcpu) 1373 { 1374 if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) != 1375 CPACR_FPEN_TRAP_ALL1) 1376 panic("VFP not enabled in host!"); 1377 1378 /* save guest FPU state */ 1379 vfp_enable(); 1380 vfp_store(vcpu->guestfpu); 1381 vfp_disable(); 1382 1383 KASSERT(PCPU_GET(fpcurthread) == NULL, 1384 ("%s: fpcurthread set with guest registers", __func__)); 1385 } 1386 static int 1387 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1388 bool from_idle) 1389 { 1390 int error; 1391 1392 vcpu_assert_locked(vcpu); 1393 1394 /* 1395 * State transitions from the vmmdev_ioctl() must always begin from 1396 * the VCPU_IDLE state. This guarantees that there is only a single 1397 * ioctl() operating on a vcpu at any point. 1398 */ 1399 if (from_idle) { 1400 while (vcpu->state != VCPU_IDLE) { 1401 vcpu_notify_event_locked(vcpu); 1402 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1403 } 1404 } else { 1405 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1406 "vcpu idle state")); 1407 } 1408 1409 if (vcpu->state == VCPU_RUNNING) { 1410 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1411 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1412 } else { 1413 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1414 "vcpu that is not running", vcpu->hostcpu)); 1415 } 1416 1417 /* 1418 * The following state transitions are allowed: 1419 * IDLE -> FROZEN -> IDLE 1420 * FROZEN -> RUNNING -> FROZEN 1421 * FROZEN -> SLEEPING -> FROZEN 1422 */ 1423 switch (vcpu->state) { 1424 case VCPU_IDLE: 1425 case VCPU_RUNNING: 1426 case VCPU_SLEEPING: 1427 error = (newstate != VCPU_FROZEN); 1428 break; 1429 case VCPU_FROZEN: 1430 error = (newstate == VCPU_FROZEN); 1431 break; 1432 default: 1433 error = 1; 1434 break; 1435 } 1436 1437 if (error) 1438 return (EBUSY); 1439 1440 vcpu->state = newstate; 1441 if (newstate == VCPU_RUNNING) 1442 vcpu->hostcpu = curcpu; 1443 else 1444 vcpu->hostcpu = NOCPU; 1445 1446 if (newstate == VCPU_IDLE) 1447 wakeup(&vcpu->state); 1448 1449 return (0); 1450 } 1451 1452 static void 1453 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1454 { 1455 int error; 1456 1457 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1458 panic("Error %d setting state to %d\n", error, newstate); 1459 } 1460 1461 static void 1462 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1463 { 1464 int error; 1465 1466 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1467 panic("Error %d setting state to %d", error, newstate); 1468 } 1469 1470 int 1471 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1472 { 1473 if (type < 0 || type >= VM_CAP_MAX) 1474 return (EINVAL); 1475 1476 return (vmmops_getcap(vcpu->cookie, type, retval)); 1477 } 1478 1479 int 1480 vm_set_capability(struct vcpu *vcpu, int type, int val) 1481 { 1482 if (type < 0 || type >= VM_CAP_MAX) 1483 return (EINVAL); 1484 1485 return (vmmops_setcap(vcpu->cookie, type, val)); 1486 } 1487 1488 struct vm * 1489 vcpu_vm(struct vcpu *vcpu) 1490 { 1491 return (vcpu->vm); 1492 } 1493 1494 int 1495 vcpu_vcpuid(struct vcpu *vcpu) 1496 { 1497 return (vcpu->vcpuid); 1498 } 1499 1500 void * 1501 vcpu_get_cookie(struct vcpu *vcpu) 1502 { 1503 return (vcpu->cookie); 1504 } 1505 1506 struct vcpu * 1507 vm_vcpu(struct vm *vm, int vcpuid) 1508 { 1509 return (vm->vcpu[vcpuid]); 1510 } 1511 1512 int 1513 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1514 { 1515 int error; 1516 1517 vcpu_lock(vcpu); 1518 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1519 vcpu_unlock(vcpu); 1520 1521 return (error); 1522 } 1523 1524 enum vcpu_state 1525 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1526 { 1527 enum vcpu_state state; 1528 1529 vcpu_lock(vcpu); 1530 state = vcpu->state; 1531 if (hostcpu != NULL) 1532 *hostcpu = vcpu->hostcpu; 1533 vcpu_unlock(vcpu); 1534 1535 return (state); 1536 } 1537 1538 static void * 1539 _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1540 void **cookie) 1541 { 1542 int i, count, pageoff; 1543 struct mem_map *mm; 1544 vm_page_t m; 1545 1546 pageoff = gpa & PAGE_MASK; 1547 if (len > PAGE_SIZE - pageoff) 1548 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1549 1550 count = 0; 1551 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 1552 mm = &vm->mem_maps[i]; 1553 if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 1554 gpa < mm->gpa + mm->len) { 1555 count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1556 trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 1557 break; 1558 } 1559 } 1560 1561 if (count == 1) { 1562 *cookie = m; 1563 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1564 } else { 1565 *cookie = NULL; 1566 return (NULL); 1567 } 1568 } 1569 1570 void * 1571 vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 1572 void **cookie) 1573 { 1574 #ifdef INVARIANTS 1575 /* 1576 * The current vcpu should be frozen to ensure 'vm_memmap[]' 1577 * stability. 1578 */ 1579 int state = vcpu_get_state(vcpu, NULL); 1580 KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 1581 __func__, state)); 1582 #endif 1583 return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 1584 } 1585 1586 void * 1587 vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1588 void **cookie) 1589 { 1590 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 1591 return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 1592 } 1593 1594 void 1595 vm_gpa_release(void *cookie) 1596 { 1597 vm_page_t m = cookie; 1598 1599 vm_page_unwire(m, PQ_ACTIVE); 1600 } 1601 1602 int 1603 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1604 { 1605 1606 if (reg >= VM_REG_LAST) 1607 return (EINVAL); 1608 1609 return (vmmops_getreg(vcpu->cookie, reg, retval)); 1610 } 1611 1612 int 1613 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1614 { 1615 int error; 1616 1617 if (reg >= VM_REG_LAST) 1618 return (EINVAL); 1619 error = vmmops_setreg(vcpu->cookie, reg, val); 1620 if (error || reg != VM_REG_GUEST_PC) 1621 return (error); 1622 1623 vcpu->nextpc = val; 1624 1625 return (0); 1626 } 1627 1628 void * 1629 vm_get_cookie(struct vm *vm) 1630 { 1631 return (vm->cookie); 1632 } 1633 1634 int 1635 vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far) 1636 { 1637 return (vmmops_exception(vcpu->cookie, esr, far)); 1638 } 1639 1640 int 1641 vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr) 1642 { 1643 return (vgic_attach_to_vm(vm->cookie, descr)); 1644 } 1645 1646 int 1647 vm_assert_irq(struct vm *vm, uint32_t irq) 1648 { 1649 return (vgic_inject_irq(vm->cookie, -1, irq, true)); 1650 } 1651 1652 int 1653 vm_deassert_irq(struct vm *vm, uint32_t irq) 1654 { 1655 return (vgic_inject_irq(vm->cookie, -1, irq, false)); 1656 } 1657 1658 int 1659 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 1660 int func) 1661 { 1662 /* TODO: Should we raise an SError? */ 1663 return (vgic_inject_msi(vm->cookie, msg, addr)); 1664 } 1665 1666 static int 1667 vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1668 { 1669 struct hypctx *hypctx; 1670 int i; 1671 1672 hypctx = vcpu_get_cookie(vcpu); 1673 1674 if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0) 1675 return (1); 1676 1677 vme->exitcode = VM_EXITCODE_SMCCC; 1678 vme->u.smccc_call.func_id = hypctx->tf.tf_x[0]; 1679 for (i = 0; i < nitems(vme->u.smccc_call.args); i++) 1680 vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1]; 1681 1682 *retu = true; 1683 return (0); 1684 } 1685 1686 static int 1687 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1688 { 1689 vcpu_lock(vcpu); 1690 while (1) { 1691 if (vgic_has_pending_irq(vcpu->cookie)) 1692 break; 1693 1694 if (vcpu_should_yield(vcpu)) 1695 break; 1696 1697 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1698 /* 1699 * XXX msleep_spin() cannot be interrupted by signals so 1700 * wake up periodically to check pending signals. 1701 */ 1702 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); 1703 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1704 } 1705 vcpu_unlock(vcpu); 1706 1707 *retu = false; 1708 return (0); 1709 } 1710 1711 static int 1712 vm_handle_paging(struct vcpu *vcpu, bool *retu) 1713 { 1714 struct vm *vm = vcpu->vm; 1715 struct vm_exit *vme; 1716 struct vm_map *map; 1717 uint64_t addr, esr; 1718 pmap_t pmap; 1719 int ftype, rv; 1720 1721 vme = &vcpu->exitinfo; 1722 1723 pmap = vmspace_pmap(vcpu->vm->vmspace); 1724 addr = vme->u.paging.gpa; 1725 esr = vme->u.paging.esr; 1726 1727 /* The page exists, but the page table needs to be updated. */ 1728 if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS) 1729 return (0); 1730 1731 switch (ESR_ELx_EXCEPTION(esr)) { 1732 case EXCP_INSN_ABORT_L: 1733 case EXCP_DATA_ABORT_L: 1734 ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE; 1735 break; 1736 default: 1737 panic("%s: Invalid exception (esr = %lx)", __func__, esr); 1738 } 1739 1740 map = &vm->vmspace->vm_map; 1741 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1742 if (rv != KERN_SUCCESS) 1743 return (EFAULT); 1744 1745 return (0); 1746 } 1747 1748 static int 1749 vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1750 { 1751 struct vm *vm = vcpu->vm; 1752 int error, i; 1753 struct thread *td; 1754 1755 error = 0; 1756 td = curthread; 1757 1758 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1759 1760 /* 1761 * Wait until all 'active_cpus' have suspended themselves. 1762 * 1763 * Since a VM may be suspended at any time including when one or 1764 * more vcpus are doing a rendezvous we need to call the rendezvous 1765 * handler while we are waiting to prevent a deadlock. 1766 */ 1767 vcpu_lock(vcpu); 1768 while (error == 0) { 1769 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) 1770 break; 1771 1772 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1773 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1774 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1775 if (td_ast_pending(td, TDA_SUSPEND)) { 1776 vcpu_unlock(vcpu); 1777 error = thread_check_susp(td, false); 1778 vcpu_lock(vcpu); 1779 } 1780 } 1781 vcpu_unlock(vcpu); 1782 1783 /* 1784 * Wakeup the other sleeping vcpus and return to userspace. 1785 */ 1786 for (i = 0; i < vm->maxcpus; i++) { 1787 if (CPU_ISSET(i, &vm->suspended_cpus)) { 1788 vcpu_notify_event(vm_vcpu(vm, i)); 1789 } 1790 } 1791 1792 *retu = true; 1793 return (error); 1794 } 1795 1796 int 1797 vm_run(struct vcpu *vcpu) 1798 { 1799 struct vm *vm = vcpu->vm; 1800 struct vm_eventinfo evinfo; 1801 int error, vcpuid; 1802 struct vm_exit *vme; 1803 bool retu; 1804 pmap_t pmap; 1805 1806 vcpuid = vcpu->vcpuid; 1807 1808 if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1809 return (EINVAL); 1810 1811 if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1812 return (EINVAL); 1813 1814 pmap = vmspace_pmap(vm->vmspace); 1815 vme = &vcpu->exitinfo; 1816 evinfo.rptr = NULL; 1817 evinfo.sptr = &vm->suspend; 1818 evinfo.iptr = NULL; 1819 restart: 1820 critical_enter(); 1821 1822 restore_guest_fpustate(vcpu); 1823 1824 vcpu_require_state(vcpu, VCPU_RUNNING); 1825 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); 1826 vcpu_require_state(vcpu, VCPU_FROZEN); 1827 1828 save_guest_fpustate(vcpu); 1829 1830 critical_exit(); 1831 1832 if (error == 0) { 1833 retu = false; 1834 switch (vme->exitcode) { 1835 case VM_EXITCODE_INST_EMUL: 1836 vcpu->nextpc = vme->pc + vme->inst_length; 1837 error = vm_handle_inst_emul(vcpu, &retu); 1838 break; 1839 1840 case VM_EXITCODE_REG_EMUL: 1841 vcpu->nextpc = vme->pc + vme->inst_length; 1842 error = vm_handle_reg_emul(vcpu, &retu); 1843 break; 1844 1845 case VM_EXITCODE_HVC: 1846 /* 1847 * The HVC instruction saves the address for the 1848 * next instruction as the return address. 1849 */ 1850 vcpu->nextpc = vme->pc; 1851 /* 1852 * The PSCI call can change the exit information in the 1853 * case of suspend/reset/poweroff/cpu off/cpu on. 1854 */ 1855 error = vm_handle_smccc_call(vcpu, vme, &retu); 1856 break; 1857 1858 case VM_EXITCODE_WFI: 1859 vcpu->nextpc = vme->pc + vme->inst_length; 1860 error = vm_handle_wfi(vcpu, vme, &retu); 1861 break; 1862 1863 case VM_EXITCODE_PAGING: 1864 vcpu->nextpc = vme->pc; 1865 error = vm_handle_paging(vcpu, &retu); 1866 break; 1867 1868 case VM_EXITCODE_SUSPENDED: 1869 vcpu->nextpc = vme->pc; 1870 error = vm_handle_suspend(vcpu, &retu); 1871 break; 1872 1873 default: 1874 /* Handle in userland */ 1875 vcpu->nextpc = vme->pc; 1876 retu = true; 1877 break; 1878 } 1879 } 1880 1881 if (error == 0 && retu == false) 1882 goto restart; 1883 1884 return (error); 1885 } 1886