1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/cpuset.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mutex.h> 38 #include <sys/pcpu.h> 39 #include <sys/proc.h> 40 #include <sys/queue.h> 41 #include <sys/rwlock.h> 42 #include <sys/sched.h> 43 #include <sys/smp.h> 44 #include <sys/sysctl.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/pmap.h> 50 #include <vm/vm_map.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_param.h> 53 54 #include <machine/armreg.h> 55 #include <machine/cpu.h> 56 #include <machine/fpu.h> 57 #include <machine/machdep.h> 58 #include <machine/pcb.h> 59 #include <machine/smp.h> 60 #include <machine/vm.h> 61 #include <machine/vmparam.h> 62 #include <machine/vmm.h> 63 #include <machine/vmm_instruction_emul.h> 64 65 #include <dev/pci/pcireg.h> 66 #include <dev/vmm/vmm_dev.h> 67 #include <dev/vmm/vmm_ktr.h> 68 #include <dev/vmm/vmm_stat.h> 69 70 #include "arm64.h" 71 #include "mmu.h" 72 73 #include "io/vgic.h" 74 #include "io/vtimer.h" 75 76 struct vcpu { 77 int flags; 78 enum vcpu_state state; 79 struct mtx mtx; 80 int hostcpu; /* host cpuid this vcpu last ran on */ 81 int vcpuid; 82 void *stats; 83 struct vm_exit exitinfo; 84 uint64_t nextpc; /* (x) next instruction to execute */ 85 struct vm *vm; /* (o) */ 86 void *cookie; /* (i) cpu-specific data */ 87 struct vfpstate *guestfpu; /* (a,i) guest fpu state */ 88 }; 89 90 #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 91 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 92 #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 93 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 94 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 95 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 96 97 struct mem_seg { 98 uint64_t gpa; 99 size_t len; 100 bool wired; 101 bool sysmem; 102 vm_object_t object; 103 }; 104 #define VM_MAX_MEMSEGS 3 105 106 struct mem_map { 107 vm_paddr_t gpa; 108 size_t len; 109 vm_ooffset_t segoff; 110 int segid; 111 int prot; 112 int flags; 113 }; 114 #define VM_MAX_MEMMAPS 4 115 116 struct vmm_mmio_region { 117 uint64_t start; 118 uint64_t end; 119 mem_region_read_t read; 120 mem_region_write_t write; 121 }; 122 #define VM_MAX_MMIO_REGIONS 4 123 124 struct vmm_special_reg { 125 uint32_t esr_iss; 126 uint32_t esr_mask; 127 reg_read_t reg_read; 128 reg_write_t reg_write; 129 void *arg; 130 }; 131 #define VM_MAX_SPECIAL_REGS 16 132 133 /* 134 * Initialization: 135 * (o) initialized the first time the VM is created 136 * (i) initialized when VM is created and when it is reinitialized 137 * (x) initialized before use 138 */ 139 struct vm { 140 void *cookie; /* (i) cpu-specific data */ 141 volatile cpuset_t active_cpus; /* (i) active vcpus */ 142 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 143 int suspend; /* (i) stop VM execution */ 144 bool dying; /* (o) is dying */ 145 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 146 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 147 struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ 148 struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ 149 struct vmspace *vmspace; /* (o) guest's address space */ 150 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 151 struct vcpu **vcpu; /* (i) guest vcpus */ 152 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; 153 /* (o) guest MMIO regions */ 154 struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS]; 155 /* The following describe the vm cpu topology */ 156 uint16_t sockets; /* (o) num of sockets */ 157 uint16_t cores; /* (o) num of cores/socket */ 158 uint16_t threads; /* (o) num of threads/core */ 159 uint16_t maxcpus; /* (o) max pluggable cpus */ 160 struct sx mem_segs_lock; /* (o) */ 161 struct sx vcpus_init_lock; /* (o) */ 162 }; 163 164 static bool vmm_initialized = false; 165 166 static int vm_handle_wfi(struct vcpu *vcpu, 167 struct vm_exit *vme, bool *retu); 168 169 static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); 170 171 /* statistics */ 172 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 173 174 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 175 176 static int vmm_ipinum; 177 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 178 "IPI vector used for vcpu notifications"); 179 180 struct vmm_regs { 181 uint64_t id_aa64afr0; 182 uint64_t id_aa64afr1; 183 uint64_t id_aa64dfr0; 184 uint64_t id_aa64dfr1; 185 uint64_t id_aa64isar0; 186 uint64_t id_aa64isar1; 187 uint64_t id_aa64isar2; 188 uint64_t id_aa64mmfr0; 189 uint64_t id_aa64mmfr1; 190 uint64_t id_aa64mmfr2; 191 uint64_t id_aa64pfr0; 192 uint64_t id_aa64pfr1; 193 }; 194 195 static const struct vmm_regs vmm_arch_regs_masks = { 196 .id_aa64dfr0 = 197 ID_AA64DFR0_CTX_CMPs_MASK | 198 ID_AA64DFR0_WRPs_MASK | 199 ID_AA64DFR0_BRPs_MASK | 200 ID_AA64DFR0_PMUVer_3 | 201 ID_AA64DFR0_DebugVer_8, 202 .id_aa64isar0 = 203 ID_AA64ISAR0_TLB_TLBIOSR | 204 ID_AA64ISAR0_SHA3_IMPL | 205 ID_AA64ISAR0_RDM_IMPL | 206 ID_AA64ISAR0_Atomic_IMPL | 207 ID_AA64ISAR0_CRC32_BASE | 208 ID_AA64ISAR0_SHA2_512 | 209 ID_AA64ISAR0_SHA1_BASE | 210 ID_AA64ISAR0_AES_PMULL, 211 .id_aa64mmfr0 = 212 ID_AA64MMFR0_TGran4_IMPL | 213 ID_AA64MMFR0_TGran64_IMPL | 214 ID_AA64MMFR0_TGran16_IMPL | 215 ID_AA64MMFR0_ASIDBits_16 | 216 ID_AA64MMFR0_PARange_4P, 217 .id_aa64mmfr1 = 218 ID_AA64MMFR1_SpecSEI_IMPL | 219 ID_AA64MMFR1_PAN_ATS1E1 | 220 ID_AA64MMFR1_HAFDBS_AF, 221 .id_aa64pfr0 = 222 ID_AA64PFR0_GIC_CPUIF_NONE | 223 ID_AA64PFR0_AdvSIMD_HP | 224 ID_AA64PFR0_FP_HP | 225 ID_AA64PFR0_EL3_64 | 226 ID_AA64PFR0_EL2_64 | 227 ID_AA64PFR0_EL1_64 | 228 ID_AA64PFR0_EL0_64, 229 }; 230 231 /* Host registers masked by vmm_arch_regs_masks. */ 232 static struct vmm_regs vmm_arch_regs; 233 234 u_int vm_maxcpu; 235 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 236 &vm_maxcpu, 0, "Maximum number of vCPUs"); 237 238 static void vm_free_memmap(struct vm *vm, int ident); 239 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 240 static void vcpu_notify_event_locked(struct vcpu *vcpu); 241 242 /* global statistics */ 243 VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); 244 VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); 245 VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); 246 VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); 247 VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); 248 VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); 249 VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); 250 VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); 251 VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); 252 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); 253 VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); 254 VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); 255 VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); 256 VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); 257 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); 258 259 /* 260 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this 261 * is a safe value for now. 262 */ 263 #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) 264 265 static int 266 vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) 267 { 268 #define _FETCH_KERN_REG(reg, field) do { \ 269 regs->field = vmm_arch_regs_masks.field; \ 270 if (!get_kernel_reg_masked(reg, ®s->field, masks->field)) \ 271 regs->field = 0; \ 272 } while (0) 273 _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0); 274 _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1); 275 _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0); 276 _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1); 277 _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0); 278 _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1); 279 _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2); 280 _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0); 281 _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1); 282 _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2); 283 _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0); 284 _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1); 285 #undef _FETCH_KERN_REG 286 return (0); 287 } 288 289 static void 290 vcpu_cleanup(struct vcpu *vcpu, bool destroy) 291 { 292 vmmops_vcpu_cleanup(vcpu->cookie); 293 vcpu->cookie = NULL; 294 if (destroy) { 295 vmm_stat_free(vcpu->stats); 296 fpu_save_area_free(vcpu->guestfpu); 297 vcpu_lock_destroy(vcpu); 298 } 299 } 300 301 static struct vcpu * 302 vcpu_alloc(struct vm *vm, int vcpu_id) 303 { 304 struct vcpu *vcpu; 305 306 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 307 ("vcpu_alloc: invalid vcpu %d", vcpu_id)); 308 309 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); 310 vcpu_lock_init(vcpu); 311 vcpu->state = VCPU_IDLE; 312 vcpu->hostcpu = NOCPU; 313 vcpu->vcpuid = vcpu_id; 314 vcpu->vm = vm; 315 vcpu->guestfpu = fpu_save_area_alloc(); 316 vcpu->stats = vmm_stat_alloc(); 317 return (vcpu); 318 } 319 320 static void 321 vcpu_init(struct vcpu *vcpu) 322 { 323 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 324 MPASS(vcpu->cookie != NULL); 325 fpu_save_area_reset(vcpu->guestfpu); 326 vmm_stat_init(vcpu->stats); 327 } 328 329 struct vm_exit * 330 vm_exitinfo(struct vcpu *vcpu) 331 { 332 return (&vcpu->exitinfo); 333 } 334 335 static int 336 vmm_init(void) 337 { 338 int error; 339 340 vm_maxcpu = mp_ncpus; 341 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 342 343 if (vm_maxcpu > VM_MAXCPU) { 344 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 345 vm_maxcpu = VM_MAXCPU; 346 } 347 if (vm_maxcpu == 0) 348 vm_maxcpu = 1; 349 350 error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); 351 if (error != 0) 352 return (error); 353 354 return (vmmops_modinit(0)); 355 } 356 357 static int 358 vmm_handler(module_t mod, int what, void *arg) 359 { 360 int error; 361 362 switch (what) { 363 case MOD_LOAD: 364 /* TODO: if (vmm_is_hw_supported()) { */ 365 vmmdev_init(); 366 error = vmm_init(); 367 if (error == 0) 368 vmm_initialized = true; 369 break; 370 case MOD_UNLOAD: 371 /* TODO: if (vmm_is_hw_supported()) { */ 372 error = vmmdev_cleanup(); 373 if (error == 0 && vmm_initialized) { 374 error = vmmops_modcleanup(); 375 if (error) 376 vmm_initialized = false; 377 } 378 break; 379 default: 380 error = 0; 381 break; 382 } 383 return (error); 384 } 385 386 static moduledata_t vmm_kmod = { 387 "vmm", 388 vmm_handler, 389 NULL 390 }; 391 392 /* 393 * vmm initialization has the following dependencies: 394 * 395 * - HYP initialization requires smp_rendezvous() and therefore must happen 396 * after SMP is fully functional (after SI_SUB_SMP). 397 */ 398 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 399 MODULE_VERSION(vmm, 1); 400 401 static void 402 vm_init(struct vm *vm, bool create) 403 { 404 int i; 405 406 vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 407 MPASS(vm->cookie != NULL); 408 409 CPU_ZERO(&vm->active_cpus); 410 CPU_ZERO(&vm->debug_cpus); 411 412 vm->suspend = 0; 413 CPU_ZERO(&vm->suspended_cpus); 414 415 memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); 416 memset(vm->special_reg, 0, sizeof(vm->special_reg)); 417 418 if (!create) { 419 for (i = 0; i < vm->maxcpus; i++) { 420 if (vm->vcpu[i] != NULL) 421 vcpu_init(vm->vcpu[i]); 422 } 423 } 424 } 425 426 void 427 vm_disable_vcpu_creation(struct vm *vm) 428 { 429 sx_xlock(&vm->vcpus_init_lock); 430 vm->dying = true; 431 sx_xunlock(&vm->vcpus_init_lock); 432 } 433 434 struct vcpu * 435 vm_alloc_vcpu(struct vm *vm, int vcpuid) 436 { 437 struct vcpu *vcpu; 438 439 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 440 return (NULL); 441 442 /* Some interrupt controllers may have a CPU limit */ 443 if (vcpuid >= vgic_max_cpu_count(vm->cookie)) 444 return (NULL); 445 446 vcpu = (struct vcpu *) 447 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); 448 if (__predict_true(vcpu != NULL)) 449 return (vcpu); 450 451 sx_xlock(&vm->vcpus_init_lock); 452 vcpu = vm->vcpu[vcpuid]; 453 if (vcpu == NULL && !vm->dying) { 454 vcpu = vcpu_alloc(vm, vcpuid); 455 vcpu_init(vcpu); 456 457 /* 458 * Ensure vCPU is fully created before updating pointer 459 * to permit unlocked reads above. 460 */ 461 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 462 (uintptr_t)vcpu); 463 } 464 sx_xunlock(&vm->vcpus_init_lock); 465 return (vcpu); 466 } 467 468 void 469 vm_slock_vcpus(struct vm *vm) 470 { 471 sx_slock(&vm->vcpus_init_lock); 472 } 473 474 void 475 vm_unlock_vcpus(struct vm *vm) 476 { 477 sx_unlock(&vm->vcpus_init_lock); 478 } 479 480 int 481 vm_create(const char *name, struct vm **retvm) 482 { 483 struct vm *vm; 484 struct vmspace *vmspace; 485 486 /* 487 * If vmm.ko could not be successfully initialized then don't attempt 488 * to create the virtual machine. 489 */ 490 if (!vmm_initialized) 491 return (ENXIO); 492 493 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 494 return (EINVAL); 495 496 vmspace = vmmops_vmspace_alloc(0, 1ul << 39); 497 if (vmspace == NULL) 498 return (ENOMEM); 499 500 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 501 strcpy(vm->name, name); 502 vm->vmspace = vmspace; 503 sx_init(&vm->mem_segs_lock, "vm mem_segs"); 504 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 505 506 vm->sockets = 1; 507 vm->cores = 1; /* XXX backwards compatibility */ 508 vm->threads = 1; /* XXX backwards compatibility */ 509 vm->maxcpus = vm_maxcpu; 510 511 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 512 M_WAITOK | M_ZERO); 513 514 vm_init(vm, true); 515 516 *retvm = vm; 517 return (0); 518 } 519 520 void 521 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 522 uint16_t *threads, uint16_t *maxcpus) 523 { 524 *sockets = vm->sockets; 525 *cores = vm->cores; 526 *threads = vm->threads; 527 *maxcpus = vm->maxcpus; 528 } 529 530 uint16_t 531 vm_get_maxcpus(struct vm *vm) 532 { 533 return (vm->maxcpus); 534 } 535 536 int 537 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 538 uint16_t threads, uint16_t maxcpus) 539 { 540 /* Ignore maxcpus. */ 541 if ((sockets * cores * threads) > vm->maxcpus) 542 return (EINVAL); 543 vm->sockets = sockets; 544 vm->cores = cores; 545 vm->threads = threads; 546 return(0); 547 } 548 549 static void 550 vm_cleanup(struct vm *vm, bool destroy) 551 { 552 struct mem_map *mm; 553 pmap_t pmap __diagused; 554 int i; 555 556 if (destroy) { 557 pmap = vmspace_pmap(vm->vmspace); 558 sched_pin(); 559 PCPU_SET(curvmpmap, NULL); 560 sched_unpin(); 561 CPU_FOREACH(i) { 562 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); 563 } 564 } 565 566 vgic_detach_from_vm(vm->cookie); 567 568 for (i = 0; i < vm->maxcpus; i++) { 569 if (vm->vcpu[i] != NULL) 570 vcpu_cleanup(vm->vcpu[i], destroy); 571 } 572 573 vmmops_cleanup(vm->cookie); 574 575 /* 576 * System memory is removed from the guest address space only when 577 * the VM is destroyed. This is because the mapping remains the same 578 * across VM reset. 579 * 580 * Device memory can be relocated by the guest (e.g. using PCI BARs) 581 * so those mappings are removed on a VM reset. 582 */ 583 if (!destroy) { 584 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 585 mm = &vm->mem_maps[i]; 586 if (destroy || !sysmem_mapping(vm, mm)) 587 vm_free_memmap(vm, i); 588 } 589 } 590 591 if (destroy) { 592 for (i = 0; i < VM_MAX_MEMSEGS; i++) 593 vm_free_memseg(vm, i); 594 595 vmmops_vmspace_free(vm->vmspace); 596 vm->vmspace = NULL; 597 598 for (i = 0; i < vm->maxcpus; i++) 599 free(vm->vcpu[i], M_VMM); 600 free(vm->vcpu, M_VMM); 601 sx_destroy(&vm->vcpus_init_lock); 602 sx_destroy(&vm->mem_segs_lock); 603 } 604 } 605 606 void 607 vm_destroy(struct vm *vm) 608 { 609 vm_cleanup(vm, true); 610 free(vm, M_VMM); 611 } 612 613 int 614 vm_reinit(struct vm *vm) 615 { 616 int error; 617 618 /* 619 * A virtual machine can be reset only if all vcpus are suspended. 620 */ 621 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 622 vm_cleanup(vm, false); 623 vm_init(vm, false); 624 error = 0; 625 } else { 626 error = EBUSY; 627 } 628 629 return (error); 630 } 631 632 const char * 633 vm_name(struct vm *vm) 634 { 635 return (vm->name); 636 } 637 638 void 639 vm_slock_memsegs(struct vm *vm) 640 { 641 sx_slock(&vm->mem_segs_lock); 642 } 643 644 void 645 vm_xlock_memsegs(struct vm *vm) 646 { 647 sx_xlock(&vm->mem_segs_lock); 648 } 649 650 void 651 vm_unlock_memsegs(struct vm *vm) 652 { 653 sx_unlock(&vm->mem_segs_lock); 654 } 655 656 /* 657 * Return 'true' if 'gpa' is allocated in the guest address space. 658 * 659 * This function is called in the context of a running vcpu which acts as 660 * an implicit lock on 'vm->mem_maps[]'. 661 */ 662 bool 663 vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) 664 { 665 struct vm *vm = vcpu->vm; 666 struct mem_map *mm; 667 int i; 668 669 #ifdef INVARIANTS 670 int hostcpu, state; 671 state = vcpu_get_state(vcpu, &hostcpu); 672 KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 673 ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 674 #endif 675 676 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 677 mm = &vm->mem_maps[i]; 678 if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 679 return (true); /* 'gpa' is sysmem or devmem */ 680 } 681 682 return (false); 683 } 684 685 int 686 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 687 { 688 struct mem_seg *seg; 689 vm_object_t obj; 690 691 sx_assert(&vm->mem_segs_lock, SX_XLOCKED); 692 693 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 694 return (EINVAL); 695 696 if (len == 0 || (len & PAGE_MASK)) 697 return (EINVAL); 698 699 seg = &vm->mem_segs[ident]; 700 if (seg->object != NULL) { 701 if (seg->len == len && seg->sysmem == sysmem) 702 return (EEXIST); 703 else 704 return (EINVAL); 705 } 706 707 obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); 708 if (obj == NULL) 709 return (ENOMEM); 710 711 seg->len = len; 712 seg->object = obj; 713 seg->sysmem = sysmem; 714 return (0); 715 } 716 717 int 718 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 719 vm_object_t *objptr) 720 { 721 struct mem_seg *seg; 722 723 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 724 725 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 726 return (EINVAL); 727 728 seg = &vm->mem_segs[ident]; 729 if (len) 730 *len = seg->len; 731 if (sysmem) 732 *sysmem = seg->sysmem; 733 if (objptr) 734 *objptr = seg->object; 735 return (0); 736 } 737 738 void 739 vm_free_memseg(struct vm *vm, int ident) 740 { 741 struct mem_seg *seg; 742 743 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 744 ("%s: invalid memseg ident %d", __func__, ident)); 745 746 seg = &vm->mem_segs[ident]; 747 if (seg->object != NULL) { 748 vm_object_deallocate(seg->object); 749 bzero(seg, sizeof(struct mem_seg)); 750 } 751 } 752 753 int 754 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 755 size_t len, int prot, int flags) 756 { 757 struct mem_seg *seg; 758 struct mem_map *m, *map; 759 vm_ooffset_t last; 760 int i, error; 761 762 if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 763 return (EINVAL); 764 765 if (flags & ~VM_MEMMAP_F_WIRED) 766 return (EINVAL); 767 768 if (segid < 0 || segid >= VM_MAX_MEMSEGS) 769 return (EINVAL); 770 771 seg = &vm->mem_segs[segid]; 772 if (seg->object == NULL) 773 return (EINVAL); 774 775 last = first + len; 776 if (first < 0 || first >= last || last > seg->len) 777 return (EINVAL); 778 779 if ((gpa | first | last) & PAGE_MASK) 780 return (EINVAL); 781 782 map = NULL; 783 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 784 m = &vm->mem_maps[i]; 785 if (m->len == 0) { 786 map = m; 787 break; 788 } 789 } 790 791 if (map == NULL) 792 return (ENOSPC); 793 794 error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 795 len, 0, VMFS_NO_SPACE, prot, prot, 0); 796 if (error != KERN_SUCCESS) 797 return (EFAULT); 798 799 vm_object_reference(seg->object); 800 801 if (flags & VM_MEMMAP_F_WIRED) { 802 error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 803 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 804 if (error != KERN_SUCCESS) { 805 vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 806 return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : 807 EFAULT); 808 } 809 } 810 811 map->gpa = gpa; 812 map->len = len; 813 map->segoff = first; 814 map->segid = segid; 815 map->prot = prot; 816 map->flags = flags; 817 return (0); 818 } 819 820 int 821 vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) 822 { 823 struct mem_map *m; 824 int i; 825 826 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 827 m = &vm->mem_maps[i]; 828 if (m->gpa == gpa && m->len == len) { 829 vm_free_memmap(vm, i); 830 return (0); 831 } 832 } 833 834 return (EINVAL); 835 } 836 837 int 838 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 839 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 840 { 841 struct mem_map *mm, *mmnext; 842 int i; 843 844 mmnext = NULL; 845 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 846 mm = &vm->mem_maps[i]; 847 if (mm->len == 0 || mm->gpa < *gpa) 848 continue; 849 if (mmnext == NULL || mm->gpa < mmnext->gpa) 850 mmnext = mm; 851 } 852 853 if (mmnext != NULL) { 854 *gpa = mmnext->gpa; 855 if (segid) 856 *segid = mmnext->segid; 857 if (segoff) 858 *segoff = mmnext->segoff; 859 if (len) 860 *len = mmnext->len; 861 if (prot) 862 *prot = mmnext->prot; 863 if (flags) 864 *flags = mmnext->flags; 865 return (0); 866 } else { 867 return (ENOENT); 868 } 869 } 870 871 static void 872 vm_free_memmap(struct vm *vm, int ident) 873 { 874 struct mem_map *mm; 875 int error __diagused; 876 877 mm = &vm->mem_maps[ident]; 878 if (mm->len) { 879 error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 880 mm->gpa + mm->len); 881 KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 882 __func__, error)); 883 bzero(mm, sizeof(struct mem_map)); 884 } 885 } 886 887 static __inline bool 888 sysmem_mapping(struct vm *vm, struct mem_map *mm) 889 { 890 891 if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 892 return (true); 893 else 894 return (false); 895 } 896 897 vm_paddr_t 898 vmm_sysmem_maxaddr(struct vm *vm) 899 { 900 struct mem_map *mm; 901 vm_paddr_t maxaddr; 902 int i; 903 904 maxaddr = 0; 905 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 906 mm = &vm->mem_maps[i]; 907 if (sysmem_mapping(vm, mm)) { 908 if (maxaddr < mm->gpa + mm->len) 909 maxaddr = mm->gpa + mm->len; 910 } 911 } 912 return (maxaddr); 913 } 914 915 int 916 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 917 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 918 { 919 920 vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); 921 return (0); 922 } 923 924 static int 925 vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) 926 { 927 *rval = 0; 928 return (0); 929 } 930 931 static int 932 vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) 933 { 934 *rval = *(uint64_t *)arg; 935 return (0); 936 } 937 938 static int 939 vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) 940 { 941 return (0); 942 } 943 944 static const struct vmm_special_reg vmm_special_regs[] = { 945 #define SPECIAL_REG(_reg, _read, _write) \ 946 { \ 947 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 948 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 949 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 950 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 951 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 952 .esr_mask = ISS_MSR_REG_MASK, \ 953 .reg_read = (_read), \ 954 .reg_write = (_write), \ 955 .arg = NULL, \ 956 } 957 #define ID_SPECIAL_REG(_reg, _name) \ 958 { \ 959 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 960 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 961 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 962 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 963 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 964 .esr_mask = ISS_MSR_REG_MASK, \ 965 .reg_read = vmm_reg_read_arg, \ 966 .reg_write = vmm_reg_wi, \ 967 .arg = &(vmm_arch_regs._name), \ 968 } 969 970 /* ID registers */ 971 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), 972 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), 973 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), 974 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), 975 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), 976 977 /* 978 * All other ID registers are read as zero. 979 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. 980 */ 981 { 982 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | 983 (0 << ISS_MSR_OP1_SHIFT) | 984 (0 << ISS_MSR_CRn_SHIFT) | 985 (0 << ISS_MSR_CRm_SHIFT), 986 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | 987 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), 988 .reg_read = vmm_reg_raz, 989 .reg_write = vmm_reg_wi, 990 .arg = NULL, 991 }, 992 993 /* Counter physical registers */ 994 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), 995 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, 996 vtimer_phys_cval_write), 997 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, 998 vtimer_phys_tval_write), 999 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), 1000 #undef SPECIAL_REG 1001 }; 1002 1003 void 1004 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, 1005 reg_read_t reg_read, reg_write_t reg_write, void *arg) 1006 { 1007 int i; 1008 1009 for (i = 0; i < nitems(vm->special_reg); i++) { 1010 if (vm->special_reg[i].esr_iss == 0 && 1011 vm->special_reg[i].esr_mask == 0) { 1012 vm->special_reg[i].esr_iss = iss; 1013 vm->special_reg[i].esr_mask = mask; 1014 vm->special_reg[i].reg_read = reg_read; 1015 vm->special_reg[i].reg_write = reg_write; 1016 vm->special_reg[i].arg = arg; 1017 return; 1018 } 1019 } 1020 1021 panic("%s: No free special register slot", __func__); 1022 } 1023 1024 void 1025 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) 1026 { 1027 int i; 1028 1029 for (i = 0; i < nitems(vm->special_reg); i++) { 1030 if (vm->special_reg[i].esr_iss == iss && 1031 vm->special_reg[i].esr_mask == mask) { 1032 memset(&vm->special_reg[i], 0, 1033 sizeof(vm->special_reg[i])); 1034 return; 1035 } 1036 } 1037 1038 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, 1039 mask); 1040 } 1041 1042 static int 1043 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) 1044 { 1045 struct vm *vm; 1046 struct vm_exit *vme; 1047 struct vre *vre; 1048 int i, rv; 1049 1050 vm = vcpu->vm; 1051 vme = &vcpu->exitinfo; 1052 vre = &vme->u.reg_emul.vre; 1053 1054 for (i = 0; i < nitems(vm->special_reg); i++) { 1055 if (vm->special_reg[i].esr_iss == 0 && 1056 vm->special_reg[i].esr_mask == 0) 1057 continue; 1058 1059 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == 1060 vm->special_reg[i].esr_iss) { 1061 rv = vmm_emulate_register(vcpu, vre, 1062 vm->special_reg[i].reg_read, 1063 vm->special_reg[i].reg_write, 1064 vm->special_reg[i].arg); 1065 if (rv == 0) { 1066 *retu = false; 1067 } 1068 return (rv); 1069 } 1070 } 1071 for (i = 0; i < nitems(vmm_special_regs); i++) { 1072 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == 1073 vmm_special_regs[i].esr_iss) { 1074 rv = vmm_emulate_register(vcpu, vre, 1075 vmm_special_regs[i].reg_read, 1076 vmm_special_regs[i].reg_write, 1077 vmm_special_regs[i].arg); 1078 if (rv == 0) { 1079 *retu = false; 1080 } 1081 return (rv); 1082 } 1083 } 1084 1085 1086 *retu = true; 1087 return (0); 1088 } 1089 1090 void 1091 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 1092 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 1093 { 1094 int i; 1095 1096 for (i = 0; i < nitems(vm->mmio_region); i++) { 1097 if (vm->mmio_region[i].start == 0 && 1098 vm->mmio_region[i].end == 0) { 1099 vm->mmio_region[i].start = start; 1100 vm->mmio_region[i].end = start + size; 1101 vm->mmio_region[i].read = mmio_read; 1102 vm->mmio_region[i].write = mmio_write; 1103 return; 1104 } 1105 } 1106 1107 panic("%s: No free MMIO region", __func__); 1108 } 1109 1110 void 1111 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 1112 { 1113 int i; 1114 1115 for (i = 0; i < nitems(vm->mmio_region); i++) { 1116 if (vm->mmio_region[i].start == start && 1117 vm->mmio_region[i].end == start + size) { 1118 memset(&vm->mmio_region[i], 0, 1119 sizeof(vm->mmio_region[i])); 1120 return; 1121 } 1122 } 1123 1124 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 1125 start + size); 1126 } 1127 1128 static int 1129 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 1130 { 1131 struct vm *vm; 1132 struct vm_exit *vme; 1133 struct vie *vie; 1134 struct hyp *hyp; 1135 uint64_t fault_ipa; 1136 struct vm_guest_paging *paging; 1137 struct vmm_mmio_region *vmr; 1138 int error, i; 1139 1140 vm = vcpu->vm; 1141 hyp = vm->cookie; 1142 if (!hyp->vgic_attached) 1143 goto out_user; 1144 1145 vme = &vcpu->exitinfo; 1146 vie = &vme->u.inst_emul.vie; 1147 paging = &vme->u.inst_emul.paging; 1148 1149 fault_ipa = vme->u.inst_emul.gpa; 1150 1151 vmr = NULL; 1152 for (i = 0; i < nitems(vm->mmio_region); i++) { 1153 if (vm->mmio_region[i].start <= fault_ipa && 1154 vm->mmio_region[i].end > fault_ipa) { 1155 vmr = &vm->mmio_region[i]; 1156 break; 1157 } 1158 } 1159 if (vmr == NULL) 1160 goto out_user; 1161 1162 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 1163 vmr->read, vmr->write, retu); 1164 return (error); 1165 1166 out_user: 1167 *retu = true; 1168 return (0); 1169 } 1170 1171 int 1172 vm_suspend(struct vm *vm, enum vm_suspend_how how) 1173 { 1174 int i; 1175 1176 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1177 return (EINVAL); 1178 1179 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1180 VM_CTR2(vm, "virtual machine already suspended %d/%d", 1181 vm->suspend, how); 1182 return (EALREADY); 1183 } 1184 1185 VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1186 1187 /* 1188 * Notify all active vcpus that they are now suspended. 1189 */ 1190 for (i = 0; i < vm->maxcpus; i++) { 1191 if (CPU_ISSET(i, &vm->active_cpus)) 1192 vcpu_notify_event(vm_vcpu(vm, i)); 1193 } 1194 1195 return (0); 1196 } 1197 1198 void 1199 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 1200 { 1201 struct vm *vm = vcpu->vm; 1202 struct vm_exit *vmexit; 1203 1204 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1205 ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1206 1207 vmexit = vm_exitinfo(vcpu); 1208 vmexit->pc = pc; 1209 vmexit->inst_length = 4; 1210 vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1211 vmexit->u.suspended.how = vm->suspend; 1212 } 1213 1214 void 1215 vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 1216 { 1217 struct vm_exit *vmexit; 1218 1219 vmexit = vm_exitinfo(vcpu); 1220 vmexit->pc = pc; 1221 vmexit->inst_length = 4; 1222 vmexit->exitcode = VM_EXITCODE_DEBUG; 1223 } 1224 1225 int 1226 vm_activate_cpu(struct vcpu *vcpu) 1227 { 1228 struct vm *vm = vcpu->vm; 1229 1230 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1231 return (EBUSY); 1232 1233 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 1234 return (0); 1235 1236 } 1237 1238 int 1239 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 1240 { 1241 if (vcpu == NULL) { 1242 vm->debug_cpus = vm->active_cpus; 1243 for (int i = 0; i < vm->maxcpus; i++) { 1244 if (CPU_ISSET(i, &vm->active_cpus)) 1245 vcpu_notify_event(vm_vcpu(vm, i)); 1246 } 1247 } else { 1248 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1249 return (EINVAL); 1250 1251 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1252 vcpu_notify_event(vcpu); 1253 } 1254 return (0); 1255 } 1256 1257 int 1258 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 1259 { 1260 1261 if (vcpu == NULL) { 1262 CPU_ZERO(&vm->debug_cpus); 1263 } else { 1264 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 1265 return (EINVAL); 1266 1267 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1268 } 1269 return (0); 1270 } 1271 1272 int 1273 vcpu_debugged(struct vcpu *vcpu) 1274 { 1275 1276 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1277 } 1278 1279 cpuset_t 1280 vm_active_cpus(struct vm *vm) 1281 { 1282 1283 return (vm->active_cpus); 1284 } 1285 1286 cpuset_t 1287 vm_debug_cpus(struct vm *vm) 1288 { 1289 1290 return (vm->debug_cpus); 1291 } 1292 1293 cpuset_t 1294 vm_suspended_cpus(struct vm *vm) 1295 { 1296 1297 return (vm->suspended_cpus); 1298 } 1299 1300 1301 void * 1302 vcpu_stats(struct vcpu *vcpu) 1303 { 1304 1305 return (vcpu->stats); 1306 } 1307 1308 /* 1309 * This function is called to ensure that a vcpu "sees" a pending event 1310 * as soon as possible: 1311 * - If the vcpu thread is sleeping then it is woken up. 1312 * - If the vcpu is running on a different host_cpu then an IPI will be directed 1313 * to the host_cpu to cause the vcpu to trap into the hypervisor. 1314 */ 1315 static void 1316 vcpu_notify_event_locked(struct vcpu *vcpu) 1317 { 1318 int hostcpu; 1319 1320 hostcpu = vcpu->hostcpu; 1321 if (vcpu->state == VCPU_RUNNING) { 1322 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1323 if (hostcpu != curcpu) { 1324 ipi_cpu(hostcpu, vmm_ipinum); 1325 } else { 1326 /* 1327 * If the 'vcpu' is running on 'curcpu' then it must 1328 * be sending a notification to itself (e.g. SELF_IPI). 1329 * The pending event will be picked up when the vcpu 1330 * transitions back to guest context. 1331 */ 1332 } 1333 } else { 1334 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1335 "with hostcpu %d", vcpu->state, hostcpu)); 1336 if (vcpu->state == VCPU_SLEEPING) 1337 wakeup_one(vcpu); 1338 } 1339 } 1340 1341 void 1342 vcpu_notify_event(struct vcpu *vcpu) 1343 { 1344 vcpu_lock(vcpu); 1345 vcpu_notify_event_locked(vcpu); 1346 vcpu_unlock(vcpu); 1347 } 1348 1349 static void 1350 restore_guest_fpustate(struct vcpu *vcpu) 1351 { 1352 1353 /* flush host state to the pcb */ 1354 vfp_save_state(curthread, curthread->td_pcb); 1355 /* Ensure the VFP state will be re-loaded when exiting the guest */ 1356 PCPU_SET(fpcurthread, NULL); 1357 1358 /* restore guest FPU state */ 1359 vfp_enable(); 1360 vfp_restore(vcpu->guestfpu); 1361 1362 /* 1363 * The FPU is now "dirty" with the guest's state so turn on emulation 1364 * to trap any access to the FPU by the host. 1365 */ 1366 vfp_disable(); 1367 } 1368 1369 static void 1370 save_guest_fpustate(struct vcpu *vcpu) 1371 { 1372 if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) != 1373 CPACR_FPEN_TRAP_ALL1) 1374 panic("VFP not enabled in host!"); 1375 1376 /* save guest FPU state */ 1377 vfp_enable(); 1378 vfp_store(vcpu->guestfpu); 1379 vfp_disable(); 1380 1381 KASSERT(PCPU_GET(fpcurthread) == NULL, 1382 ("%s: fpcurthread set with guest registers", __func__)); 1383 } 1384 static int 1385 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1386 bool from_idle) 1387 { 1388 int error; 1389 1390 vcpu_assert_locked(vcpu); 1391 1392 /* 1393 * State transitions from the vmmdev_ioctl() must always begin from 1394 * the VCPU_IDLE state. This guarantees that there is only a single 1395 * ioctl() operating on a vcpu at any point. 1396 */ 1397 if (from_idle) { 1398 while (vcpu->state != VCPU_IDLE) { 1399 vcpu_notify_event_locked(vcpu); 1400 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1401 } 1402 } else { 1403 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1404 "vcpu idle state")); 1405 } 1406 1407 if (vcpu->state == VCPU_RUNNING) { 1408 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1409 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1410 } else { 1411 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1412 "vcpu that is not running", vcpu->hostcpu)); 1413 } 1414 1415 /* 1416 * The following state transitions are allowed: 1417 * IDLE -> FROZEN -> IDLE 1418 * FROZEN -> RUNNING -> FROZEN 1419 * FROZEN -> SLEEPING -> FROZEN 1420 */ 1421 switch (vcpu->state) { 1422 case VCPU_IDLE: 1423 case VCPU_RUNNING: 1424 case VCPU_SLEEPING: 1425 error = (newstate != VCPU_FROZEN); 1426 break; 1427 case VCPU_FROZEN: 1428 error = (newstate == VCPU_FROZEN); 1429 break; 1430 default: 1431 error = 1; 1432 break; 1433 } 1434 1435 if (error) 1436 return (EBUSY); 1437 1438 vcpu->state = newstate; 1439 if (newstate == VCPU_RUNNING) 1440 vcpu->hostcpu = curcpu; 1441 else 1442 vcpu->hostcpu = NOCPU; 1443 1444 if (newstate == VCPU_IDLE) 1445 wakeup(&vcpu->state); 1446 1447 return (0); 1448 } 1449 1450 static void 1451 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1452 { 1453 int error; 1454 1455 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1456 panic("Error %d setting state to %d\n", error, newstate); 1457 } 1458 1459 static void 1460 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1461 { 1462 int error; 1463 1464 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1465 panic("Error %d setting state to %d", error, newstate); 1466 } 1467 1468 int 1469 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1470 { 1471 if (type < 0 || type >= VM_CAP_MAX) 1472 return (EINVAL); 1473 1474 return (vmmops_getcap(vcpu->cookie, type, retval)); 1475 } 1476 1477 int 1478 vm_set_capability(struct vcpu *vcpu, int type, int val) 1479 { 1480 if (type < 0 || type >= VM_CAP_MAX) 1481 return (EINVAL); 1482 1483 return (vmmops_setcap(vcpu->cookie, type, val)); 1484 } 1485 1486 struct vm * 1487 vcpu_vm(struct vcpu *vcpu) 1488 { 1489 return (vcpu->vm); 1490 } 1491 1492 int 1493 vcpu_vcpuid(struct vcpu *vcpu) 1494 { 1495 return (vcpu->vcpuid); 1496 } 1497 1498 void * 1499 vcpu_get_cookie(struct vcpu *vcpu) 1500 { 1501 return (vcpu->cookie); 1502 } 1503 1504 struct vcpu * 1505 vm_vcpu(struct vm *vm, int vcpuid) 1506 { 1507 return (vm->vcpu[vcpuid]); 1508 } 1509 1510 int 1511 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1512 { 1513 int error; 1514 1515 vcpu_lock(vcpu); 1516 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1517 vcpu_unlock(vcpu); 1518 1519 return (error); 1520 } 1521 1522 enum vcpu_state 1523 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1524 { 1525 enum vcpu_state state; 1526 1527 vcpu_lock(vcpu); 1528 state = vcpu->state; 1529 if (hostcpu != NULL) 1530 *hostcpu = vcpu->hostcpu; 1531 vcpu_unlock(vcpu); 1532 1533 return (state); 1534 } 1535 1536 static void * 1537 _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1538 void **cookie) 1539 { 1540 int i, count, pageoff; 1541 struct mem_map *mm; 1542 vm_page_t m; 1543 1544 pageoff = gpa & PAGE_MASK; 1545 if (len > PAGE_SIZE - pageoff) 1546 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1547 1548 count = 0; 1549 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 1550 mm = &vm->mem_maps[i]; 1551 if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 1552 gpa < mm->gpa + mm->len) { 1553 count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1554 trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 1555 break; 1556 } 1557 } 1558 1559 if (count == 1) { 1560 *cookie = m; 1561 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1562 } else { 1563 *cookie = NULL; 1564 return (NULL); 1565 } 1566 } 1567 1568 void * 1569 vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 1570 void **cookie) 1571 { 1572 #ifdef INVARIANTS 1573 /* 1574 * The current vcpu should be frozen to ensure 'vm_memmap[]' 1575 * stability. 1576 */ 1577 int state = vcpu_get_state(vcpu, NULL); 1578 KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 1579 __func__, state)); 1580 #endif 1581 return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 1582 } 1583 1584 void * 1585 vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1586 void **cookie) 1587 { 1588 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 1589 return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 1590 } 1591 1592 void 1593 vm_gpa_release(void *cookie) 1594 { 1595 vm_page_t m = cookie; 1596 1597 vm_page_unwire(m, PQ_ACTIVE); 1598 } 1599 1600 int 1601 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1602 { 1603 1604 if (reg >= VM_REG_LAST) 1605 return (EINVAL); 1606 1607 return (vmmops_getreg(vcpu->cookie, reg, retval)); 1608 } 1609 1610 int 1611 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1612 { 1613 int error; 1614 1615 if (reg >= VM_REG_LAST) 1616 return (EINVAL); 1617 error = vmmops_setreg(vcpu->cookie, reg, val); 1618 if (error || reg != VM_REG_GUEST_PC) 1619 return (error); 1620 1621 vcpu->nextpc = val; 1622 1623 return (0); 1624 } 1625 1626 void * 1627 vm_get_cookie(struct vm *vm) 1628 { 1629 return (vm->cookie); 1630 } 1631 1632 int 1633 vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far) 1634 { 1635 return (vmmops_exception(vcpu->cookie, esr, far)); 1636 } 1637 1638 int 1639 vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr) 1640 { 1641 return (vgic_attach_to_vm(vm->cookie, descr)); 1642 } 1643 1644 int 1645 vm_assert_irq(struct vm *vm, uint32_t irq) 1646 { 1647 return (vgic_inject_irq(vm->cookie, -1, irq, true)); 1648 } 1649 1650 int 1651 vm_deassert_irq(struct vm *vm, uint32_t irq) 1652 { 1653 return (vgic_inject_irq(vm->cookie, -1, irq, false)); 1654 } 1655 1656 int 1657 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 1658 int func) 1659 { 1660 /* TODO: Should we raise an SError? */ 1661 return (vgic_inject_msi(vm->cookie, msg, addr)); 1662 } 1663 1664 static int 1665 vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1666 { 1667 struct hypctx *hypctx; 1668 int i; 1669 1670 hypctx = vcpu_get_cookie(vcpu); 1671 1672 if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0) 1673 return (1); 1674 1675 vme->exitcode = VM_EXITCODE_SMCCC; 1676 vme->u.smccc_call.func_id = hypctx->tf.tf_x[0]; 1677 for (i = 0; i < nitems(vme->u.smccc_call.args); i++) 1678 vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1]; 1679 1680 *retu = true; 1681 return (0); 1682 } 1683 1684 static int 1685 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1686 { 1687 vcpu_lock(vcpu); 1688 while (1) { 1689 if (vgic_has_pending_irq(vcpu->cookie)) 1690 break; 1691 1692 if (vcpu_should_yield(vcpu)) 1693 break; 1694 1695 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1696 /* 1697 * XXX msleep_spin() cannot be interrupted by signals so 1698 * wake up periodically to check pending signals. 1699 */ 1700 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); 1701 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1702 } 1703 vcpu_unlock(vcpu); 1704 1705 *retu = false; 1706 return (0); 1707 } 1708 1709 static int 1710 vm_handle_paging(struct vcpu *vcpu, bool *retu) 1711 { 1712 struct vm *vm = vcpu->vm; 1713 struct vm_exit *vme; 1714 struct vm_map *map; 1715 uint64_t addr, esr; 1716 pmap_t pmap; 1717 int ftype, rv; 1718 1719 vme = &vcpu->exitinfo; 1720 1721 pmap = vmspace_pmap(vcpu->vm->vmspace); 1722 addr = vme->u.paging.gpa; 1723 esr = vme->u.paging.esr; 1724 1725 /* The page exists, but the page table needs to be updated. */ 1726 if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS) 1727 return (0); 1728 1729 switch (ESR_ELx_EXCEPTION(esr)) { 1730 case EXCP_INSN_ABORT_L: 1731 case EXCP_DATA_ABORT_L: 1732 ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE; 1733 break; 1734 default: 1735 panic("%s: Invalid exception (esr = %lx)", __func__, esr); 1736 } 1737 1738 map = &vm->vmspace->vm_map; 1739 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1740 if (rv != KERN_SUCCESS) 1741 return (EFAULT); 1742 1743 return (0); 1744 } 1745 1746 static int 1747 vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1748 { 1749 struct vm *vm = vcpu->vm; 1750 int error, i; 1751 struct thread *td; 1752 1753 error = 0; 1754 td = curthread; 1755 1756 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1757 1758 /* 1759 * Wait until all 'active_cpus' have suspended themselves. 1760 * 1761 * Since a VM may be suspended at any time including when one or 1762 * more vcpus are doing a rendezvous we need to call the rendezvous 1763 * handler while we are waiting to prevent a deadlock. 1764 */ 1765 vcpu_lock(vcpu); 1766 while (error == 0) { 1767 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) 1768 break; 1769 1770 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1771 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1772 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1773 if (td_ast_pending(td, TDA_SUSPEND)) { 1774 vcpu_unlock(vcpu); 1775 error = thread_check_susp(td, false); 1776 vcpu_lock(vcpu); 1777 } 1778 } 1779 vcpu_unlock(vcpu); 1780 1781 /* 1782 * Wakeup the other sleeping vcpus and return to userspace. 1783 */ 1784 for (i = 0; i < vm->maxcpus; i++) { 1785 if (CPU_ISSET(i, &vm->suspended_cpus)) { 1786 vcpu_notify_event(vm_vcpu(vm, i)); 1787 } 1788 } 1789 1790 *retu = true; 1791 return (error); 1792 } 1793 1794 int 1795 vm_run(struct vcpu *vcpu) 1796 { 1797 struct vm *vm = vcpu->vm; 1798 struct vm_eventinfo evinfo; 1799 int error, vcpuid; 1800 struct vm_exit *vme; 1801 bool retu; 1802 pmap_t pmap; 1803 1804 vcpuid = vcpu->vcpuid; 1805 1806 if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1807 return (EINVAL); 1808 1809 if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1810 return (EINVAL); 1811 1812 pmap = vmspace_pmap(vm->vmspace); 1813 vme = &vcpu->exitinfo; 1814 evinfo.rptr = NULL; 1815 evinfo.sptr = &vm->suspend; 1816 evinfo.iptr = NULL; 1817 restart: 1818 critical_enter(); 1819 1820 restore_guest_fpustate(vcpu); 1821 1822 vcpu_require_state(vcpu, VCPU_RUNNING); 1823 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); 1824 vcpu_require_state(vcpu, VCPU_FROZEN); 1825 1826 save_guest_fpustate(vcpu); 1827 1828 critical_exit(); 1829 1830 if (error == 0) { 1831 retu = false; 1832 switch (vme->exitcode) { 1833 case VM_EXITCODE_INST_EMUL: 1834 vcpu->nextpc = vme->pc + vme->inst_length; 1835 error = vm_handle_inst_emul(vcpu, &retu); 1836 break; 1837 1838 case VM_EXITCODE_REG_EMUL: 1839 vcpu->nextpc = vme->pc + vme->inst_length; 1840 error = vm_handle_reg_emul(vcpu, &retu); 1841 break; 1842 1843 case VM_EXITCODE_HVC: 1844 /* 1845 * The HVC instruction saves the address for the 1846 * next instruction as the return address. 1847 */ 1848 vcpu->nextpc = vme->pc; 1849 /* 1850 * The PSCI call can change the exit information in the 1851 * case of suspend/reset/poweroff/cpu off/cpu on. 1852 */ 1853 error = vm_handle_smccc_call(vcpu, vme, &retu); 1854 break; 1855 1856 case VM_EXITCODE_WFI: 1857 vcpu->nextpc = vme->pc + vme->inst_length; 1858 error = vm_handle_wfi(vcpu, vme, &retu); 1859 break; 1860 1861 case VM_EXITCODE_PAGING: 1862 vcpu->nextpc = vme->pc; 1863 error = vm_handle_paging(vcpu, &retu); 1864 break; 1865 1866 case VM_EXITCODE_SUSPENDED: 1867 vcpu->nextpc = vme->pc; 1868 error = vm_handle_suspend(vcpu, &retu); 1869 break; 1870 1871 default: 1872 /* Handle in userland */ 1873 vcpu->nextpc = vme->pc; 1874 retu = true; 1875 break; 1876 } 1877 } 1878 1879 if (error == 0 && retu == false) 1880 goto restart; 1881 1882 return (error); 1883 } 1884