1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/cpuset.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mutex.h> 38 #include <sys/pcpu.h> 39 #include <sys/proc.h> 40 #include <sys/queue.h> 41 #include <sys/rwlock.h> 42 #include <sys/sched.h> 43 #include <sys/smp.h> 44 #include <sys/sysctl.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/pmap.h> 50 #include <vm/vm_map.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_param.h> 53 54 #include <machine/armreg.h> 55 #include <machine/cpu.h> 56 #include <machine/fpu.h> 57 #include <machine/machdep.h> 58 #include <machine/pcb.h> 59 #include <machine/smp.h> 60 #include <machine/vm.h> 61 #include <machine/vmparam.h> 62 #include <machine/vmm.h> 63 #include <machine/vmm_instruction_emul.h> 64 65 #include <dev/pci/pcireg.h> 66 #include <dev/vmm/vmm_dev.h> 67 #include <dev/vmm/vmm_ktr.h> 68 #include <dev/vmm/vmm_stat.h> 69 70 #include "arm64.h" 71 #include "mmu.h" 72 73 #include "io/vgic.h" 74 #include "io/vtimer.h" 75 76 struct vcpu { 77 int flags; 78 enum vcpu_state state; 79 struct mtx mtx; 80 int hostcpu; /* host cpuid this vcpu last ran on */ 81 int vcpuid; 82 void *stats; 83 struct vm_exit exitinfo; 84 uint64_t nextpc; /* (x) next instruction to execute */ 85 struct vm *vm; /* (o) */ 86 void *cookie; /* (i) cpu-specific data */ 87 struct vfpstate *guestfpu; /* (a,i) guest fpu state */ 88 }; 89 90 #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 91 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 92 #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 93 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 94 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 95 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 96 97 struct mem_seg { 98 uint64_t gpa; 99 size_t len; 100 bool wired; 101 bool sysmem; 102 vm_object_t object; 103 }; 104 #define VM_MAX_MEMSEGS 3 105 106 struct mem_map { 107 vm_paddr_t gpa; 108 size_t len; 109 vm_ooffset_t segoff; 110 int segid; 111 int prot; 112 int flags; 113 }; 114 #define VM_MAX_MEMMAPS 4 115 116 struct vmm_mmio_region { 117 uint64_t start; 118 uint64_t end; 119 mem_region_read_t read; 120 mem_region_write_t write; 121 }; 122 #define VM_MAX_MMIO_REGIONS 4 123 124 struct vmm_special_reg { 125 uint32_t esr_iss; 126 uint32_t esr_mask; 127 reg_read_t reg_read; 128 reg_write_t reg_write; 129 void *arg; 130 }; 131 #define VM_MAX_SPECIAL_REGS 16 132 133 /* 134 * Initialization: 135 * (o) initialized the first time the VM is created 136 * (i) initialized when VM is created and when it is reinitialized 137 * (x) initialized before use 138 */ 139 struct vm { 140 void *cookie; /* (i) cpu-specific data */ 141 volatile cpuset_t active_cpus; /* (i) active vcpus */ 142 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 143 int suspend; /* (i) stop VM execution */ 144 bool dying; /* (o) is dying */ 145 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 146 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 147 struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ 148 struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ 149 struct vmspace *vmspace; /* (o) guest's address space */ 150 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 151 struct vcpu **vcpu; /* (i) guest vcpus */ 152 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; 153 /* (o) guest MMIO regions */ 154 struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS]; 155 /* The following describe the vm cpu topology */ 156 uint16_t sockets; /* (o) num of sockets */ 157 uint16_t cores; /* (o) num of cores/socket */ 158 uint16_t threads; /* (o) num of threads/core */ 159 uint16_t maxcpus; /* (o) max pluggable cpus */ 160 struct sx mem_segs_lock; /* (o) */ 161 struct sx vcpus_init_lock; /* (o) */ 162 }; 163 164 static bool vmm_initialized = false; 165 166 static int vm_handle_wfi(struct vcpu *vcpu, 167 struct vm_exit *vme, bool *retu); 168 169 static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); 170 171 /* statistics */ 172 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 173 174 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 175 176 static int vmm_ipinum; 177 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 178 "IPI vector used for vcpu notifications"); 179 180 struct vmm_regs { 181 uint64_t id_aa64afr0; 182 uint64_t id_aa64afr1; 183 uint64_t id_aa64dfr0; 184 uint64_t id_aa64dfr1; 185 uint64_t id_aa64isar0; 186 uint64_t id_aa64isar1; 187 uint64_t id_aa64isar2; 188 uint64_t id_aa64mmfr0; 189 uint64_t id_aa64mmfr1; 190 uint64_t id_aa64mmfr2; 191 uint64_t id_aa64pfr0; 192 uint64_t id_aa64pfr1; 193 }; 194 195 static const struct vmm_regs vmm_arch_regs_masks = { 196 .id_aa64dfr0 = 197 ID_AA64DFR0_CTX_CMPs_MASK | 198 ID_AA64DFR0_WRPs_MASK | 199 ID_AA64DFR0_BRPs_MASK | 200 ID_AA64DFR0_PMUVer_3 | 201 ID_AA64DFR0_DebugVer_8, 202 .id_aa64isar0 = 203 ID_AA64ISAR0_TLB_TLBIOSR | 204 ID_AA64ISAR0_SHA3_IMPL | 205 ID_AA64ISAR0_RDM_IMPL | 206 ID_AA64ISAR0_Atomic_IMPL | 207 ID_AA64ISAR0_CRC32_BASE | 208 ID_AA64ISAR0_SHA2_512 | 209 ID_AA64ISAR0_SHA1_BASE | 210 ID_AA64ISAR0_AES_PMULL, 211 .id_aa64mmfr0 = 212 ID_AA64MMFR0_TGran4_IMPL | 213 ID_AA64MMFR0_TGran64_IMPL | 214 ID_AA64MMFR0_TGran16_IMPL | 215 ID_AA64MMFR0_ASIDBits_16 | 216 ID_AA64MMFR0_PARange_4P, 217 .id_aa64mmfr1 = 218 ID_AA64MMFR1_SpecSEI_IMPL | 219 ID_AA64MMFR1_PAN_ATS1E1 | 220 ID_AA64MMFR1_HAFDBS_AF, 221 .id_aa64pfr0 = 222 ID_AA64PFR0_GIC_CPUIF_NONE | 223 ID_AA64PFR0_AdvSIMD_HP | 224 ID_AA64PFR0_FP_HP | 225 ID_AA64PFR0_EL3_64 | 226 ID_AA64PFR0_EL2_64 | 227 ID_AA64PFR0_EL1_64 | 228 ID_AA64PFR0_EL0_64, 229 }; 230 231 /* Host registers masked by vmm_arch_regs_masks. */ 232 static struct vmm_regs vmm_arch_regs; 233 234 u_int vm_maxcpu; 235 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 236 &vm_maxcpu, 0, "Maximum number of vCPUs"); 237 238 static void vm_free_memmap(struct vm *vm, int ident); 239 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); 240 static void vcpu_notify_event_locked(struct vcpu *vcpu); 241 242 /* global statistics */ 243 VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); 244 VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); 245 VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); 246 VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); 247 VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); 248 VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); 249 VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); 250 VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); 251 VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); 252 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); 253 VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); 254 VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); 255 VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); 256 VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); 257 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); 258 259 /* 260 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this 261 * is a safe value for now. 262 */ 263 #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) 264 265 static int 266 vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) 267 { 268 #define _FETCH_KERN_REG(reg, field) do { \ 269 regs->field = vmm_arch_regs_masks.field; \ 270 if (!get_kernel_reg_masked(reg, ®s->field, masks->field)) \ 271 regs->field = 0; \ 272 } while (0) 273 _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0); 274 _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1); 275 _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0); 276 _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1); 277 _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0); 278 _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1); 279 _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2); 280 _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0); 281 _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1); 282 _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2); 283 _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0); 284 _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1); 285 #undef _FETCH_KERN_REG 286 return (0); 287 } 288 289 static void 290 vcpu_cleanup(struct vcpu *vcpu, bool destroy) 291 { 292 vmmops_vcpu_cleanup(vcpu->cookie); 293 vcpu->cookie = NULL; 294 if (destroy) { 295 vmm_stat_free(vcpu->stats); 296 fpu_save_area_free(vcpu->guestfpu); 297 vcpu_lock_destroy(vcpu); 298 } 299 } 300 301 static struct vcpu * 302 vcpu_alloc(struct vm *vm, int vcpu_id) 303 { 304 struct vcpu *vcpu; 305 306 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 307 ("vcpu_alloc: invalid vcpu %d", vcpu_id)); 308 309 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); 310 vcpu_lock_init(vcpu); 311 vcpu->state = VCPU_IDLE; 312 vcpu->hostcpu = NOCPU; 313 vcpu->vcpuid = vcpu_id; 314 vcpu->vm = vm; 315 vcpu->guestfpu = fpu_save_area_alloc(); 316 vcpu->stats = vmm_stat_alloc(); 317 return (vcpu); 318 } 319 320 static void 321 vcpu_init(struct vcpu *vcpu) 322 { 323 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 324 MPASS(vcpu->cookie != NULL); 325 fpu_save_area_reset(vcpu->guestfpu); 326 vmm_stat_init(vcpu->stats); 327 } 328 329 struct vm_exit * 330 vm_exitinfo(struct vcpu *vcpu) 331 { 332 return (&vcpu->exitinfo); 333 } 334 335 static int 336 vmm_init(void) 337 { 338 int error; 339 340 vm_maxcpu = mp_ncpus; 341 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 342 343 if (vm_maxcpu > VM_MAXCPU) { 344 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 345 vm_maxcpu = VM_MAXCPU; 346 } 347 if (vm_maxcpu == 0) 348 vm_maxcpu = 1; 349 350 error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); 351 if (error != 0) 352 return (error); 353 354 return (vmmops_modinit(0)); 355 } 356 357 static int 358 vmm_handler(module_t mod, int what, void *arg) 359 { 360 int error; 361 362 switch (what) { 363 case MOD_LOAD: 364 /* TODO: if (vmm_is_hw_supported()) { */ 365 error = vmmdev_init(); 366 if (error != 0) 367 break; 368 error = vmm_init(); 369 if (error == 0) 370 vmm_initialized = true; 371 break; 372 case MOD_UNLOAD: 373 /* TODO: if (vmm_is_hw_supported()) { */ 374 error = vmmdev_cleanup(); 375 if (error == 0 && vmm_initialized) { 376 error = vmmops_modcleanup(); 377 if (error) 378 vmm_initialized = false; 379 } 380 break; 381 default: 382 error = 0; 383 break; 384 } 385 return (error); 386 } 387 388 static moduledata_t vmm_kmod = { 389 "vmm", 390 vmm_handler, 391 NULL 392 }; 393 394 /* 395 * vmm initialization has the following dependencies: 396 * 397 * - HYP initialization requires smp_rendezvous() and therefore must happen 398 * after SMP is fully functional (after SI_SUB_SMP). 399 * - vmm device initialization requires an initialized devfs. 400 */ 401 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); 402 MODULE_VERSION(vmm, 1); 403 404 static void 405 vm_init(struct vm *vm, bool create) 406 { 407 int i; 408 409 vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 410 MPASS(vm->cookie != NULL); 411 412 CPU_ZERO(&vm->active_cpus); 413 CPU_ZERO(&vm->debug_cpus); 414 415 vm->suspend = 0; 416 CPU_ZERO(&vm->suspended_cpus); 417 418 memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); 419 memset(vm->special_reg, 0, sizeof(vm->special_reg)); 420 421 if (!create) { 422 for (i = 0; i < vm->maxcpus; i++) { 423 if (vm->vcpu[i] != NULL) 424 vcpu_init(vm->vcpu[i]); 425 } 426 } 427 } 428 429 void 430 vm_disable_vcpu_creation(struct vm *vm) 431 { 432 sx_xlock(&vm->vcpus_init_lock); 433 vm->dying = true; 434 sx_xunlock(&vm->vcpus_init_lock); 435 } 436 437 struct vcpu * 438 vm_alloc_vcpu(struct vm *vm, int vcpuid) 439 { 440 struct vcpu *vcpu; 441 442 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 443 return (NULL); 444 445 /* Some interrupt controllers may have a CPU limit */ 446 if (vcpuid >= vgic_max_cpu_count(vm->cookie)) 447 return (NULL); 448 449 vcpu = (struct vcpu *) 450 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); 451 if (__predict_true(vcpu != NULL)) 452 return (vcpu); 453 454 sx_xlock(&vm->vcpus_init_lock); 455 vcpu = vm->vcpu[vcpuid]; 456 if (vcpu == NULL && !vm->dying) { 457 vcpu = vcpu_alloc(vm, vcpuid); 458 vcpu_init(vcpu); 459 460 /* 461 * Ensure vCPU is fully created before updating pointer 462 * to permit unlocked reads above. 463 */ 464 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 465 (uintptr_t)vcpu); 466 } 467 sx_xunlock(&vm->vcpus_init_lock); 468 return (vcpu); 469 } 470 471 void 472 vm_slock_vcpus(struct vm *vm) 473 { 474 sx_slock(&vm->vcpus_init_lock); 475 } 476 477 void 478 vm_unlock_vcpus(struct vm *vm) 479 { 480 sx_unlock(&vm->vcpus_init_lock); 481 } 482 483 int 484 vm_create(const char *name, struct vm **retvm) 485 { 486 struct vm *vm; 487 struct vmspace *vmspace; 488 489 /* 490 * If vmm.ko could not be successfully initialized then don't attempt 491 * to create the virtual machine. 492 */ 493 if (!vmm_initialized) 494 return (ENXIO); 495 496 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 497 return (EINVAL); 498 499 vmspace = vmmops_vmspace_alloc(0, 1ul << 39); 500 if (vmspace == NULL) 501 return (ENOMEM); 502 503 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 504 strcpy(vm->name, name); 505 vm->vmspace = vmspace; 506 sx_init(&vm->mem_segs_lock, "vm mem_segs"); 507 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 508 509 vm->sockets = 1; 510 vm->cores = 1; /* XXX backwards compatibility */ 511 vm->threads = 1; /* XXX backwards compatibility */ 512 vm->maxcpus = vm_maxcpu; 513 514 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 515 M_WAITOK | M_ZERO); 516 517 vm_init(vm, true); 518 519 *retvm = vm; 520 return (0); 521 } 522 523 void 524 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 525 uint16_t *threads, uint16_t *maxcpus) 526 { 527 *sockets = vm->sockets; 528 *cores = vm->cores; 529 *threads = vm->threads; 530 *maxcpus = vm->maxcpus; 531 } 532 533 uint16_t 534 vm_get_maxcpus(struct vm *vm) 535 { 536 return (vm->maxcpus); 537 } 538 539 int 540 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 541 uint16_t threads, uint16_t maxcpus) 542 { 543 /* Ignore maxcpus. */ 544 if ((sockets * cores * threads) > vm->maxcpus) 545 return (EINVAL); 546 vm->sockets = sockets; 547 vm->cores = cores; 548 vm->threads = threads; 549 return(0); 550 } 551 552 static void 553 vm_cleanup(struct vm *vm, bool destroy) 554 { 555 struct mem_map *mm; 556 pmap_t pmap __diagused; 557 int i; 558 559 if (destroy) { 560 pmap = vmspace_pmap(vm->vmspace); 561 sched_pin(); 562 PCPU_SET(curvmpmap, NULL); 563 sched_unpin(); 564 CPU_FOREACH(i) { 565 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); 566 } 567 } 568 569 vgic_detach_from_vm(vm->cookie); 570 571 for (i = 0; i < vm->maxcpus; i++) { 572 if (vm->vcpu[i] != NULL) 573 vcpu_cleanup(vm->vcpu[i], destroy); 574 } 575 576 vmmops_cleanup(vm->cookie); 577 578 /* 579 * System memory is removed from the guest address space only when 580 * the VM is destroyed. This is because the mapping remains the same 581 * across VM reset. 582 * 583 * Device memory can be relocated by the guest (e.g. using PCI BARs) 584 * so those mappings are removed on a VM reset. 585 */ 586 if (!destroy) { 587 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 588 mm = &vm->mem_maps[i]; 589 if (destroy || !sysmem_mapping(vm, mm)) 590 vm_free_memmap(vm, i); 591 } 592 } 593 594 if (destroy) { 595 for (i = 0; i < VM_MAX_MEMSEGS; i++) 596 vm_free_memseg(vm, i); 597 598 vmmops_vmspace_free(vm->vmspace); 599 vm->vmspace = NULL; 600 601 for (i = 0; i < vm->maxcpus; i++) 602 free(vm->vcpu[i], M_VMM); 603 free(vm->vcpu, M_VMM); 604 sx_destroy(&vm->vcpus_init_lock); 605 sx_destroy(&vm->mem_segs_lock); 606 } 607 } 608 609 void 610 vm_destroy(struct vm *vm) 611 { 612 vm_cleanup(vm, true); 613 free(vm, M_VMM); 614 } 615 616 int 617 vm_reinit(struct vm *vm) 618 { 619 int error; 620 621 /* 622 * A virtual machine can be reset only if all vcpus are suspended. 623 */ 624 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 625 vm_cleanup(vm, false); 626 vm_init(vm, false); 627 error = 0; 628 } else { 629 error = EBUSY; 630 } 631 632 return (error); 633 } 634 635 const char * 636 vm_name(struct vm *vm) 637 { 638 return (vm->name); 639 } 640 641 void 642 vm_slock_memsegs(struct vm *vm) 643 { 644 sx_slock(&vm->mem_segs_lock); 645 } 646 647 void 648 vm_xlock_memsegs(struct vm *vm) 649 { 650 sx_xlock(&vm->mem_segs_lock); 651 } 652 653 void 654 vm_unlock_memsegs(struct vm *vm) 655 { 656 sx_unlock(&vm->mem_segs_lock); 657 } 658 659 /* 660 * Return 'true' if 'gpa' is allocated in the guest address space. 661 * 662 * This function is called in the context of a running vcpu which acts as 663 * an implicit lock on 'vm->mem_maps[]'. 664 */ 665 bool 666 vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) 667 { 668 struct vm *vm = vcpu->vm; 669 struct mem_map *mm; 670 int i; 671 672 #ifdef INVARIANTS 673 int hostcpu, state; 674 state = vcpu_get_state(vcpu, &hostcpu); 675 KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, 676 ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); 677 #endif 678 679 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 680 mm = &vm->mem_maps[i]; 681 if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) 682 return (true); /* 'gpa' is sysmem or devmem */ 683 } 684 685 return (false); 686 } 687 688 int 689 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) 690 { 691 struct mem_seg *seg; 692 vm_object_t obj; 693 694 sx_assert(&vm->mem_segs_lock, SX_XLOCKED); 695 696 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 697 return (EINVAL); 698 699 if (len == 0 || (len & PAGE_MASK)) 700 return (EINVAL); 701 702 seg = &vm->mem_segs[ident]; 703 if (seg->object != NULL) { 704 if (seg->len == len && seg->sysmem == sysmem) 705 return (EEXIST); 706 else 707 return (EINVAL); 708 } 709 710 obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); 711 if (obj == NULL) 712 return (ENOMEM); 713 714 seg->len = len; 715 seg->object = obj; 716 seg->sysmem = sysmem; 717 return (0); 718 } 719 720 int 721 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 722 vm_object_t *objptr) 723 { 724 struct mem_seg *seg; 725 726 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 727 728 if (ident < 0 || ident >= VM_MAX_MEMSEGS) 729 return (EINVAL); 730 731 seg = &vm->mem_segs[ident]; 732 if (len) 733 *len = seg->len; 734 if (sysmem) 735 *sysmem = seg->sysmem; 736 if (objptr) 737 *objptr = seg->object; 738 return (0); 739 } 740 741 void 742 vm_free_memseg(struct vm *vm, int ident) 743 { 744 struct mem_seg *seg; 745 746 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, 747 ("%s: invalid memseg ident %d", __func__, ident)); 748 749 seg = &vm->mem_segs[ident]; 750 if (seg->object != NULL) { 751 vm_object_deallocate(seg->object); 752 bzero(seg, sizeof(struct mem_seg)); 753 } 754 } 755 756 int 757 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, 758 size_t len, int prot, int flags) 759 { 760 struct mem_seg *seg; 761 struct mem_map *m, *map; 762 vm_ooffset_t last; 763 int i, error; 764 765 if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) 766 return (EINVAL); 767 768 if (flags & ~VM_MEMMAP_F_WIRED) 769 return (EINVAL); 770 771 if (segid < 0 || segid >= VM_MAX_MEMSEGS) 772 return (EINVAL); 773 774 seg = &vm->mem_segs[segid]; 775 if (seg->object == NULL) 776 return (EINVAL); 777 778 last = first + len; 779 if (first < 0 || first >= last || last > seg->len) 780 return (EINVAL); 781 782 if ((gpa | first | last) & PAGE_MASK) 783 return (EINVAL); 784 785 map = NULL; 786 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 787 m = &vm->mem_maps[i]; 788 if (m->len == 0) { 789 map = m; 790 break; 791 } 792 } 793 794 if (map == NULL) 795 return (ENOSPC); 796 797 error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, 798 len, 0, VMFS_NO_SPACE, prot, prot, 0); 799 if (error != KERN_SUCCESS) 800 return (EFAULT); 801 802 vm_object_reference(seg->object); 803 804 if (flags & VM_MEMMAP_F_WIRED) { 805 error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, 806 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 807 if (error != KERN_SUCCESS) { 808 vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); 809 return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : 810 EFAULT); 811 } 812 } 813 814 map->gpa = gpa; 815 map->len = len; 816 map->segoff = first; 817 map->segid = segid; 818 map->prot = prot; 819 map->flags = flags; 820 return (0); 821 } 822 823 int 824 vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) 825 { 826 struct mem_map *m; 827 int i; 828 829 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 830 m = &vm->mem_maps[i]; 831 if (m->gpa == gpa && m->len == len) { 832 vm_free_memmap(vm, i); 833 return (0); 834 } 835 } 836 837 return (EINVAL); 838 } 839 840 int 841 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 842 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 843 { 844 struct mem_map *mm, *mmnext; 845 int i; 846 847 mmnext = NULL; 848 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 849 mm = &vm->mem_maps[i]; 850 if (mm->len == 0 || mm->gpa < *gpa) 851 continue; 852 if (mmnext == NULL || mm->gpa < mmnext->gpa) 853 mmnext = mm; 854 } 855 856 if (mmnext != NULL) { 857 *gpa = mmnext->gpa; 858 if (segid) 859 *segid = mmnext->segid; 860 if (segoff) 861 *segoff = mmnext->segoff; 862 if (len) 863 *len = mmnext->len; 864 if (prot) 865 *prot = mmnext->prot; 866 if (flags) 867 *flags = mmnext->flags; 868 return (0); 869 } else { 870 return (ENOENT); 871 } 872 } 873 874 static void 875 vm_free_memmap(struct vm *vm, int ident) 876 { 877 struct mem_map *mm; 878 int error __diagused; 879 880 mm = &vm->mem_maps[ident]; 881 if (mm->len) { 882 error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, 883 mm->gpa + mm->len); 884 KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", 885 __func__, error)); 886 bzero(mm, sizeof(struct mem_map)); 887 } 888 } 889 890 static __inline bool 891 sysmem_mapping(struct vm *vm, struct mem_map *mm) 892 { 893 894 if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) 895 return (true); 896 else 897 return (false); 898 } 899 900 vm_paddr_t 901 vmm_sysmem_maxaddr(struct vm *vm) 902 { 903 struct mem_map *mm; 904 vm_paddr_t maxaddr; 905 int i; 906 907 maxaddr = 0; 908 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 909 mm = &vm->mem_maps[i]; 910 if (sysmem_mapping(vm, mm)) { 911 if (maxaddr < mm->gpa + mm->len) 912 maxaddr = mm->gpa + mm->len; 913 } 914 } 915 return (maxaddr); 916 } 917 918 int 919 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 920 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 921 { 922 923 vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); 924 return (0); 925 } 926 927 static int 928 vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) 929 { 930 *rval = 0; 931 return (0); 932 } 933 934 static int 935 vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) 936 { 937 *rval = *(uint64_t *)arg; 938 return (0); 939 } 940 941 static int 942 vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) 943 { 944 return (0); 945 } 946 947 static const struct vmm_special_reg vmm_special_regs[] = { 948 #define SPECIAL_REG(_reg, _read, _write) \ 949 { \ 950 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 951 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 952 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 953 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 954 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 955 .esr_mask = ISS_MSR_REG_MASK, \ 956 .reg_read = (_read), \ 957 .reg_write = (_write), \ 958 .arg = NULL, \ 959 } 960 #define ID_SPECIAL_REG(_reg, _name) \ 961 { \ 962 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 963 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 964 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 965 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 966 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 967 .esr_mask = ISS_MSR_REG_MASK, \ 968 .reg_read = vmm_reg_read_arg, \ 969 .reg_write = vmm_reg_wi, \ 970 .arg = &(vmm_arch_regs._name), \ 971 } 972 973 /* ID registers */ 974 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), 975 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), 976 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), 977 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), 978 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), 979 980 /* 981 * All other ID registers are read as zero. 982 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. 983 */ 984 { 985 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | 986 (0 << ISS_MSR_OP1_SHIFT) | 987 (0 << ISS_MSR_CRn_SHIFT) | 988 (0 << ISS_MSR_CRm_SHIFT), 989 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | 990 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), 991 .reg_read = vmm_reg_raz, 992 .reg_write = vmm_reg_wi, 993 .arg = NULL, 994 }, 995 996 /* Counter physical registers */ 997 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), 998 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, 999 vtimer_phys_cval_write), 1000 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, 1001 vtimer_phys_tval_write), 1002 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), 1003 #undef SPECIAL_REG 1004 }; 1005 1006 void 1007 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, 1008 reg_read_t reg_read, reg_write_t reg_write, void *arg) 1009 { 1010 int i; 1011 1012 for (i = 0; i < nitems(vm->special_reg); i++) { 1013 if (vm->special_reg[i].esr_iss == 0 && 1014 vm->special_reg[i].esr_mask == 0) { 1015 vm->special_reg[i].esr_iss = iss; 1016 vm->special_reg[i].esr_mask = mask; 1017 vm->special_reg[i].reg_read = reg_read; 1018 vm->special_reg[i].reg_write = reg_write; 1019 vm->special_reg[i].arg = arg; 1020 return; 1021 } 1022 } 1023 1024 panic("%s: No free special register slot", __func__); 1025 } 1026 1027 void 1028 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) 1029 { 1030 int i; 1031 1032 for (i = 0; i < nitems(vm->special_reg); i++) { 1033 if (vm->special_reg[i].esr_iss == iss && 1034 vm->special_reg[i].esr_mask == mask) { 1035 memset(&vm->special_reg[i], 0, 1036 sizeof(vm->special_reg[i])); 1037 return; 1038 } 1039 } 1040 1041 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, 1042 mask); 1043 } 1044 1045 static int 1046 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) 1047 { 1048 struct vm *vm; 1049 struct vm_exit *vme; 1050 struct vre *vre; 1051 int i, rv; 1052 1053 vm = vcpu->vm; 1054 vme = &vcpu->exitinfo; 1055 vre = &vme->u.reg_emul.vre; 1056 1057 for (i = 0; i < nitems(vm->special_reg); i++) { 1058 if (vm->special_reg[i].esr_iss == 0 && 1059 vm->special_reg[i].esr_mask == 0) 1060 continue; 1061 1062 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == 1063 vm->special_reg[i].esr_iss) { 1064 rv = vmm_emulate_register(vcpu, vre, 1065 vm->special_reg[i].reg_read, 1066 vm->special_reg[i].reg_write, 1067 vm->special_reg[i].arg); 1068 if (rv == 0) { 1069 *retu = false; 1070 } 1071 return (rv); 1072 } 1073 } 1074 for (i = 0; i < nitems(vmm_special_regs); i++) { 1075 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == 1076 vmm_special_regs[i].esr_iss) { 1077 rv = vmm_emulate_register(vcpu, vre, 1078 vmm_special_regs[i].reg_read, 1079 vmm_special_regs[i].reg_write, 1080 vmm_special_regs[i].arg); 1081 if (rv == 0) { 1082 *retu = false; 1083 } 1084 return (rv); 1085 } 1086 } 1087 1088 1089 *retu = true; 1090 return (0); 1091 } 1092 1093 void 1094 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 1095 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 1096 { 1097 int i; 1098 1099 for (i = 0; i < nitems(vm->mmio_region); i++) { 1100 if (vm->mmio_region[i].start == 0 && 1101 vm->mmio_region[i].end == 0) { 1102 vm->mmio_region[i].start = start; 1103 vm->mmio_region[i].end = start + size; 1104 vm->mmio_region[i].read = mmio_read; 1105 vm->mmio_region[i].write = mmio_write; 1106 return; 1107 } 1108 } 1109 1110 panic("%s: No free MMIO region", __func__); 1111 } 1112 1113 void 1114 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 1115 { 1116 int i; 1117 1118 for (i = 0; i < nitems(vm->mmio_region); i++) { 1119 if (vm->mmio_region[i].start == start && 1120 vm->mmio_region[i].end == start + size) { 1121 memset(&vm->mmio_region[i], 0, 1122 sizeof(vm->mmio_region[i])); 1123 return; 1124 } 1125 } 1126 1127 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 1128 start + size); 1129 } 1130 1131 static int 1132 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 1133 { 1134 struct vm *vm; 1135 struct vm_exit *vme; 1136 struct vie *vie; 1137 struct hyp *hyp; 1138 uint64_t fault_ipa; 1139 struct vm_guest_paging *paging; 1140 struct vmm_mmio_region *vmr; 1141 int error, i; 1142 1143 vm = vcpu->vm; 1144 hyp = vm->cookie; 1145 if (!hyp->vgic_attached) 1146 goto out_user; 1147 1148 vme = &vcpu->exitinfo; 1149 vie = &vme->u.inst_emul.vie; 1150 paging = &vme->u.inst_emul.paging; 1151 1152 fault_ipa = vme->u.inst_emul.gpa; 1153 1154 vmr = NULL; 1155 for (i = 0; i < nitems(vm->mmio_region); i++) { 1156 if (vm->mmio_region[i].start <= fault_ipa && 1157 vm->mmio_region[i].end > fault_ipa) { 1158 vmr = &vm->mmio_region[i]; 1159 break; 1160 } 1161 } 1162 if (vmr == NULL) 1163 goto out_user; 1164 1165 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 1166 vmr->read, vmr->write, retu); 1167 return (error); 1168 1169 out_user: 1170 *retu = true; 1171 return (0); 1172 } 1173 1174 int 1175 vm_suspend(struct vm *vm, enum vm_suspend_how how) 1176 { 1177 int i; 1178 1179 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1180 return (EINVAL); 1181 1182 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1183 VM_CTR2(vm, "virtual machine already suspended %d/%d", 1184 vm->suspend, how); 1185 return (EALREADY); 1186 } 1187 1188 VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1189 1190 /* 1191 * Notify all active vcpus that they are now suspended. 1192 */ 1193 for (i = 0; i < vm->maxcpus; i++) { 1194 if (CPU_ISSET(i, &vm->active_cpus)) 1195 vcpu_notify_event(vm_vcpu(vm, i)); 1196 } 1197 1198 return (0); 1199 } 1200 1201 void 1202 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 1203 { 1204 struct vm *vm = vcpu->vm; 1205 struct vm_exit *vmexit; 1206 1207 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1208 ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1209 1210 vmexit = vm_exitinfo(vcpu); 1211 vmexit->pc = pc; 1212 vmexit->inst_length = 4; 1213 vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1214 vmexit->u.suspended.how = vm->suspend; 1215 } 1216 1217 void 1218 vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 1219 { 1220 struct vm_exit *vmexit; 1221 1222 vmexit = vm_exitinfo(vcpu); 1223 vmexit->pc = pc; 1224 vmexit->inst_length = 4; 1225 vmexit->exitcode = VM_EXITCODE_DEBUG; 1226 } 1227 1228 int 1229 vm_activate_cpu(struct vcpu *vcpu) 1230 { 1231 struct vm *vm = vcpu->vm; 1232 1233 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1234 return (EBUSY); 1235 1236 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 1237 return (0); 1238 1239 } 1240 1241 int 1242 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 1243 { 1244 if (vcpu == NULL) { 1245 vm->debug_cpus = vm->active_cpus; 1246 for (int i = 0; i < vm->maxcpus; i++) { 1247 if (CPU_ISSET(i, &vm->active_cpus)) 1248 vcpu_notify_event(vm_vcpu(vm, i)); 1249 } 1250 } else { 1251 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 1252 return (EINVAL); 1253 1254 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1255 vcpu_notify_event(vcpu); 1256 } 1257 return (0); 1258 } 1259 1260 int 1261 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 1262 { 1263 1264 if (vcpu == NULL) { 1265 CPU_ZERO(&vm->debug_cpus); 1266 } else { 1267 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 1268 return (EINVAL); 1269 1270 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1271 } 1272 return (0); 1273 } 1274 1275 int 1276 vcpu_debugged(struct vcpu *vcpu) 1277 { 1278 1279 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1280 } 1281 1282 cpuset_t 1283 vm_active_cpus(struct vm *vm) 1284 { 1285 1286 return (vm->active_cpus); 1287 } 1288 1289 cpuset_t 1290 vm_debug_cpus(struct vm *vm) 1291 { 1292 1293 return (vm->debug_cpus); 1294 } 1295 1296 cpuset_t 1297 vm_suspended_cpus(struct vm *vm) 1298 { 1299 1300 return (vm->suspended_cpus); 1301 } 1302 1303 1304 void * 1305 vcpu_stats(struct vcpu *vcpu) 1306 { 1307 1308 return (vcpu->stats); 1309 } 1310 1311 /* 1312 * This function is called to ensure that a vcpu "sees" a pending event 1313 * as soon as possible: 1314 * - If the vcpu thread is sleeping then it is woken up. 1315 * - If the vcpu is running on a different host_cpu then an IPI will be directed 1316 * to the host_cpu to cause the vcpu to trap into the hypervisor. 1317 */ 1318 static void 1319 vcpu_notify_event_locked(struct vcpu *vcpu) 1320 { 1321 int hostcpu; 1322 1323 hostcpu = vcpu->hostcpu; 1324 if (vcpu->state == VCPU_RUNNING) { 1325 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1326 if (hostcpu != curcpu) { 1327 ipi_cpu(hostcpu, vmm_ipinum); 1328 } else { 1329 /* 1330 * If the 'vcpu' is running on 'curcpu' then it must 1331 * be sending a notification to itself (e.g. SELF_IPI). 1332 * The pending event will be picked up when the vcpu 1333 * transitions back to guest context. 1334 */ 1335 } 1336 } else { 1337 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1338 "with hostcpu %d", vcpu->state, hostcpu)); 1339 if (vcpu->state == VCPU_SLEEPING) 1340 wakeup_one(vcpu); 1341 } 1342 } 1343 1344 void 1345 vcpu_notify_event(struct vcpu *vcpu) 1346 { 1347 vcpu_lock(vcpu); 1348 vcpu_notify_event_locked(vcpu); 1349 vcpu_unlock(vcpu); 1350 } 1351 1352 static void 1353 restore_guest_fpustate(struct vcpu *vcpu) 1354 { 1355 1356 /* flush host state to the pcb */ 1357 vfp_save_state(curthread, curthread->td_pcb); 1358 /* Ensure the VFP state will be re-loaded when exiting the guest */ 1359 PCPU_SET(fpcurthread, NULL); 1360 1361 /* restore guest FPU state */ 1362 vfp_enable(); 1363 vfp_restore(vcpu->guestfpu); 1364 1365 /* 1366 * The FPU is now "dirty" with the guest's state so turn on emulation 1367 * to trap any access to the FPU by the host. 1368 */ 1369 vfp_disable(); 1370 } 1371 1372 static void 1373 save_guest_fpustate(struct vcpu *vcpu) 1374 { 1375 if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) != 1376 CPACR_FPEN_TRAP_ALL1) 1377 panic("VFP not enabled in host!"); 1378 1379 /* save guest FPU state */ 1380 vfp_enable(); 1381 vfp_store(vcpu->guestfpu); 1382 vfp_disable(); 1383 1384 KASSERT(PCPU_GET(fpcurthread) == NULL, 1385 ("%s: fpcurthread set with guest registers", __func__)); 1386 } 1387 static int 1388 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1389 bool from_idle) 1390 { 1391 int error; 1392 1393 vcpu_assert_locked(vcpu); 1394 1395 /* 1396 * State transitions from the vmmdev_ioctl() must always begin from 1397 * the VCPU_IDLE state. This guarantees that there is only a single 1398 * ioctl() operating on a vcpu at any point. 1399 */ 1400 if (from_idle) { 1401 while (vcpu->state != VCPU_IDLE) { 1402 vcpu_notify_event_locked(vcpu); 1403 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1404 } 1405 } else { 1406 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1407 "vcpu idle state")); 1408 } 1409 1410 if (vcpu->state == VCPU_RUNNING) { 1411 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1412 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1413 } else { 1414 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1415 "vcpu that is not running", vcpu->hostcpu)); 1416 } 1417 1418 /* 1419 * The following state transitions are allowed: 1420 * IDLE -> FROZEN -> IDLE 1421 * FROZEN -> RUNNING -> FROZEN 1422 * FROZEN -> SLEEPING -> FROZEN 1423 */ 1424 switch (vcpu->state) { 1425 case VCPU_IDLE: 1426 case VCPU_RUNNING: 1427 case VCPU_SLEEPING: 1428 error = (newstate != VCPU_FROZEN); 1429 break; 1430 case VCPU_FROZEN: 1431 error = (newstate == VCPU_FROZEN); 1432 break; 1433 default: 1434 error = 1; 1435 break; 1436 } 1437 1438 if (error) 1439 return (EBUSY); 1440 1441 vcpu->state = newstate; 1442 if (newstate == VCPU_RUNNING) 1443 vcpu->hostcpu = curcpu; 1444 else 1445 vcpu->hostcpu = NOCPU; 1446 1447 if (newstate == VCPU_IDLE) 1448 wakeup(&vcpu->state); 1449 1450 return (0); 1451 } 1452 1453 static void 1454 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1455 { 1456 int error; 1457 1458 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1459 panic("Error %d setting state to %d\n", error, newstate); 1460 } 1461 1462 static void 1463 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1464 { 1465 int error; 1466 1467 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1468 panic("Error %d setting state to %d", error, newstate); 1469 } 1470 1471 int 1472 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1473 { 1474 if (type < 0 || type >= VM_CAP_MAX) 1475 return (EINVAL); 1476 1477 return (vmmops_getcap(vcpu->cookie, type, retval)); 1478 } 1479 1480 int 1481 vm_set_capability(struct vcpu *vcpu, int type, int val) 1482 { 1483 if (type < 0 || type >= VM_CAP_MAX) 1484 return (EINVAL); 1485 1486 return (vmmops_setcap(vcpu->cookie, type, val)); 1487 } 1488 1489 struct vm * 1490 vcpu_vm(struct vcpu *vcpu) 1491 { 1492 return (vcpu->vm); 1493 } 1494 1495 int 1496 vcpu_vcpuid(struct vcpu *vcpu) 1497 { 1498 return (vcpu->vcpuid); 1499 } 1500 1501 void * 1502 vcpu_get_cookie(struct vcpu *vcpu) 1503 { 1504 return (vcpu->cookie); 1505 } 1506 1507 struct vcpu * 1508 vm_vcpu(struct vm *vm, int vcpuid) 1509 { 1510 return (vm->vcpu[vcpuid]); 1511 } 1512 1513 int 1514 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1515 { 1516 int error; 1517 1518 vcpu_lock(vcpu); 1519 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1520 vcpu_unlock(vcpu); 1521 1522 return (error); 1523 } 1524 1525 enum vcpu_state 1526 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1527 { 1528 enum vcpu_state state; 1529 1530 vcpu_lock(vcpu); 1531 state = vcpu->state; 1532 if (hostcpu != NULL) 1533 *hostcpu = vcpu->hostcpu; 1534 vcpu_unlock(vcpu); 1535 1536 return (state); 1537 } 1538 1539 static void * 1540 _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1541 void **cookie) 1542 { 1543 int i, count, pageoff; 1544 struct mem_map *mm; 1545 vm_page_t m; 1546 1547 pageoff = gpa & PAGE_MASK; 1548 if (len > PAGE_SIZE - pageoff) 1549 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 1550 1551 count = 0; 1552 for (i = 0; i < VM_MAX_MEMMAPS; i++) { 1553 mm = &vm->mem_maps[i]; 1554 if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && 1555 gpa < mm->gpa + mm->len) { 1556 count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 1557 trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 1558 break; 1559 } 1560 } 1561 1562 if (count == 1) { 1563 *cookie = m; 1564 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 1565 } else { 1566 *cookie = NULL; 1567 return (NULL); 1568 } 1569 } 1570 1571 void * 1572 vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, 1573 void **cookie) 1574 { 1575 #ifdef INVARIANTS 1576 /* 1577 * The current vcpu should be frozen to ensure 'vm_memmap[]' 1578 * stability. 1579 */ 1580 int state = vcpu_get_state(vcpu, NULL); 1581 KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", 1582 __func__, state)); 1583 #endif 1584 return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); 1585 } 1586 1587 void * 1588 vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 1589 void **cookie) 1590 { 1591 sx_assert(&vm->mem_segs_lock, SX_LOCKED); 1592 return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); 1593 } 1594 1595 void 1596 vm_gpa_release(void *cookie) 1597 { 1598 vm_page_t m = cookie; 1599 1600 vm_page_unwire(m, PQ_ACTIVE); 1601 } 1602 1603 int 1604 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1605 { 1606 1607 if (reg >= VM_REG_LAST) 1608 return (EINVAL); 1609 1610 return (vmmops_getreg(vcpu->cookie, reg, retval)); 1611 } 1612 1613 int 1614 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1615 { 1616 int error; 1617 1618 if (reg >= VM_REG_LAST) 1619 return (EINVAL); 1620 error = vmmops_setreg(vcpu->cookie, reg, val); 1621 if (error || reg != VM_REG_GUEST_PC) 1622 return (error); 1623 1624 vcpu->nextpc = val; 1625 1626 return (0); 1627 } 1628 1629 void * 1630 vm_get_cookie(struct vm *vm) 1631 { 1632 return (vm->cookie); 1633 } 1634 1635 int 1636 vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far) 1637 { 1638 return (vmmops_exception(vcpu->cookie, esr, far)); 1639 } 1640 1641 int 1642 vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr) 1643 { 1644 return (vgic_attach_to_vm(vm->cookie, descr)); 1645 } 1646 1647 int 1648 vm_assert_irq(struct vm *vm, uint32_t irq) 1649 { 1650 return (vgic_inject_irq(vm->cookie, -1, irq, true)); 1651 } 1652 1653 int 1654 vm_deassert_irq(struct vm *vm, uint32_t irq) 1655 { 1656 return (vgic_inject_irq(vm->cookie, -1, irq, false)); 1657 } 1658 1659 int 1660 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 1661 int func) 1662 { 1663 /* TODO: Should we raise an SError? */ 1664 return (vgic_inject_msi(vm->cookie, msg, addr)); 1665 } 1666 1667 static int 1668 vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1669 { 1670 struct hypctx *hypctx; 1671 int i; 1672 1673 hypctx = vcpu_get_cookie(vcpu); 1674 1675 if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0) 1676 return (1); 1677 1678 vme->exitcode = VM_EXITCODE_SMCCC; 1679 vme->u.smccc_call.func_id = hypctx->tf.tf_x[0]; 1680 for (i = 0; i < nitems(vme->u.smccc_call.args); i++) 1681 vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1]; 1682 1683 *retu = true; 1684 return (0); 1685 } 1686 1687 static int 1688 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1689 { 1690 vcpu_lock(vcpu); 1691 while (1) { 1692 if (vgic_has_pending_irq(vcpu->cookie)) 1693 break; 1694 1695 if (vcpu_should_yield(vcpu)) 1696 break; 1697 1698 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1699 /* 1700 * XXX msleep_spin() cannot be interrupted by signals so 1701 * wake up periodically to check pending signals. 1702 */ 1703 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); 1704 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1705 } 1706 vcpu_unlock(vcpu); 1707 1708 *retu = false; 1709 return (0); 1710 } 1711 1712 static int 1713 vm_handle_paging(struct vcpu *vcpu, bool *retu) 1714 { 1715 struct vm *vm = vcpu->vm; 1716 struct vm_exit *vme; 1717 struct vm_map *map; 1718 uint64_t addr, esr; 1719 pmap_t pmap; 1720 int ftype, rv; 1721 1722 vme = &vcpu->exitinfo; 1723 1724 pmap = vmspace_pmap(vcpu->vm->vmspace); 1725 addr = vme->u.paging.gpa; 1726 esr = vme->u.paging.esr; 1727 1728 /* The page exists, but the page table needs to be updated. */ 1729 if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS) 1730 return (0); 1731 1732 switch (ESR_ELx_EXCEPTION(esr)) { 1733 case EXCP_INSN_ABORT_L: 1734 case EXCP_DATA_ABORT_L: 1735 ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE; 1736 break; 1737 default: 1738 panic("%s: Invalid exception (esr = %lx)", __func__, esr); 1739 } 1740 1741 map = &vm->vmspace->vm_map; 1742 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1743 if (rv != KERN_SUCCESS) 1744 return (EFAULT); 1745 1746 return (0); 1747 } 1748 1749 static int 1750 vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1751 { 1752 struct vm *vm = vcpu->vm; 1753 int error, i; 1754 struct thread *td; 1755 1756 error = 0; 1757 td = curthread; 1758 1759 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1760 1761 /* 1762 * Wait until all 'active_cpus' have suspended themselves. 1763 * 1764 * Since a VM may be suspended at any time including when one or 1765 * more vcpus are doing a rendezvous we need to call the rendezvous 1766 * handler while we are waiting to prevent a deadlock. 1767 */ 1768 vcpu_lock(vcpu); 1769 while (error == 0) { 1770 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) 1771 break; 1772 1773 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1774 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1775 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1776 if (td_ast_pending(td, TDA_SUSPEND)) { 1777 vcpu_unlock(vcpu); 1778 error = thread_check_susp(td, false); 1779 vcpu_lock(vcpu); 1780 } 1781 } 1782 vcpu_unlock(vcpu); 1783 1784 /* 1785 * Wakeup the other sleeping vcpus and return to userspace. 1786 */ 1787 for (i = 0; i < vm->maxcpus; i++) { 1788 if (CPU_ISSET(i, &vm->suspended_cpus)) { 1789 vcpu_notify_event(vm_vcpu(vm, i)); 1790 } 1791 } 1792 1793 *retu = true; 1794 return (error); 1795 } 1796 1797 int 1798 vm_run(struct vcpu *vcpu) 1799 { 1800 struct vm *vm = vcpu->vm; 1801 struct vm_eventinfo evinfo; 1802 int error, vcpuid; 1803 struct vm_exit *vme; 1804 bool retu; 1805 pmap_t pmap; 1806 1807 vcpuid = vcpu->vcpuid; 1808 1809 if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1810 return (EINVAL); 1811 1812 if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1813 return (EINVAL); 1814 1815 pmap = vmspace_pmap(vm->vmspace); 1816 vme = &vcpu->exitinfo; 1817 evinfo.rptr = NULL; 1818 evinfo.sptr = &vm->suspend; 1819 evinfo.iptr = NULL; 1820 restart: 1821 critical_enter(); 1822 1823 restore_guest_fpustate(vcpu); 1824 1825 vcpu_require_state(vcpu, VCPU_RUNNING); 1826 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); 1827 vcpu_require_state(vcpu, VCPU_FROZEN); 1828 1829 save_guest_fpustate(vcpu); 1830 1831 critical_exit(); 1832 1833 if (error == 0) { 1834 retu = false; 1835 switch (vme->exitcode) { 1836 case VM_EXITCODE_INST_EMUL: 1837 vcpu->nextpc = vme->pc + vme->inst_length; 1838 error = vm_handle_inst_emul(vcpu, &retu); 1839 break; 1840 1841 case VM_EXITCODE_REG_EMUL: 1842 vcpu->nextpc = vme->pc + vme->inst_length; 1843 error = vm_handle_reg_emul(vcpu, &retu); 1844 break; 1845 1846 case VM_EXITCODE_HVC: 1847 /* 1848 * The HVC instruction saves the address for the 1849 * next instruction as the return address. 1850 */ 1851 vcpu->nextpc = vme->pc; 1852 /* 1853 * The PSCI call can change the exit information in the 1854 * case of suspend/reset/poweroff/cpu off/cpu on. 1855 */ 1856 error = vm_handle_smccc_call(vcpu, vme, &retu); 1857 break; 1858 1859 case VM_EXITCODE_WFI: 1860 vcpu->nextpc = vme->pc + vme->inst_length; 1861 error = vm_handle_wfi(vcpu, vme, &retu); 1862 break; 1863 1864 case VM_EXITCODE_PAGING: 1865 vcpu->nextpc = vme->pc; 1866 error = vm_handle_paging(vcpu, &retu); 1867 break; 1868 1869 case VM_EXITCODE_SUSPENDED: 1870 vcpu->nextpc = vme->pc; 1871 error = vm_handle_suspend(vcpu, &retu); 1872 break; 1873 1874 default: 1875 /* Handle in userland */ 1876 vcpu->nextpc = vme->pc; 1877 retu = true; 1878 break; 1879 } 1880 } 1881 1882 if (error == 0 && retu == false) 1883 goto restart; 1884 1885 return (error); 1886 } 1887