/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3
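
/*
 * A mem_seg is a backing VM object created by vm_alloc_memseg(); a mem_map
 * (below) maps a page-aligned range of one of these segments into the guest
 * physical address space (see vm_mmap_memseg()).
 */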

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t		start;
	uint64_t		end;
	mem_region_read_t	read;
	mem_region_write_t	write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
    struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};
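
/*
 * The masks below list the ID register fields that may be exposed to a
 * guest: vmm_regs_init() reads each host register and clears everything
 * outside its mask, and the resulting values are what the ID register
 * handlers in vmm_special_regs[] hand back to the guest.
 */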

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu.  We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do { \
	regs->field = vmm_arch_regs_masks.field; \
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field)) \
		regs->field = 0; \
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}
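
/*
 * For reference, _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0) above
 * expands (modulo the do/while wrapper) to:
 *
 *	regs->id_aa64pfr0 = vmm_arch_regs_masks.id_aa64pfr0;
 *	if (!get_kernel_reg_masked(ID_AA64PFR0_EL1, &regs->id_aa64pfr0,
 *	    masks->id_aa64pfr0))
 *		regs->id_aa64pfr0 = 0;
 */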

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		vmmdev_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}
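
/*
 * Rough lifecycle, a sketch only (the exact sequencing is driven by the
 * vmm device ioctls): vm_create() allocates the VM and its vmspace,
 * vm_alloc_vcpu() lazily creates each vCPU on first use, vm_activate_cpu()
 * marks it active, vm_run() enters the guest until an exit must be handled
 * in userspace, and vm_destroy() tears everything down.
 */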

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	}

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);	/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}
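
/*
 * Typical use of the two-step interface above and below (a sketch; the
 * exact arguments come from the vmm device ioctls): guest RAM is created
 * with vm_alloc_memseg(vm, segid, len, true) and then mapped into the
 * guest physical address space with vm_mmap_memseg(vm, gpa, segid, 0, len,
 * VM_PROT_ALL, flags).
 */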

int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{

	vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
	return (0);
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}
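
/*
 * The three helpers above give read-as-zero, read-a-fixed-value and
 * write-ignored semantics to entries in the special register table below.
 */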

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write) \
	{ \
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
		.esr_mask = ISS_MSR_REG_MASK, \
		.reg_read = (_read), \
		.reg_write = (_write), \
		.arg = NULL, \
	}
#define	ID_SPECIAL_REG(_reg, _name) \
	{ \
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
		.esr_mask = ISS_MSR_REG_MASK, \
		.reg_read = vmm_reg_read_arg, \
		.reg_write = vmm_reg_wi, \
		.arg = &(vmm_arch_regs._name), \
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}
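
/*
 * Device models layered on top of the VM can claim additional system
 * registers at run time with vm_register_reg_handler(); an interrupt
 * controller implementation may, for example, register a handler for its
 * ICC_SGI1R_EL1 trap using the same ISS encoding scheme and
 * ISS_MSR_REG_MASK as the static table above.  Per-VM handlers registered
 * this way take precedence, since vm_handle_reg_emul() below searches
 * vm->special_reg[] before vmm_special_regs[].
 */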

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}
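
/*
 * For a typical vm_run() ioctl the transitions above are driven roughly as
 * follows (a sketch of the usual flow): the vmm device code moves the vcpu
 * from IDLE to FROZEN before calling into vm_run(), vm_run() switches it
 * to RUNNING around vmmops_run() and back to FROZEN afterwards, and the
 * ioctl path returns it to IDLE once the exit has been handled.
 */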

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}
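
/*
 * SMCCC/PSCI calls are not completed in the kernel: vm_handle_smccc_call()
 * above only packages the function ID and arguments into the exit
 * structure and sets *retu, so the PSCI state changes mentioned in
 * vm_run() below are left to the userspace monitor.
 */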

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}