/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t		start;
	uint64_t		end;
	mem_region_read_t	read;
	mem_region_write_t	write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	}

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);	/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{

	vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
	return (0);
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

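/*
 * Register an in-kernel MMIO emulation handler for the guest physical
 * address range [start, start + size). The handler is recorded in the
 * first free vm->mmio_region[] slot; the function panics if all
 * VM_MAX_MMIO_REGIONS slots are already in use.
 */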
void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

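/*
 * Get/set vCPU capabilities. Both wrappers only validate the capability
 * type before forwarding the request to the CPU-specific backend via
 * vmmops_getcap()/vmmops_setcap().
 */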
int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}