/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include "vmm_ktr.h"
#include "vmm_stat.h"
#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}
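
/*
 * Module event handler. On MOD_LOAD the vmm device is created and the
 * backend is initialized; on MOD_UNLOAD the device is destroyed and, if
 * the backend cleanup fails, vmm_initialized is cleared so that no new
 * virtual machines can be created.
 */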
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		vmmdev_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL/* && !vm->dying*/) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	}

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}
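
/*
 * Map a region of memory segment 'segid' starting at offset 'first' into the
 * guest address space at 'gpa', using the first free slot in 'mem_maps[]'.
 */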
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{

	vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
	return (0);
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}
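
/*
 * Default table of special (system) register handlers. A subset of the ID
 * registers is exposed read-only from the masked host values, the remaining
 * ID register space reads as zero, and the physical counter/timer registers
 * are forwarded to the virtual timer.
 */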
static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}
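
/*
 * Emulate a trapped system register access. Per-VM handlers registered with
 * vm_register_reg_handler() are searched first, then the global
 * vmm_special_regs table; if no handler matches the exit is returned to
 * userspace.
 */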
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}
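
/*
 * Switch the VFP state from the host to the guest. The host state is saved
 * to the PCB and the VFP is left disabled on return so that any host access
 * traps while the guest state is loaded; save_guest_fpustate() stores the
 * guest state again on the way out of the guest.
 */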
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}
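
/*
 * Forward an SMCCC (e.g. PSCI) call made by the guest to userspace by
 * copying the function ID and arguments out of the trap frame.
 */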
static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}
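
/*
 * Handle a stage 2 fault: first let pmap_fault() update the page tables for
 * pages that are already resident, otherwise fault the page in through the
 * VM map backing the guest address space.
 */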
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}
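
/*
 * Run the vcpu in the backend until it exits, then dispatch on the exit
 * code. Exits that cannot be handled in the kernel set 'retu' and are
 * returned to userspace; all others resume the guest at 'nextpc'.
 */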
int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}