/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include "vmm_ktr.h"
#include "vmm_stat.h"
#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3
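/*
 * A mem_seg describes a region of guest memory backed by a VM object; a
 * mem_map (below) places part of a segment at a guest physical address.
 * See vm_alloc_memseg() and vm_mmap_memseg().
 */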
struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t		start;
	uint64_t		end;
	mem_region_read_t	read;
	mem_region_write_t	write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};
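/*
 * ID register fields that are not covered by a mask above are hidden from
 * the guest and read as zero.
 */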
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}
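/*
 * Module event handler. vmm_initialized gates vm_create(): a VM can only be
 * created once both the vmm device node and the arm64 backend have been set
 * up.
 */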
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		vmmdev_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: if (vmm_is_hw_supported()) { */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}
int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	}

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}
int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);	/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}
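/*
 * Map part of a previously allocated memory segment into the guest address
 * space at 'gpa'. The offset into the segment and the length must both be
 * page aligned.
 */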
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{

	vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
	return (0);
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}
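/*
 * Special registers that are emulated in the kernel for every VM. The ID
 * registers return the masked host values from vmm_arch_regs; the rest of
 * the ID space reads as zero and ignores writes. The physical counter and
 * timer registers are forwarded to the vtimer code.
 */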
static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}
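/*
 * Example (sketch, with hypothetical my_read/my_write callbacks and iss
 * value): an in-kernel device model can claim a trapped register with
 *
 *	vm_register_reg_handler(vm, iss, ISS_MSR_REG_MASK,
 *	    my_read, my_write, my_arg);
 *
 * and release it again with vm_deregister_reg_handler(vm, iss,
 * ISS_MSR_REG_MASK). Per-VM handlers registered this way are matched before
 * the global vmm_special_regs[] table in vm_handle_reg_emul() below.
 */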
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}
/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}
int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}
void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}
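/*
 * Block the vcpu until the vgic reports a pending interrupt for it or the
 * scheduler asks the thread to yield.
 */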
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}
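/*
 * Record this vcpu as suspended and wait for the remaining active vcpus to
 * suspend themselves before returning to userspace.
 */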
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}