/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4
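
/*
 * A trapped system register access: a guest MSR/MRS whose ESR ISS matches
 * esr_iss under esr_mask is emulated by the paired read/write handlers,
 * with arg passed through to them.
 */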
struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
	    struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu.  We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
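
/*
 * Build the guest-visible ID register values: each host ID register is read
 * and masked down to the features vmm is prepared to expose; a register that
 * cannot be read is reported as zero.
 */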
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}
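
/*
 * Attach the backend-specific per-vCPU state and reset the guest FPU area
 * and statistics.  Called when a vCPU is first created and again when the
 * VM is reinitialized.
 */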
static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmm_unsupported_quirk();
		if (error != 0)
			break;
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}
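
/*
 * Allocate and initialize a new VM: the backend vmspace (a 39-bit guest
 * physical address space), the vm structure itself and the array of vcpu
 * pointers.  The vcpus are created lazily by vm_alloc_vcpu().
 */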
int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}
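
/*
 * Release per-VM state.  With destroy false only the state rebuilt by
 * vm_init() is torn down so the VM can be reinitialized; with destroy true
 * the vcpus, guest memory and vmspace are freed as well.
 */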
static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}
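
/*
 * Default special register handlers.  A trapped access whose ESR ISS
 * matches esr_iss under esr_mask is forwarded to the paired handlers: the
 * ID registers listed here read back the masked host values and ignore
 * writes, any other ID register reads as zero, and the physical counter
 * and timer registers are emulated by the vtimer.
 */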
static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}
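
/*
 * Handle an MMIO instruction-emulation exit.  The faulting IPA is looked up
 * in the registered MMIO regions and emulated with that region's handlers;
 * anything else, including any access taken before a vgic is attached, is
 * punted to userspace by setting *retu.
 */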
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}
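
/*
 * Mark a vcpu as active so it is included in suspend and debug handling;
 * returns EBUSY if it is already active.
 */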
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}
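
/*
 * Save the guest's VFP registers on return from the guest and leave the
 * VFP unit disabled, so the next host FPU access traps and the host state
 * flushed to the PCB by restore_guest_fpustate() can be reloaded.
 */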
static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}
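
/*
 * Set a guest register.  Writing the guest PC also updates nextpc so the
 * next vmmops_run() resumes execution at the new address.
 */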
int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}
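
/*
 * Handle a stage 2 fault.  pmap_fault() first services faults that only
 * need the existing stage 2 tables updated; otherwise the page is faulted
 * in with vm_fault() on the guest vmspace.
 */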
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}