/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

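/*
 * A special-register handler: esr_iss/esr_mask are matched against the ISS
 * of a trapped MSR/MRS access and the reg_read/reg_write callbacks emulate
 * the register on behalf of the guest.
 */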
struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
    struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,	\
	    masks->field))						\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmm_unsupported_quirk();
		if (error != 0)
			break;
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
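		/*
		 * The release store below pairs with the acquire load at
		 * the top of this function.
		 */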
464 */ 465 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 466 (uintptr_t)vcpu); 467 } 468 sx_xunlock(&vm->vcpus_init_lock); 469 return (vcpu); 470 } 471 472 void 473 vm_slock_vcpus(struct vm *vm) 474 { 475 sx_slock(&vm->vcpus_init_lock); 476 } 477 478 void 479 vm_unlock_vcpus(struct vm *vm) 480 { 481 sx_unlock(&vm->vcpus_init_lock); 482 } 483 484 int 485 vm_create(const char *name, struct vm **retvm) 486 { 487 struct vm *vm; 488 struct vmspace *vmspace; 489 490 /* 491 * If vmm.ko could not be successfully initialized then don't attempt 492 * to create the virtual machine. 493 */ 494 if (!vmm_initialized) 495 return (ENXIO); 496 497 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 498 return (EINVAL); 499 500 vmspace = vmmops_vmspace_alloc(0, 1ul << 39); 501 if (vmspace == NULL) 502 return (ENOMEM); 503 504 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 505 strcpy(vm->name, name); 506 vm->vmspace = vmspace; 507 vm_mem_init(&vm->mem); 508 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 509 510 vm->sockets = 1; 511 vm->cores = 1; /* XXX backwards compatibility */ 512 vm->threads = 1; /* XXX backwards compatibility */ 513 vm->maxcpus = vm_maxcpu; 514 515 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 516 M_WAITOK | M_ZERO); 517 518 vm_init(vm, true); 519 520 *retvm = vm; 521 return (0); 522 } 523 524 void 525 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 526 uint16_t *threads, uint16_t *maxcpus) 527 { 528 *sockets = vm->sockets; 529 *cores = vm->cores; 530 *threads = vm->threads; 531 *maxcpus = vm->maxcpus; 532 } 533 534 uint16_t 535 vm_get_maxcpus(struct vm *vm) 536 { 537 return (vm->maxcpus); 538 } 539 540 int 541 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 542 uint16_t threads, uint16_t maxcpus) 543 { 544 /* Ignore maxcpus. */ 545 if ((sockets * cores * threads) > vm->maxcpus) 546 return (EINVAL); 547 vm->sockets = sockets; 548 vm->cores = cores; 549 vm->threads = threads; 550 return(0); 551 } 552 553 static void 554 vm_cleanup(struct vm *vm, bool destroy) 555 { 556 pmap_t pmap __diagused; 557 int i; 558 559 if (destroy) { 560 vm_xlock_memsegs(vm); 561 pmap = vmspace_pmap(vm->vmspace); 562 sched_pin(); 563 PCPU_SET(curvmpmap, NULL); 564 sched_unpin(); 565 CPU_FOREACH(i) { 566 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); 567 } 568 } else 569 vm_assert_memseg_xlocked(vm); 570 571 572 vgic_detach_from_vm(vm->cookie); 573 574 for (i = 0; i < vm->maxcpus; i++) { 575 if (vm->vcpu[i] != NULL) 576 vcpu_cleanup(vm->vcpu[i], destroy); 577 } 578 579 vmmops_cleanup(vm->cookie); 580 581 vm_mem_cleanup(vm); 582 if (destroy) { 583 vm_mem_destroy(vm); 584 585 vmmops_vmspace_free(vm->vmspace); 586 vm->vmspace = NULL; 587 588 for (i = 0; i < vm->maxcpus; i++) 589 free(vm->vcpu[i], M_VMM); 590 free(vm->vcpu, M_VMM); 591 sx_destroy(&vm->vcpus_init_lock); 592 } 593 } 594 595 void 596 vm_destroy(struct vm *vm) 597 { 598 vm_cleanup(vm, true); 599 free(vm, M_VMM); 600 } 601 602 int 603 vm_reinit(struct vm *vm) 604 { 605 int error; 606 607 /* 608 * A virtual machine can be reset only if all vcpus are suspended. 
609 */ 610 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 611 vm_cleanup(vm, false); 612 vm_init(vm, false); 613 error = 0; 614 } else { 615 error = EBUSY; 616 } 617 618 return (error); 619 } 620 621 const char * 622 vm_name(struct vm *vm) 623 { 624 return (vm->name); 625 } 626 627 int 628 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 629 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 630 { 631 return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault)); 632 } 633 634 static int 635 vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) 636 { 637 *rval = 0; 638 return (0); 639 } 640 641 static int 642 vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) 643 { 644 *rval = *(uint64_t *)arg; 645 return (0); 646 } 647 648 static int 649 vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) 650 { 651 return (0); 652 } 653 654 static const struct vmm_special_reg vmm_special_regs[] = { 655 #define SPECIAL_REG(_reg, _read, _write) \ 656 { \ 657 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 658 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 659 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 660 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 661 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 662 .esr_mask = ISS_MSR_REG_MASK, \ 663 .reg_read = (_read), \ 664 .reg_write = (_write), \ 665 .arg = NULL, \ 666 } 667 #define ID_SPECIAL_REG(_reg, _name) \ 668 { \ 669 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 670 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 671 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 672 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 673 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 674 .esr_mask = ISS_MSR_REG_MASK, \ 675 .reg_read = vmm_reg_read_arg, \ 676 .reg_write = vmm_reg_wi, \ 677 .arg = &(vmm_arch_regs._name), \ 678 } 679 680 /* ID registers */ 681 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), 682 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), 683 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), 684 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), 685 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), 686 687 /* 688 * All other ID registers are read as zero. 689 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. 
690 */ 691 { 692 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | 693 (0 << ISS_MSR_OP1_SHIFT) | 694 (0 << ISS_MSR_CRn_SHIFT) | 695 (0 << ISS_MSR_CRm_SHIFT), 696 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | 697 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), 698 .reg_read = vmm_reg_raz, 699 .reg_write = vmm_reg_wi, 700 .arg = NULL, 701 }, 702 703 /* Counter physical registers */ 704 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), 705 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, 706 vtimer_phys_cval_write), 707 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, 708 vtimer_phys_tval_write), 709 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), 710 #undef SPECIAL_REG 711 }; 712 713 void 714 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, 715 reg_read_t reg_read, reg_write_t reg_write, void *arg) 716 { 717 int i; 718 719 for (i = 0; i < nitems(vm->special_reg); i++) { 720 if (vm->special_reg[i].esr_iss == 0 && 721 vm->special_reg[i].esr_mask == 0) { 722 vm->special_reg[i].esr_iss = iss; 723 vm->special_reg[i].esr_mask = mask; 724 vm->special_reg[i].reg_read = reg_read; 725 vm->special_reg[i].reg_write = reg_write; 726 vm->special_reg[i].arg = arg; 727 return; 728 } 729 } 730 731 panic("%s: No free special register slot", __func__); 732 } 733 734 void 735 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) 736 { 737 int i; 738 739 for (i = 0; i < nitems(vm->special_reg); i++) { 740 if (vm->special_reg[i].esr_iss == iss && 741 vm->special_reg[i].esr_mask == mask) { 742 memset(&vm->special_reg[i], 0, 743 sizeof(vm->special_reg[i])); 744 return; 745 } 746 } 747 748 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, 749 mask); 750 } 751 752 static int 753 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) 754 { 755 struct vm *vm; 756 struct vm_exit *vme; 757 struct vre *vre; 758 int i, rv; 759 760 vm = vcpu->vm; 761 vme = &vcpu->exitinfo; 762 vre = &vme->u.reg_emul.vre; 763 764 for (i = 0; i < nitems(vm->special_reg); i++) { 765 if (vm->special_reg[i].esr_iss == 0 && 766 vm->special_reg[i].esr_mask == 0) 767 continue; 768 769 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == 770 vm->special_reg[i].esr_iss) { 771 rv = vmm_emulate_register(vcpu, vre, 772 vm->special_reg[i].reg_read, 773 vm->special_reg[i].reg_write, 774 vm->special_reg[i].arg); 775 if (rv == 0) { 776 *retu = false; 777 } 778 return (rv); 779 } 780 } 781 for (i = 0; i < nitems(vmm_special_regs); i++) { 782 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == 783 vmm_special_regs[i].esr_iss) { 784 rv = vmm_emulate_register(vcpu, vre, 785 vmm_special_regs[i].reg_read, 786 vmm_special_regs[i].reg_write, 787 vmm_special_regs[i].arg); 788 if (rv == 0) { 789 *retu = false; 790 } 791 return (rv); 792 } 793 } 794 795 796 *retu = true; 797 return (0); 798 } 799 800 void 801 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 802 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 803 { 804 int i; 805 806 for (i = 0; i < nitems(vm->mmio_region); i++) { 807 if (vm->mmio_region[i].start == 0 && 808 vm->mmio_region[i].end == 0) { 809 vm->mmio_region[i].start = start; 810 vm->mmio_region[i].end = start + size; 811 vm->mmio_region[i].read = mmio_read; 812 vm->mmio_region[i].write = mmio_write; 813 return; 814 } 815 } 816 817 panic("%s: No free MMIO region", __func__); 818 } 819 820 void 821 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 822 { 823 int i; 824 825 for (i = 0; i < 
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
1118 */ 1119 if (from_idle) { 1120 while (vcpu->state != VCPU_IDLE) { 1121 vcpu_notify_event_locked(vcpu); 1122 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1123 } 1124 } else { 1125 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1126 "vcpu idle state")); 1127 } 1128 1129 if (vcpu->state == VCPU_RUNNING) { 1130 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1131 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1132 } else { 1133 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1134 "vcpu that is not running", vcpu->hostcpu)); 1135 } 1136 1137 /* 1138 * The following state transitions are allowed: 1139 * IDLE -> FROZEN -> IDLE 1140 * FROZEN -> RUNNING -> FROZEN 1141 * FROZEN -> SLEEPING -> FROZEN 1142 */ 1143 switch (vcpu->state) { 1144 case VCPU_IDLE: 1145 case VCPU_RUNNING: 1146 case VCPU_SLEEPING: 1147 error = (newstate != VCPU_FROZEN); 1148 break; 1149 case VCPU_FROZEN: 1150 error = (newstate == VCPU_FROZEN); 1151 break; 1152 default: 1153 error = 1; 1154 break; 1155 } 1156 1157 if (error) 1158 return (EBUSY); 1159 1160 vcpu->state = newstate; 1161 if (newstate == VCPU_RUNNING) 1162 vcpu->hostcpu = curcpu; 1163 else 1164 vcpu->hostcpu = NOCPU; 1165 1166 if (newstate == VCPU_IDLE) 1167 wakeup(&vcpu->state); 1168 1169 return (0); 1170 } 1171 1172 static void 1173 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1174 { 1175 int error; 1176 1177 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1178 panic("Error %d setting state to %d\n", error, newstate); 1179 } 1180 1181 static void 1182 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1183 { 1184 int error; 1185 1186 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1187 panic("Error %d setting state to %d", error, newstate); 1188 } 1189 1190 int 1191 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1192 { 1193 if (type < 0 || type >= VM_CAP_MAX) 1194 return (EINVAL); 1195 1196 return (vmmops_getcap(vcpu->cookie, type, retval)); 1197 } 1198 1199 int 1200 vm_set_capability(struct vcpu *vcpu, int type, int val) 1201 { 1202 if (type < 0 || type >= VM_CAP_MAX) 1203 return (EINVAL); 1204 1205 return (vmmops_setcap(vcpu->cookie, type, val)); 1206 } 1207 1208 struct vm * 1209 vcpu_vm(struct vcpu *vcpu) 1210 { 1211 return (vcpu->vm); 1212 } 1213 1214 int 1215 vcpu_vcpuid(struct vcpu *vcpu) 1216 { 1217 return (vcpu->vcpuid); 1218 } 1219 1220 void * 1221 vcpu_get_cookie(struct vcpu *vcpu) 1222 { 1223 return (vcpu->cookie); 1224 } 1225 1226 struct vcpu * 1227 vm_vcpu(struct vm *vm, int vcpuid) 1228 { 1229 return (vm->vcpu[vcpuid]); 1230 } 1231 1232 int 1233 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1234 { 1235 int error; 1236 1237 vcpu_lock(vcpu); 1238 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1239 vcpu_unlock(vcpu); 1240 1241 return (error); 1242 } 1243 1244 enum vcpu_state 1245 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1246 { 1247 enum vcpu_state state; 1248 1249 vcpu_lock(vcpu); 1250 state = vcpu->state; 1251 if (hostcpu != NULL) 1252 *hostcpu = vcpu->hostcpu; 1253 vcpu_unlock(vcpu); 1254 1255 return (state); 1256 } 1257 1258 int 1259 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1260 { 1261 1262 if (reg >= VM_REG_LAST) 1263 return (EINVAL); 1264 1265 return (vmmops_getreg(vcpu->cookie, reg, retval)); 1266 } 1267 1268 int 1269 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1270 { 1271 int error; 1272 1273 if (reg >= VM_REG_LAST) 1274 return 
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
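	/*
	 * pmap_fault() handles faults that only require updating an existing
	 * mapping (e.g. access/dirty flag maintenance); anything it cannot
	 * resolve falls through to vm_fault() below.
	 */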
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
1516 */ 1517 error = vm_handle_smccc_call(vcpu, vme, &retu); 1518 break; 1519 1520 case VM_EXITCODE_WFI: 1521 vcpu->nextpc = vme->pc + vme->inst_length; 1522 error = vm_handle_wfi(vcpu, vme, &retu); 1523 break; 1524 1525 case VM_EXITCODE_PAGING: 1526 vcpu->nextpc = vme->pc; 1527 error = vm_handle_paging(vcpu, &retu); 1528 break; 1529 1530 case VM_EXITCODE_SUSPENDED: 1531 vcpu->nextpc = vme->pc; 1532 error = vm_handle_suspend(vcpu, &retu); 1533 break; 1534 1535 default: 1536 /* Handle in userland */ 1537 vcpu->nextpc = vme->pc; 1538 retu = true; 1539 break; 1540 } 1541 } 1542 1543 if (error == 0 && retu == false) 1544 goto restart; 1545 1546 return (error); 1547 } 1548