/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4
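
/*
 * A trapped MSR/MRS access is matched against these entries by checking
 * (ESR ISS & esr_mask) == esr_iss (see vm_handle_reg_emul() below); the
 * matching entry's reg_read/reg_write handler is then invoked with 'arg'.
 */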
struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};
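
/*
 * Guest reads of the ID registers return the host values filtered through
 * the masks above (see vmm_regs_init()), so any ID register field that is
 * not covered by a mask is presented to the guest as zero, i.e. the
 * corresponding feature appears to be absent.
 */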
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_masked(reg, &regs->field, masks->field))	\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}
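
/*
 * Note that vcpu_alloc() runs only once per vCPU, on the first
 * vm_alloc_vcpu() call for that vcpuid, while vcpu_init() also runs again
 * when the VM is reinitialized (see vm_init() with create == false).
 */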

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}
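
/*
 * vCPUs are created on demand: the common case is a lock-free acquire load
 * of the already published vcpu pointer, and only the first access for a
 * given vcpuid takes vcpus_init_lock and publishes the newly allocated
 * vcpu with a release store.
 */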
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm->vmspace);
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}
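
/*
 * In-kernel device emulation (e.g. the vGIC) claims its guest-physical MMIO
 * window with vm_register_inst_handler(), roughly:
 *
 *	vm_register_inst_handler(vm, dist_base, dist_size,
 *	    dist_mmio_read, dist_mmio_write);
 *
 * where dist_* are hypothetical names for the device's window and handlers.
 * Data aborts that fall inside a registered window are emulated in
 * vm_handle_inst_emul() below, while anything else is forwarded to
 * userspace.
 */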
void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}
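
/*
 * The two helpers above bracket every vmmops_run() call (see vm_run()):
 * the host VFP state is flushed to the PCB and the guest state loaded on
 * the way in, and the guest state is stored on the way out with the FPU
 * left disabled, so the first host FPU access afterwards traps and the
 * host state is reloaded.
 */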

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}
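
/*
 * Setting VM_REG_GUEST_PC also updates vcpu->nextpc, the address at which
 * vmmops_run() will resume the guest (see vm_run() below).
 */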
int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);
	while (1) {
		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vcpu->vm->vmspace);
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}
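
/*
 * Once vm_suspend() has been called, each vCPU exits with
 * VM_EXITCODE_SUSPENDED (see vm_exit_suspended()) and parks here until
 * every active vCPU has joined suspended_cpus; the sleepers are then woken
 * so that all of them can return the suspend exit to userspace.
 */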
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}