/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
        int             flags;
        enum vcpu_state state;
        struct mtx      mtx;
        int             hostcpu;        /* host cpuid this vcpu last ran on */
        int             vcpuid;
        void            *stats;
        struct vm_exit  exitinfo;
        uint64_t        nextpc;         /* (x) next instruction to execute */
        struct vm       *vm;            /* (o) */
        void            *cookie;        /* (i) cpu-specific data */
        struct vfpstate *guestfpu;      /* (a,i) guest fpu state */
};

#define vcpu_lock_init(v)       mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock_destroy(v)    mtx_destroy(&((v)->mtx))
#define vcpu_lock(v)            mtx_lock_spin(&((v)->mtx))
#define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
        uint64_t start;
        uint64_t end;
        mem_region_read_t read;
        mem_region_write_t write;
};
#define VM_MAX_MMIO_REGIONS     4

struct vmm_special_reg {
        uint32_t        esr_iss;
        uint32_t        esr_mask;
        reg_read_t      reg_read;
        reg_write_t     reg_write;
        void            *arg;
};
#define VM_MAX_SPECIAL_REGS     16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
        void            *cookie;                /* (i) cpu-specific data */
        volatile cpuset_t active_cpus;          /* (i) active vcpus */
        volatile cpuset_t debug_cpus;           /* (i) vcpus stopped for debug */
        int             suspend;                /* (i) stop VM execution */
        bool            dying;                  /* (o) is dying */
        volatile cpuset_t suspended_cpus;       /* (i) suspended vcpus */
        volatile cpuset_t halted_cpus;          /* (x) cpus in a hard halt */
        struct vm_mem   mem;                    /* (i) guest memory */
        char            name[VM_MAX_NAMELEN];   /* (o) virtual machine name */
        struct vcpu     **vcpu;                 /* (i) guest vcpus */
        struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
                                                /* (o) guest MMIO regions */
        struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
        /* The following describe the vm cpu topology */
        uint16_t        sockets;                /* (o) num of sockets */
        uint16_t        cores;                  /* (o) num of cores/socket */
        uint16_t        threads;                /* (o) num of threads/core */
        uint16_t        maxcpus;                /* (o) max pluggable cpus */
        struct sx       vcpus_init_lock;        /* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
                        struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
        uint64_t        id_aa64afr0;
        uint64_t        id_aa64afr1;
        uint64_t        id_aa64dfr0;
        uint64_t        id_aa64dfr1;
        uint64_t        id_aa64isar0;
        uint64_t        id_aa64isar1;
        uint64_t        id_aa64isar2;
        uint64_t        id_aa64mmfr0;
        uint64_t        id_aa64mmfr1;
        uint64_t        id_aa64mmfr2;
        uint64_t        id_aa64pfr0;
        uint64_t        id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
        .id_aa64dfr0 =
            ID_AA64DFR0_CTX_CMPs_MASK |
            ID_AA64DFR0_WRPs_MASK |
            ID_AA64DFR0_BRPs_MASK |
            ID_AA64DFR0_PMUVer_3 |
            ID_AA64DFR0_DebugVer_8,
        .id_aa64isar0 =
            ID_AA64ISAR0_TLB_TLBIOSR |
            ID_AA64ISAR0_SHA3_IMPL |
            ID_AA64ISAR0_RDM_IMPL |
            ID_AA64ISAR0_Atomic_IMPL |
            ID_AA64ISAR0_CRC32_BASE |
            ID_AA64ISAR0_SHA2_512 |
            ID_AA64ISAR0_SHA1_BASE |
            ID_AA64ISAR0_AES_PMULL,
        .id_aa64mmfr0 =
            ID_AA64MMFR0_TGran4_IMPL |
            ID_AA64MMFR0_TGran64_IMPL |
            ID_AA64MMFR0_TGran16_IMPL |
            ID_AA64MMFR0_ASIDBits_16 |
            ID_AA64MMFR0_PARange_4P,
        .id_aa64mmfr1 =
            ID_AA64MMFR1_SpecSEI_IMPL |
            ID_AA64MMFR1_PAN_ATS1E1 |
            ID_AA64MMFR1_HAFDBS_AF,
        .id_aa64pfr0 =
            ID_AA64PFR0_GIC_CPUIF_NONE |
            ID_AA64PFR0_AdvSIMD_HP |
            ID_AA64PFR0_FP_HP |
            ID_AA64PFR0_EL3_64 |
            ID_AA64PFR0_EL2_64 |
            ID_AA64PFR0_EL1_64 |
            ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define VM_MAXCPU       MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define _FETCH_KERN_REG(reg, field) do {                                \
        regs->field = vmm_arch_regs_masks.field;                        \
        if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,       \
            masks->field))                                              \
                regs->field = 0;                                        \
} while (0)
        _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
        _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
        _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
        _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
        _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
        _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
        _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
        _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
        _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
        _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
        _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
        _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
        return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
        vmmops_vcpu_cleanup(vcpu->cookie);
        vcpu->cookie = NULL;
        if (destroy) {
                vmm_stat_free(vcpu->stats);
                fpu_save_area_free(vcpu->guestfpu);
                vcpu_lock_destroy(vcpu);
                free(vcpu, M_VMM);
        }
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
        struct vcpu *vcpu;

        KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
            ("vcpu_alloc: invalid vcpu %d", vcpu_id));

        vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
        vcpu_lock_init(vcpu);
        vcpu->state = VCPU_IDLE;
        vcpu->hostcpu = NOCPU;
        vcpu->vcpuid = vcpu_id;
        vcpu->vm = vm;
        vcpu->guestfpu = fpu_save_area_alloc();
        vcpu->stats = vmm_stat_alloc();
        return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
        vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
        MPASS(vcpu->cookie != NULL);
        fpu_save_area_reset(vcpu->guestfpu);
        vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
        return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
        /*
         * Known to not load on Ampere eMAG
         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
         */
        if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
            CPU_PART_EMAG8180, 0, 0))
                return (ENXIO);

        return (0);
}

static int
vmm_init(void)
{
        int error;

        vm_maxcpu = mp_ncpus;
        TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

        if (vm_maxcpu > VM_MAXCPU) {
                printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
                vm_maxcpu = VM_MAXCPU;
        }
        if (vm_maxcpu == 0)
                vm_maxcpu = 1;

        error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
        if (error != 0)
                return (error);

        return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
        int error;

        switch (what) {
        case MOD_LOAD:
                error = vmm_unsupported_quirk();
                if (error != 0)
                        break;
                error = vmmdev_init();
                if (error != 0)
                        break;
                error = vmm_init();
                if (error == 0)
                        vmm_initialized = true;
                else
                        (void)vmmdev_cleanup();
                break;
        case MOD_UNLOAD:
                error = vmmdev_cleanup();
                if (error == 0 && vmm_initialized) {
                        error = vmmops_modcleanup();
                        if (error) {
                                /*
                                 * Something bad happened - prevent new
                                 * VMs from being created
                                 */
                                vmm_initialized = false;
                        }
                }
                break;
        default:
                error = 0;
                break;
        }
        return (error);
}

static moduledata_t vmm_kmod = {
        "vmm",
        vmm_handler,
        NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
        int i;

        vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
        MPASS(vm->cookie != NULL);

        CPU_ZERO(&vm->active_cpus);
        CPU_ZERO(&vm->debug_cpus);

        vm->suspend = 0;
        CPU_ZERO(&vm->suspended_cpus);

        memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
        memset(vm->special_reg, 0, sizeof(vm->special_reg));

        if (!create) {
                for (i = 0; i < vm->maxcpus; i++) {
                        if (vm->vcpu[i] != NULL)
                                vcpu_init(vm->vcpu[i]);
                }
        }
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
        sx_xlock(&vm->vcpus_init_lock);
        vm->dying = true;
        sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
                return (NULL);

        /* Some interrupt controllers may have a CPU limit */
        if (vcpuid >= vgic_max_cpu_count(vm->cookie))
                return (NULL);

        vcpu = (struct vcpu *)
            atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
        if (__predict_true(vcpu != NULL))
                return (vcpu);

        sx_xlock(&vm->vcpus_init_lock);
        vcpu = vm->vcpu[vcpuid];
        if (vcpu == NULL && !vm->dying) {
                vcpu = vcpu_alloc(vm, vcpuid);
                vcpu_init(vcpu);

                /*
                 * Ensure vCPU is fully created before updating pointer
                 * to permit unlocked reads above.
                 */
                atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
                    (uintptr_t)vcpu);
        }
        sx_xunlock(&vm->vcpus_init_lock);
        return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
        sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
        sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
        struct vm *vm;
        int error;

        /*
         * If vmm.ko could not be successfully initialized then don't attempt
         * to create the virtual machine.
         */
        if (!vmm_initialized)
                return (ENXIO);

        if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
                return (EINVAL);

        vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
        error = vm_mem_init(&vm->mem, 0, 1ul << 39);
        if (error != 0) {
                free(vm, M_VMM);
                return (error);
        }
        strcpy(vm->name, name);
        sx_init(&vm->vcpus_init_lock, "vm vcpus");

        vm->sockets = 1;
        vm->cores = 1;                  /* XXX backwards compatibility */
        vm->threads = 1;                /* XXX backwards compatibility */
        vm->maxcpus = vm_maxcpu;

        vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
            M_WAITOK | M_ZERO);

        vm_init(vm, true);

        *retvm = vm;
        return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
        *sockets = vm->sockets;
        *cores = vm->cores;
        *threads = vm->threads;
        *maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
        return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
        /* Ignore maxcpus. */
        if ((sockets * cores * threads) > vm->maxcpus)
                return (EINVAL);
        vm->sockets = sockets;
        vm->cores = cores;
        vm->threads = threads;
        return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
        pmap_t pmap __diagused;
        int i;

        if (destroy) {
                vm_xlock_memsegs(vm);
                pmap = vmspace_pmap(vm_vmspace(vm));
                sched_pin();
                PCPU_SET(curvmpmap, NULL);
                sched_unpin();
                CPU_FOREACH(i) {
                        MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
                }
        } else
                vm_assert_memseg_xlocked(vm);

        vgic_detach_from_vm(vm->cookie);

        for (i = 0; i < vm->maxcpus; i++) {
                if (vm->vcpu[i] != NULL)
                        vcpu_cleanup(vm->vcpu[i], destroy);
        }

        vmmops_cleanup(vm->cookie);

        vm_mem_cleanup(vm);
        if (destroy) {
                vm_mem_destroy(vm);

                free(vm->vcpu, M_VMM);
                sx_destroy(&vm->vcpus_init_lock);
        }
}

void
vm_destroy(struct vm *vm)
{
        vm_cleanup(vm, true);
        free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
        int error;

        /*
         * A virtual machine can be reset only if all vcpus are suspended.
         */
        if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
                vm_cleanup(vm, false);
                vm_init(vm, false);
                error = 0;
        } else {
                error = EBUSY;
        }

        return (error);
}

const char *
vm_name(struct vm *vm)
{
        return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
        return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
        *rval = 0;
        return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
        *rval = *(uint64_t *)arg;
        return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
        return (0);
}

static int
vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
{
        struct hypctx *hypctx;

        hypctx = vcpu_get_cookie(vcpu);
        /* All other fields are RES0 & we don't do anything with this */
        /* TODO: Disable access to other debug state when locked */
        hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
        return (0);
}

static int
vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
        struct hypctx *hypctx;
        uint64_t val;

        hypctx = vcpu_get_cookie(vcpu);
        val = OSLSR_OSLM_1;
        if (hypctx->dbg_oslock)
                val |= OSLSR_OSLK;
        *rval = val;

        return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define SPECIAL_REG(_reg, _read, _write)                                \
        {                                                               \
                .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |      \
                    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |             \
                    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |             \
                    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |             \
                    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),              \
                .esr_mask = ISS_MSR_REG_MASK,                           \
                .reg_read = (_read),                                    \
                .reg_write = (_write),                                  \
                .arg = NULL,                                            \
        }
#define ID_SPECIAL_REG(_reg, _name)                                     \
        {                                                               \
                .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |      \
                    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |             \
                    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |             \
                    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |             \
                    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),              \
                .esr_mask = ISS_MSR_REG_MASK,                           \
                .reg_read = vmm_reg_read_arg,                           \
                .reg_write = vmm_reg_wi,                                \
                .arg = &(vmm_arch_regs._name),                          \
        }

        /* ID registers */
        ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
        ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
        ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
        ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
        ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

        /*
         * All other ID registers are read as zero.
         * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
         */
        {
                .esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
                    (0 << ISS_MSR_OP1_SHIFT) |
                    (0 << ISS_MSR_CRn_SHIFT) |
                    (0 << ISS_MSR_CRm_SHIFT),
                .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
                    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
                .reg_read = vmm_reg_raz,
                .reg_write = vmm_reg_wi,
                .arg = NULL,
        },

        /* Counter physical registers */
        SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
        SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
            vtimer_phys_cval_write),
        SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
            vtimer_phys_tval_write),
        SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),

        /* Debug registers */
        SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
        SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
        /* TODO: Exceptions on invalid access */
        SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
        SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
        int i;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == 0 &&
                    vm->special_reg[i].esr_mask == 0) {
                        vm->special_reg[i].esr_iss = iss;
                        vm->special_reg[i].esr_mask = mask;
                        vm->special_reg[i].reg_read = reg_read;
                        vm->special_reg[i].reg_write = reg_write;
                        vm->special_reg[i].arg = arg;
                        return;
                }
        }

        panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
        int i;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == iss &&
                    vm->special_reg[i].esr_mask == mask) {
                        memset(&vm->special_reg[i], 0,
                            sizeof(vm->special_reg[i]));
                        return;
                }
        }

        panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
            mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm;
        struct vm_exit *vme;
        struct vre *vre;
        int i, rv;

        vm = vcpu->vm;
        vme = &vcpu->exitinfo;
        vre = &vme->u.reg_emul.vre;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == 0 &&
                    vm->special_reg[i].esr_mask == 0)
                        continue;

                if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
                    vm->special_reg[i].esr_iss) {
                        rv = vmm_emulate_register(vcpu, vre,
                            vm->special_reg[i].reg_read,
                            vm->special_reg[i].reg_write,
                            vm->special_reg[i].arg);
                        if (rv == 0) {
                                *retu = false;
                        }
                        return (rv);
                }
        }
        for (i = 0; i < nitems(vmm_special_regs); i++) {
                if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
                    vmm_special_regs[i].esr_iss) {
                        rv = vmm_emulate_register(vcpu, vre,
                            vmm_special_regs[i].reg_read,
                            vmm_special_regs[i].reg_write,
                            vmm_special_regs[i].arg);
                        if (rv == 0) {
                                *retu = false;
                        }
                        return (rv);
                }
        }

        *retu = true;
        return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
        int i;

        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start == 0 &&
                    vm->mmio_region[i].end == 0) {
                        vm->mmio_region[i].start = start;
                        vm->mmio_region[i].end = start + size;
                        vm->mmio_region[i].read = mmio_read;
                        vm->mmio_region[i].write = mmio_write;
                        return;
                }
        }

        panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
        int i;

        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start == start &&
                    vm->mmio_region[i].end == start + size) {
                        memset(&vm->mmio_region[i], 0,
                            sizeof(vm->mmio_region[i]));
                        return;
                }
        }

        panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
            start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm;
        struct vm_exit *vme;
        struct vie *vie;
        struct hyp *hyp;
        uint64_t fault_ipa;
        struct vm_guest_paging *paging;
        struct vmm_mmio_region *vmr;
        int error, i;

        vm = vcpu->vm;
        hyp = vm->cookie;
        if (!hyp->vgic_attached)
                goto out_user;

        vme = &vcpu->exitinfo;
        vie = &vme->u.inst_emul.vie;
        paging = &vme->u.inst_emul.paging;

        fault_ipa = vme->u.inst_emul.gpa;

        vmr = NULL;
        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start <= fault_ipa &&
                    vm->mmio_region[i].end > fault_ipa) {
                        vmr = &vm->mmio_region[i];
                        break;
                }
        }
        if (vmr == NULL)
                goto out_user;

        error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
            vmr->read, vmr->write, retu);
        return (error);

out_user:
        *retu = true;
        return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
        int i;

        if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
                return (EINVAL);

        if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
                VM_CTR2(vm, "virtual machine already suspended %d/%d",
                    vm->suspend, how);
                return (EALREADY);
        }

        VM_CTR1(vm, "virtual machine successfully suspended %d", how);

        /*
         * Notify all active vcpus that they are now suspended.
         */
        for (i = 0; i < vm->maxcpus; i++) {
                if (CPU_ISSET(i, &vm->active_cpus))
                        vcpu_notify_event(vm_vcpu(vm, i));
        }

        return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
        struct vm *vm = vcpu->vm;
        struct vm_exit *vmexit;

        KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
            ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

        vmexit = vm_exitinfo(vcpu);
        vmexit->pc = pc;
        vmexit->inst_length = 4;
        vmexit->exitcode = VM_EXITCODE_SUSPENDED;
        vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
        struct vm_exit *vmexit;

        vmexit = vm_exitinfo(vcpu);
        vmexit->pc = pc;
        vmexit->inst_length = 4;
        vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
        struct vm *vm = vcpu->vm;

        if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
                return (EBUSY);

        CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
        return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
        if (vcpu == NULL) {
                vm->debug_cpus = vm->active_cpus;
                for (int i = 0; i < vm->maxcpus; i++) {
                        if (CPU_ISSET(i, &vm->active_cpus))
                                vcpu_notify_event(vm_vcpu(vm, i));
                }
        } else {
                if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
                        return (EINVAL);

                CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
                vcpu_notify_event(vcpu);
        }
        return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

        if (vcpu == NULL) {
                CPU_ZERO(&vm->debug_cpus);
        } else {
                if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
                        return (EINVAL);

                CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
        }
        return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

        return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

        return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

        return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

        return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

        return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
        int hostcpu;

        hostcpu = vcpu->hostcpu;
        if (vcpu->state == VCPU_RUNNING) {
                KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
                if (hostcpu != curcpu) {
                        ipi_cpu(hostcpu, vmm_ipinum);
                } else {
                        /*
                         * If the 'vcpu' is running on 'curcpu' then it must
                         * be sending a notification to itself (e.g. SELF_IPI).
                         * The pending event will be picked up when the vcpu
                         * transitions back to guest context.
                         */
                }
        } else {
                KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
                    "with hostcpu %d", vcpu->state, hostcpu));
                if (vcpu->state == VCPU_SLEEPING)
                        wakeup_one(vcpu);
        }
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
        vcpu_lock(vcpu);
        vcpu_notify_event_locked(vcpu);
        vcpu_unlock(vcpu);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
        return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

        /* flush host state to the pcb */
        vfp_save_state(curthread, curthread->td_pcb);
        /* Ensure the VFP state will be re-loaded when exiting the guest */
        PCPU_SET(fpcurthread, NULL);

        /* restore guest FPU state */
        vfp_enable();
        vfp_restore(vcpu->guestfpu);

        /*
         * The FPU is now "dirty" with the guest's state so turn on emulation
         * to trap any access to the FPU by the host.
         */
        vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
        if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
            CPACR_FPEN_TRAP_ALL1)
                panic("VFP not enabled in host!");

        /* save guest FPU state */
        vfp_enable();
        vfp_store(vcpu->guestfpu);
        vfp_disable();

        KASSERT(PCPU_GET(fpcurthread) == NULL,
            ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
        int error;

        vcpu_assert_locked(vcpu);

        /*
         * State transitions from the vmmdev_ioctl() must always begin from
         * the VCPU_IDLE state. This guarantees that there is only a single
         * ioctl() operating on a vcpu at any point.
         */
        if (from_idle) {
                while (vcpu->state != VCPU_IDLE) {
                        vcpu_notify_event_locked(vcpu);
                        msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
                }
        } else {
                KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
                    "vcpu idle state"));
        }

        if (vcpu->state == VCPU_RUNNING) {
                KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
                    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
        } else {
                KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
                    "vcpu that is not running", vcpu->hostcpu));
        }

        /*
         * The following state transitions are allowed:
         * IDLE -> FROZEN -> IDLE
         * FROZEN -> RUNNING -> FROZEN
         * FROZEN -> SLEEPING -> FROZEN
         */
        switch (vcpu->state) {
        case VCPU_IDLE:
        case VCPU_RUNNING:
        case VCPU_SLEEPING:
                error = (newstate != VCPU_FROZEN);
                break;
        case VCPU_FROZEN:
                error = (newstate == VCPU_FROZEN);
                break;
        default:
                error = 1;
                break;
        }

        if (error)
                return (EBUSY);

        vcpu->state = newstate;
        if (newstate == VCPU_RUNNING)
                vcpu->hostcpu = curcpu;
        else
                vcpu->hostcpu = NOCPU;

        if (newstate == VCPU_IDLE)
                wakeup(&vcpu->state);

        return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
                panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
                panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
        return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
        return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
        return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
        return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
        int error;

        vcpu_lock(vcpu);
        error = vcpu_set_state_locked(vcpu, newstate, from_idle);
        vcpu_unlock(vcpu);

        return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
        enum vcpu_state state;

        vcpu_lock(vcpu);
        state = vcpu->state;
        if (hostcpu != NULL)
                *hostcpu = vcpu->hostcpu;
        vcpu_unlock(vcpu);

        return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

        if (reg >= VM_REG_LAST)
                return (EINVAL);

        return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
        int error;

        if (reg >= VM_REG_LAST)
                return (EINVAL);
        error = vmmops_setreg(vcpu->cookie, reg, val);
        if (error || reg != VM_REG_GUEST_PC)
                return (error);

        vcpu->nextpc = val;

        return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
        return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
        return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
        return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
        return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
        return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
        /* TODO: Should we raise an SError? */
        return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
        struct hypctx *hypctx;
        int i;

        hypctx = vcpu_get_cookie(vcpu);

        if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
                return (1);

        vme->exitcode = VM_EXITCODE_SMCCC;
        vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
        for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
                vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

        *retu = true;
        return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
        struct vm *vm;

        vm = vcpu->vm;
        vcpu_lock(vcpu);
        while (1) {
                if (vm->suspend)
                        break;

                if (vgic_has_pending_irq(vcpu->cookie))
                        break;

                if (vcpu_should_yield(vcpu))
                        break;

                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
                /*
                 * XXX msleep_spin() cannot be interrupted by signals so
                 * wake up periodically to check pending signals.
                 */
                msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
        }
        vcpu_unlock(vcpu);

        *retu = false;
        return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm = vcpu->vm;
        struct vm_exit *vme;
        struct vm_map *map;
        uint64_t addr, esr;
        pmap_t pmap;
        int ftype, rv;

        vme = &vcpu->exitinfo;

        pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
        addr = vme->u.paging.gpa;
        esr = vme->u.paging.esr;

        /* The page exists, but the page table needs to be updated. */
        if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
                return (0);

        switch (ESR_ELx_EXCEPTION(esr)) {
        case EXCP_INSN_ABORT_L:
        case EXCP_DATA_ABORT_L:
                ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
                break;
        default:
                panic("%s: Invalid exception (esr = %lx)", __func__, esr);
        }

        map = &vm_vmspace(vm)->vm_map;
        rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
        if (rv != KERN_SUCCESS)
                return (EFAULT);

        return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm = vcpu->vm;
        int error, i;
        struct thread *td;

        error = 0;
        td = curthread;

        CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

        /*
         * Wait until all 'active_cpus' have suspended themselves.
         *
         * Since a VM may be suspended at any time including when one or
         * more vcpus are doing a rendezvous we need to call the rendezvous
         * handler while we are waiting to prevent a deadlock.
         */
        vcpu_lock(vcpu);
        while (error == 0) {
                if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
                        break;

                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
                msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
                if (td_ast_pending(td, TDA_SUSPEND)) {
                        vcpu_unlock(vcpu);
                        error = thread_check_susp(td, false);
                        vcpu_lock(vcpu);
                }
        }
        vcpu_unlock(vcpu);

        /*
         * Wakeup the other sleeping vcpus and return to userspace.
         */
        for (i = 0; i < vm->maxcpus; i++) {
                if (CPU_ISSET(i, &vm->suspended_cpus)) {
                        vcpu_notify_event(vm_vcpu(vm, i));
                }
        }

        *retu = true;
        return (error);
}

int
vm_run(struct vcpu *vcpu)
{
        struct vm *vm = vcpu->vm;
        struct vm_eventinfo evinfo;
        int error, vcpuid;
        struct vm_exit *vme;
        bool retu;
        pmap_t pmap;

        vcpuid = vcpu->vcpuid;

        if (!CPU_ISSET(vcpuid, &vm->active_cpus))
                return (EINVAL);

        if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
                return (EINVAL);

        pmap = vmspace_pmap(vm_vmspace(vm));
        vme = &vcpu->exitinfo;
        evinfo.rptr = NULL;
        evinfo.sptr = &vm->suspend;
        evinfo.iptr = NULL;
restart:
        critical_enter();

        restore_guest_fpustate(vcpu);

        vcpu_require_state(vcpu, VCPU_RUNNING);
        error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
        vcpu_require_state(vcpu, VCPU_FROZEN);

        save_guest_fpustate(vcpu);

        critical_exit();

        if (error == 0) {
                retu = false;
                switch (vme->exitcode) {
                case VM_EXITCODE_INST_EMUL:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_inst_emul(vcpu, &retu);
                        break;

                case VM_EXITCODE_REG_EMUL:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_reg_emul(vcpu, &retu);
                        break;

                case VM_EXITCODE_HVC:
                        /*
                         * The HVC instruction saves the address for the
                         * next instruction as the return address.
                         */
                        vcpu->nextpc = vme->pc;
                        /*
                         * The PSCI call can change the exit information in the
                         * case of suspend/reset/poweroff/cpu off/cpu on.
                         */
                        error = vm_handle_smccc_call(vcpu, vme, &retu);
                        break;

                case VM_EXITCODE_WFI:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_wfi(vcpu, vme, &retu);
                        break;

                case VM_EXITCODE_PAGING:
                        vcpu->nextpc = vme->pc;
                        error = vm_handle_paging(vcpu, &retu);
                        break;

                case VM_EXITCODE_SUSPENDED:
                        vcpu->nextpc = vme->pc;
                        error = vm_handle_suspend(vcpu, &retu);
                        break;

                default:
                        /* Handle in userland */
                        vcpu->nextpc = vme->pc;
                        retu = true;
                        break;
                }
        }

        if (error == 0 && retu == false)
                goto restart;

        return (error);
}