/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
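	/* Handlers and argument used to emulate a matching register access. */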
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
			 struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/*
 * Host registers masked by vmm_arch_regs_masks.
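 * Populated at module load time by vmm_regs_init().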
 */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,	\
	    masks->field))						\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
		free(vcpu, M_VMM);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

static int
vmm_init(void)
{
	int error;

	vm_maxcpu = mp_ncpus;
	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}
	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmm_unsupported_quirk();
		if (error != 0)
			break;
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
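 *
 * Hence the module is registered at MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1.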
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= vgic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	int error;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
	if (error != 0) {
		free(vm, M_VMM);
		return (error);
	}
	strcpy(vm->name, name);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/*
	 * Ignore maxcpus.
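	 * vm->maxcpus is fixed when the VM is created.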
	 */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm_vmspace(vm));
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static int
vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	struct hypctx *hypctx;

	hypctx = vcpu_get_cookie(vcpu);
	/* All other fields are RES0 & we don't do anything with this */
	/* TODO: Disable access to other debug state when locked */
	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
	return (0);
}

static int
vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	struct hypctx *hypctx;
	uint64_t val;

	hypctx = vcpu_get_cookie(vcpu);
	val = OSLSR_OSLM_1;
	if (hypctx->dbg_oslock)
		val |= OSLSR_OSLK;
	*rval = val;

	return (0);
}

static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),

	/* Debug registers */
	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
	/* TODO: Exceptions on invalid access */
	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}

static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
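	 * Each of them will exit to userspace with the SUSPENDED exitcode.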
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{
	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{
	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{
	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/*
	 * The page exists, but the page table needs to be updated.
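	 * Otherwise fall back to vm_fault() to bring the page in.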
	 */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm_vmspace(vm)->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm_vmspace(vm));
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}