1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/cpuset.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mutex.h> 38 #include <sys/pcpu.h> 39 #include <sys/proc.h> 40 #include <sys/queue.h> 41 #include <sys/rwlock.h> 42 #include <sys/sched.h> 43 #include <sys/smp.h> 44 #include <sys/sysctl.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_object.h> 48 #include <vm/vm_page.h> 49 #include <vm/pmap.h> 50 #include <vm/vm_map.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_param.h> 53 54 #include <machine/armreg.h> 55 #include <machine/cpu.h> 56 #include <machine/fpu.h> 57 #include <machine/machdep.h> 58 #include <machine/pcb.h> 59 #include <machine/smp.h> 60 #include <machine/vm.h> 61 #include <machine/vmparam.h> 62 #include <machine/vmm.h> 63 #include <machine/vmm_instruction_emul.h> 64 65 #include <dev/pci/pcireg.h> 66 #include <dev/vmm/vmm_dev.h> 67 #include <dev/vmm/vmm_ktr.h> 68 #include <dev/vmm/vmm_mem.h> 69 #include <dev/vmm/vmm_stat.h> 70 71 #include "arm64.h" 72 #include "mmu.h" 73 74 #include "io/vgic.h" 75 #include "io/vtimer.h" 76 77 struct vcpu { 78 int flags; 79 enum vcpu_state state; 80 struct mtx mtx; 81 int hostcpu; /* host cpuid this vcpu last ran on */ 82 int vcpuid; 83 void *stats; 84 struct vm_exit exitinfo; 85 uint64_t nextpc; /* (x) next instruction to execute */ 86 struct vm *vm; /* (o) */ 87 void *cookie; /* (i) cpu-specific data */ 88 struct vfpstate *guestfpu; /* (a,i) guest fpu state */ 89 }; 90 91 #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 92 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 93 #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 94 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 95 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 96 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 97 98 struct vmm_mmio_region { 99 uint64_t start; 100 uint64_t end; 101 mem_region_read_t read; 102 mem_region_write_t write; 103 }; 104 #define VM_MAX_MMIO_REGIONS 4 105 106 struct vmm_special_reg { 107 uint32_t esr_iss; 108 uint32_t esr_mask; 109 reg_read_t reg_read; 110 reg_write_t reg_write; 111 void *arg; 112 }; 113 #define VM_MAX_SPECIAL_REGS 16 114 115 /* 116 * Initialization: 117 * (o) initialized the first time the VM is created 118 * (i) initialized when VM is created and when it is reinitialized 119 * (x) initialized before use 120 */ 121 struct vm { 122 void *cookie; /* (i) cpu-specific data */ 123 volatile cpuset_t active_cpus; /* (i) active vcpus */ 124 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ 125 int suspend; /* (i) stop VM execution */ 126 bool dying; /* (o) is dying */ 127 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 128 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 129 struct vmspace *vmspace; /* (o) guest's address space */ 130 struct vm_mem mem; /* (i) guest memory */ 131 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 132 struct vcpu **vcpu; /* (i) guest vcpus */ 133 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; 134 /* (o) guest MMIO regions */ 135 struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS]; 136 /* The following describe the vm cpu topology */ 137 uint16_t sockets; /* (o) num of sockets */ 138 uint16_t cores; /* (o) num of cores/socket */ 139 uint16_t threads; /* (o) num of threads/core */ 140 uint16_t maxcpus; /* (o) max pluggable cpus */ 141 struct sx vcpus_init_lock; /* (o) */ 142 }; 143 144 static bool vmm_initialized = false; 145 146 static int vm_handle_wfi(struct vcpu *vcpu, 147 struct vm_exit *vme, bool *retu); 148 149 static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); 150 151 /* statistics */ 152 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 153 154 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 155 156 static int vmm_ipinum; 157 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 158 "IPI vector used for vcpu notifications"); 159 160 struct vmm_regs { 161 uint64_t id_aa64afr0; 162 uint64_t id_aa64afr1; 163 uint64_t id_aa64dfr0; 164 uint64_t id_aa64dfr1; 165 uint64_t id_aa64isar0; 166 uint64_t id_aa64isar1; 167 uint64_t id_aa64isar2; 168 uint64_t id_aa64mmfr0; 169 uint64_t id_aa64mmfr1; 170 uint64_t id_aa64mmfr2; 171 uint64_t id_aa64pfr0; 172 uint64_t id_aa64pfr1; 173 }; 174 175 static const struct vmm_regs vmm_arch_regs_masks = { 176 .id_aa64dfr0 = 177 ID_AA64DFR0_CTX_CMPs_MASK | 178 ID_AA64DFR0_WRPs_MASK | 179 ID_AA64DFR0_BRPs_MASK | 180 ID_AA64DFR0_PMUVer_3 | 181 ID_AA64DFR0_DebugVer_8, 182 .id_aa64isar0 = 183 ID_AA64ISAR0_TLB_TLBIOSR | 184 ID_AA64ISAR0_SHA3_IMPL | 185 ID_AA64ISAR0_RDM_IMPL | 186 ID_AA64ISAR0_Atomic_IMPL | 187 ID_AA64ISAR0_CRC32_BASE | 188 ID_AA64ISAR0_SHA2_512 | 189 ID_AA64ISAR0_SHA1_BASE | 190 ID_AA64ISAR0_AES_PMULL, 191 .id_aa64mmfr0 = 192 ID_AA64MMFR0_TGran4_IMPL | 193 ID_AA64MMFR0_TGran64_IMPL | 194 ID_AA64MMFR0_TGran16_IMPL | 195 ID_AA64MMFR0_ASIDBits_16 | 196 ID_AA64MMFR0_PARange_4P, 197 .id_aa64mmfr1 = 198 ID_AA64MMFR1_SpecSEI_IMPL | 199 ID_AA64MMFR1_PAN_ATS1E1 | 200 ID_AA64MMFR1_HAFDBS_AF, 201 .id_aa64pfr0 = 202 ID_AA64PFR0_GIC_CPUIF_NONE | 203 ID_AA64PFR0_AdvSIMD_HP | 204 ID_AA64PFR0_FP_HP | 205 ID_AA64PFR0_EL3_64 | 206 ID_AA64PFR0_EL2_64 | 207 ID_AA64PFR0_EL1_64 | 208 ID_AA64PFR0_EL0_64, 209 }; 210 211 /* Host registers masked by vmm_arch_regs_masks. */ 212 static struct vmm_regs vmm_arch_regs; 213 214 u_int vm_maxcpu; 215 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 216 &vm_maxcpu, 0, "Maximum number of vCPUs"); 217 218 static void vcpu_notify_event_locked(struct vcpu *vcpu); 219 220 /* global statistics */ 221 VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); 222 VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); 223 VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); 224 VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); 225 VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); 226 VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); 227 VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); 228 VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); 229 VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); 230 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); 231 VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); 232 VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); 233 VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); 234 VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); 235 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); 236 237 /* 238 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this 239 * is a safe value for now. 240 */ 241 #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) 242 243 static int 244 vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) 245 { 246 #define _FETCH_KERN_REG(reg, field) do { \ 247 regs->field = vmm_arch_regs_masks.field; \ 248 if (!get_kernel_reg_iss_masked(reg ## _ISS, ®s->field, \ 249 masks->field)) \ 250 regs->field = 0; \ 251 } while (0) 252 _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0); 253 _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1); 254 _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0); 255 _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1); 256 _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0); 257 _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1); 258 _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2); 259 _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0); 260 _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1); 261 _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2); 262 _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0); 263 _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1); 264 #undef _FETCH_KERN_REG 265 return (0); 266 } 267 268 static void 269 vcpu_cleanup(struct vcpu *vcpu, bool destroy) 270 { 271 vmmops_vcpu_cleanup(vcpu->cookie); 272 vcpu->cookie = NULL; 273 if (destroy) { 274 vmm_stat_free(vcpu->stats); 275 fpu_save_area_free(vcpu->guestfpu); 276 vcpu_lock_destroy(vcpu); 277 } 278 } 279 280 static struct vcpu * 281 vcpu_alloc(struct vm *vm, int vcpu_id) 282 { 283 struct vcpu *vcpu; 284 285 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 286 ("vcpu_alloc: invalid vcpu %d", vcpu_id)); 287 288 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); 289 vcpu_lock_init(vcpu); 290 vcpu->state = VCPU_IDLE; 291 vcpu->hostcpu = NOCPU; 292 vcpu->vcpuid = vcpu_id; 293 vcpu->vm = vm; 294 vcpu->guestfpu = fpu_save_area_alloc(); 295 vcpu->stats = vmm_stat_alloc(); 296 return (vcpu); 297 } 298 299 static void 300 vcpu_init(struct vcpu *vcpu) 301 { 302 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 303 MPASS(vcpu->cookie != NULL); 304 fpu_save_area_reset(vcpu->guestfpu); 305 vmm_stat_init(vcpu->stats); 306 } 307 308 struct vm_exit * 309 vm_exitinfo(struct vcpu *vcpu) 310 { 311 return (&vcpu->exitinfo); 312 } 313 314 static int 315 vmm_unsupported_quirk(void) 316 { 317 /* 318 * Known to not load on Ampere eMAG 319 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051 320 */ 321 if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM, 322 CPU_PART_EMAG8180, 0, 0)) 323 return (ENXIO); 324 325 return (0); 326 } 327 328 static int 329 vmm_init(void) 330 { 331 int error; 332 333 vm_maxcpu = mp_ncpus; 334 TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); 335 336 if (vm_maxcpu > VM_MAXCPU) { 337 printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); 338 vm_maxcpu = VM_MAXCPU; 339 } 340 if (vm_maxcpu == 0) 341 vm_maxcpu = 1; 342 343 error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); 344 if (error != 0) 345 return (error); 346 347 return (vmmops_modinit(0)); 348 } 349 350 static int 351 vmm_handler(module_t mod, int what, void *arg) 352 { 353 int error; 354 355 switch (what) { 356 case MOD_LOAD: 357 error = vmm_unsupported_quirk(); 358 if (error != 0) 359 break; 360 error = vmmdev_init(); 361 if (error != 0) 362 break; 363 error = vmm_init(); 364 if (error == 0) 365 vmm_initialized = true; 366 else 367 (void)vmmdev_cleanup(); 368 break; 369 case MOD_UNLOAD: 370 error = vmmdev_cleanup(); 371 if (error == 0 && vmm_initialized) { 372 error = vmmops_modcleanup(); 373 if (error) { 374 /* 375 * Something bad happened - prevent new 376 * VMs from being created 377 */ 378 vmm_initialized = false; 379 } 380 } 381 break; 382 default: 383 error = 0; 384 break; 385 } 386 return (error); 387 } 388 389 static moduledata_t vmm_kmod = { 390 "vmm", 391 vmm_handler, 392 NULL 393 }; 394 395 /* 396 * vmm initialization has the following dependencies: 397 * 398 * - HYP initialization requires smp_rendezvous() and therefore must happen 399 * after SMP is fully functional (after SI_SUB_SMP). 400 * - vmm device initialization requires an initialized devfs. 401 */ 402 DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); 403 MODULE_VERSION(vmm, 1); 404 405 static void 406 vm_init(struct vm *vm, bool create) 407 { 408 int i; 409 410 vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); 411 MPASS(vm->cookie != NULL); 412 413 CPU_ZERO(&vm->active_cpus); 414 CPU_ZERO(&vm->debug_cpus); 415 416 vm->suspend = 0; 417 CPU_ZERO(&vm->suspended_cpus); 418 419 memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); 420 memset(vm->special_reg, 0, sizeof(vm->special_reg)); 421 422 if (!create) { 423 for (i = 0; i < vm->maxcpus; i++) { 424 if (vm->vcpu[i] != NULL) 425 vcpu_init(vm->vcpu[i]); 426 } 427 } 428 } 429 430 void 431 vm_disable_vcpu_creation(struct vm *vm) 432 { 433 sx_xlock(&vm->vcpus_init_lock); 434 vm->dying = true; 435 sx_xunlock(&vm->vcpus_init_lock); 436 } 437 438 struct vcpu * 439 vm_alloc_vcpu(struct vm *vm, int vcpuid) 440 { 441 struct vcpu *vcpu; 442 443 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 444 return (NULL); 445 446 /* Some interrupt controllers may have a CPU limit */ 447 if (vcpuid >= vgic_max_cpu_count(vm->cookie)) 448 return (NULL); 449 450 vcpu = (struct vcpu *) 451 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); 452 if (__predict_true(vcpu != NULL)) 453 return (vcpu); 454 455 sx_xlock(&vm->vcpus_init_lock); 456 vcpu = vm->vcpu[vcpuid]; 457 if (vcpu == NULL && !vm->dying) { 458 vcpu = vcpu_alloc(vm, vcpuid); 459 vcpu_init(vcpu); 460 461 /* 462 * Ensure vCPU is fully created before updating pointer 463 * to permit unlocked reads above. 464 */ 465 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 466 (uintptr_t)vcpu); 467 } 468 sx_xunlock(&vm->vcpus_init_lock); 469 return (vcpu); 470 } 471 472 void 473 vm_slock_vcpus(struct vm *vm) 474 { 475 sx_slock(&vm->vcpus_init_lock); 476 } 477 478 void 479 vm_unlock_vcpus(struct vm *vm) 480 { 481 sx_unlock(&vm->vcpus_init_lock); 482 } 483 484 int 485 vm_create(const char *name, struct vm **retvm) 486 { 487 struct vm *vm; 488 struct vmspace *vmspace; 489 490 /* 491 * If vmm.ko could not be successfully initialized then don't attempt 492 * to create the virtual machine. 493 */ 494 if (!vmm_initialized) 495 return (ENXIO); 496 497 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 498 return (EINVAL); 499 500 vmspace = vmmops_vmspace_alloc(0, 1ul << 39); 501 if (vmspace == NULL) 502 return (ENOMEM); 503 504 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 505 strcpy(vm->name, name); 506 vm->vmspace = vmspace; 507 vm_mem_init(&vm->mem); 508 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 509 510 vm->sockets = 1; 511 vm->cores = 1; /* XXX backwards compatibility */ 512 vm->threads = 1; /* XXX backwards compatibility */ 513 vm->maxcpus = vm_maxcpu; 514 515 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 516 M_WAITOK | M_ZERO); 517 518 vm_init(vm, true); 519 520 *retvm = vm; 521 return (0); 522 } 523 524 void 525 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 526 uint16_t *threads, uint16_t *maxcpus) 527 { 528 *sockets = vm->sockets; 529 *cores = vm->cores; 530 *threads = vm->threads; 531 *maxcpus = vm->maxcpus; 532 } 533 534 uint16_t 535 vm_get_maxcpus(struct vm *vm) 536 { 537 return (vm->maxcpus); 538 } 539 540 int 541 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 542 uint16_t threads, uint16_t maxcpus) 543 { 544 /* Ignore maxcpus. */ 545 if ((sockets * cores * threads) > vm->maxcpus) 546 return (EINVAL); 547 vm->sockets = sockets; 548 vm->cores = cores; 549 vm->threads = threads; 550 return(0); 551 } 552 553 static void 554 vm_cleanup(struct vm *vm, bool destroy) 555 { 556 pmap_t pmap __diagused; 557 int i; 558 559 if (destroy) { 560 vm_xlock_memsegs(vm); 561 pmap = vmspace_pmap(vm->vmspace); 562 sched_pin(); 563 PCPU_SET(curvmpmap, NULL); 564 sched_unpin(); 565 CPU_FOREACH(i) { 566 MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); 567 } 568 } else 569 vm_assert_memseg_xlocked(vm); 570 571 572 vgic_detach_from_vm(vm->cookie); 573 574 for (i = 0; i < vm->maxcpus; i++) { 575 if (vm->vcpu[i] != NULL) 576 vcpu_cleanup(vm->vcpu[i], destroy); 577 } 578 579 vmmops_cleanup(vm->cookie); 580 581 vm_mem_cleanup(vm); 582 if (destroy) { 583 vm_mem_destroy(vm); 584 585 vmmops_vmspace_free(vm->vmspace); 586 vm->vmspace = NULL; 587 588 for (i = 0; i < vm->maxcpus; i++) 589 free(vm->vcpu[i], M_VMM); 590 free(vm->vcpu, M_VMM); 591 sx_destroy(&vm->vcpus_init_lock); 592 } 593 } 594 595 void 596 vm_destroy(struct vm *vm) 597 { 598 vm_cleanup(vm, true); 599 free(vm, M_VMM); 600 } 601 602 int 603 vm_reinit(struct vm *vm) 604 { 605 int error; 606 607 /* 608 * A virtual machine can be reset only if all vcpus are suspended. 609 */ 610 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 611 vm_cleanup(vm, false); 612 vm_init(vm, false); 613 error = 0; 614 } else { 615 error = EBUSY; 616 } 617 618 return (error); 619 } 620 621 const char * 622 vm_name(struct vm *vm) 623 { 624 return (vm->name); 625 } 626 627 int 628 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 629 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 630 { 631 return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault)); 632 } 633 634 static int 635 vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) 636 { 637 *rval = 0; 638 return (0); 639 } 640 641 static int 642 vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) 643 { 644 *rval = *(uint64_t *)arg; 645 return (0); 646 } 647 648 static int 649 vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) 650 { 651 return (0); 652 } 653 654 static int 655 vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg) 656 { 657 struct hypctx *hypctx; 658 659 hypctx = vcpu_get_cookie(vcpu); 660 /* All other fields are RES0 & we don't do anything with this */ 661 /* TODO: Disable access to other debug state when locked */ 662 hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK; 663 return (0); 664 } 665 666 static int 667 vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg) 668 { 669 struct hypctx *hypctx; 670 uint64_t val; 671 672 hypctx = vcpu_get_cookie(vcpu); 673 val = OSLSR_OSLM_1; 674 if (hypctx->dbg_oslock) 675 val |= OSLSR_OSLK; 676 *rval = val; 677 678 return (0); 679 } 680 681 static const struct vmm_special_reg vmm_special_regs[] = { 682 #define SPECIAL_REG(_reg, _read, _write) \ 683 { \ 684 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 685 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 686 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 687 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 688 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 689 .esr_mask = ISS_MSR_REG_MASK, \ 690 .reg_read = (_read), \ 691 .reg_write = (_write), \ 692 .arg = NULL, \ 693 } 694 #define ID_SPECIAL_REG(_reg, _name) \ 695 { \ 696 .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ 697 ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ 698 ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ 699 ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ 700 ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ 701 .esr_mask = ISS_MSR_REG_MASK, \ 702 .reg_read = vmm_reg_read_arg, \ 703 .reg_write = vmm_reg_wi, \ 704 .arg = &(vmm_arch_regs._name), \ 705 } 706 707 /* ID registers */ 708 ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), 709 ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), 710 ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), 711 ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), 712 ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), 713 714 /* 715 * All other ID registers are read as zero. 716 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. 717 */ 718 { 719 .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | 720 (0 << ISS_MSR_OP1_SHIFT) | 721 (0 << ISS_MSR_CRn_SHIFT) | 722 (0 << ISS_MSR_CRm_SHIFT), 723 .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | 724 ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), 725 .reg_read = vmm_reg_raz, 726 .reg_write = vmm_reg_wi, 727 .arg = NULL, 728 }, 729 730 /* Counter physical registers */ 731 SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), 732 SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, 733 vtimer_phys_cval_write), 734 SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, 735 vtimer_phys_tval_write), 736 SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), 737 738 /* Debug registers */ 739 SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi), 740 SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi), 741 /* TODO: Exceptions on invalid access */ 742 SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1), 743 SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi), 744 #undef SPECIAL_REG 745 }; 746 747 void 748 vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, 749 reg_read_t reg_read, reg_write_t reg_write, void *arg) 750 { 751 int i; 752 753 for (i = 0; i < nitems(vm->special_reg); i++) { 754 if (vm->special_reg[i].esr_iss == 0 && 755 vm->special_reg[i].esr_mask == 0) { 756 vm->special_reg[i].esr_iss = iss; 757 vm->special_reg[i].esr_mask = mask; 758 vm->special_reg[i].reg_read = reg_read; 759 vm->special_reg[i].reg_write = reg_write; 760 vm->special_reg[i].arg = arg; 761 return; 762 } 763 } 764 765 panic("%s: No free special register slot", __func__); 766 } 767 768 void 769 vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) 770 { 771 int i; 772 773 for (i = 0; i < nitems(vm->special_reg); i++) { 774 if (vm->special_reg[i].esr_iss == iss && 775 vm->special_reg[i].esr_mask == mask) { 776 memset(&vm->special_reg[i], 0, 777 sizeof(vm->special_reg[i])); 778 return; 779 } 780 } 781 782 panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, 783 mask); 784 } 785 786 static int 787 vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) 788 { 789 struct vm *vm; 790 struct vm_exit *vme; 791 struct vre *vre; 792 int i, rv; 793 794 vm = vcpu->vm; 795 vme = &vcpu->exitinfo; 796 vre = &vme->u.reg_emul.vre; 797 798 for (i = 0; i < nitems(vm->special_reg); i++) { 799 if (vm->special_reg[i].esr_iss == 0 && 800 vm->special_reg[i].esr_mask == 0) 801 continue; 802 803 if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == 804 vm->special_reg[i].esr_iss) { 805 rv = vmm_emulate_register(vcpu, vre, 806 vm->special_reg[i].reg_read, 807 vm->special_reg[i].reg_write, 808 vm->special_reg[i].arg); 809 if (rv == 0) { 810 *retu = false; 811 } 812 return (rv); 813 } 814 } 815 for (i = 0; i < nitems(vmm_special_regs); i++) { 816 if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == 817 vmm_special_regs[i].esr_iss) { 818 rv = vmm_emulate_register(vcpu, vre, 819 vmm_special_regs[i].reg_read, 820 vmm_special_regs[i].reg_write, 821 vmm_special_regs[i].arg); 822 if (rv == 0) { 823 *retu = false; 824 } 825 return (rv); 826 } 827 } 828 829 830 *retu = true; 831 return (0); 832 } 833 834 void 835 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 836 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 837 { 838 int i; 839 840 for (i = 0; i < nitems(vm->mmio_region); i++) { 841 if (vm->mmio_region[i].start == 0 && 842 vm->mmio_region[i].end == 0) { 843 vm->mmio_region[i].start = start; 844 vm->mmio_region[i].end = start + size; 845 vm->mmio_region[i].read = mmio_read; 846 vm->mmio_region[i].write = mmio_write; 847 return; 848 } 849 } 850 851 panic("%s: No free MMIO region", __func__); 852 } 853 854 void 855 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 856 { 857 int i; 858 859 for (i = 0; i < nitems(vm->mmio_region); i++) { 860 if (vm->mmio_region[i].start == start && 861 vm->mmio_region[i].end == start + size) { 862 memset(&vm->mmio_region[i], 0, 863 sizeof(vm->mmio_region[i])); 864 return; 865 } 866 } 867 868 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 869 start + size); 870 } 871 872 static int 873 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 874 { 875 struct vm *vm; 876 struct vm_exit *vme; 877 struct vie *vie; 878 struct hyp *hyp; 879 uint64_t fault_ipa; 880 struct vm_guest_paging *paging; 881 struct vmm_mmio_region *vmr; 882 int error, i; 883 884 vm = vcpu->vm; 885 hyp = vm->cookie; 886 if (!hyp->vgic_attached) 887 goto out_user; 888 889 vme = &vcpu->exitinfo; 890 vie = &vme->u.inst_emul.vie; 891 paging = &vme->u.inst_emul.paging; 892 893 fault_ipa = vme->u.inst_emul.gpa; 894 895 vmr = NULL; 896 for (i = 0; i < nitems(vm->mmio_region); i++) { 897 if (vm->mmio_region[i].start <= fault_ipa && 898 vm->mmio_region[i].end > fault_ipa) { 899 vmr = &vm->mmio_region[i]; 900 break; 901 } 902 } 903 if (vmr == NULL) 904 goto out_user; 905 906 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 907 vmr->read, vmr->write, retu); 908 return (error); 909 910 out_user: 911 *retu = true; 912 return (0); 913 } 914 915 int 916 vm_suspend(struct vm *vm, enum vm_suspend_how how) 917 { 918 int i; 919 920 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 921 return (EINVAL); 922 923 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 924 VM_CTR2(vm, "virtual machine already suspended %d/%d", 925 vm->suspend, how); 926 return (EALREADY); 927 } 928 929 VM_CTR1(vm, "virtual machine successfully suspended %d", how); 930 931 /* 932 * Notify all active vcpus that they are now suspended. 933 */ 934 for (i = 0; i < vm->maxcpus; i++) { 935 if (CPU_ISSET(i, &vm->active_cpus)) 936 vcpu_notify_event(vm_vcpu(vm, i)); 937 } 938 939 return (0); 940 } 941 942 void 943 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 944 { 945 struct vm *vm = vcpu->vm; 946 struct vm_exit *vmexit; 947 948 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 949 ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 950 951 vmexit = vm_exitinfo(vcpu); 952 vmexit->pc = pc; 953 vmexit->inst_length = 4; 954 vmexit->exitcode = VM_EXITCODE_SUSPENDED; 955 vmexit->u.suspended.how = vm->suspend; 956 } 957 958 void 959 vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 960 { 961 struct vm_exit *vmexit; 962 963 vmexit = vm_exitinfo(vcpu); 964 vmexit->pc = pc; 965 vmexit->inst_length = 4; 966 vmexit->exitcode = VM_EXITCODE_DEBUG; 967 } 968 969 int 970 vm_activate_cpu(struct vcpu *vcpu) 971 { 972 struct vm *vm = vcpu->vm; 973 974 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 975 return (EBUSY); 976 977 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 978 return (0); 979 980 } 981 982 int 983 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 984 { 985 if (vcpu == NULL) { 986 vm->debug_cpus = vm->active_cpus; 987 for (int i = 0; i < vm->maxcpus; i++) { 988 if (CPU_ISSET(i, &vm->active_cpus)) 989 vcpu_notify_event(vm_vcpu(vm, i)); 990 } 991 } else { 992 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 993 return (EINVAL); 994 995 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 996 vcpu_notify_event(vcpu); 997 } 998 return (0); 999 } 1000 1001 int 1002 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 1003 { 1004 1005 if (vcpu == NULL) { 1006 CPU_ZERO(&vm->debug_cpus); 1007 } else { 1008 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 1009 return (EINVAL); 1010 1011 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 1012 } 1013 return (0); 1014 } 1015 1016 int 1017 vcpu_debugged(struct vcpu *vcpu) 1018 { 1019 1020 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 1021 } 1022 1023 cpuset_t 1024 vm_active_cpus(struct vm *vm) 1025 { 1026 1027 return (vm->active_cpus); 1028 } 1029 1030 cpuset_t 1031 vm_debug_cpus(struct vm *vm) 1032 { 1033 1034 return (vm->debug_cpus); 1035 } 1036 1037 cpuset_t 1038 vm_suspended_cpus(struct vm *vm) 1039 { 1040 1041 return (vm->suspended_cpus); 1042 } 1043 1044 1045 void * 1046 vcpu_stats(struct vcpu *vcpu) 1047 { 1048 1049 return (vcpu->stats); 1050 } 1051 1052 /* 1053 * This function is called to ensure that a vcpu "sees" a pending event 1054 * as soon as possible: 1055 * - If the vcpu thread is sleeping then it is woken up. 1056 * - If the vcpu is running on a different host_cpu then an IPI will be directed 1057 * to the host_cpu to cause the vcpu to trap into the hypervisor. 1058 */ 1059 static void 1060 vcpu_notify_event_locked(struct vcpu *vcpu) 1061 { 1062 int hostcpu; 1063 1064 hostcpu = vcpu->hostcpu; 1065 if (vcpu->state == VCPU_RUNNING) { 1066 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 1067 if (hostcpu != curcpu) { 1068 ipi_cpu(hostcpu, vmm_ipinum); 1069 } else { 1070 /* 1071 * If the 'vcpu' is running on 'curcpu' then it must 1072 * be sending a notification to itself (e.g. SELF_IPI). 1073 * The pending event will be picked up when the vcpu 1074 * transitions back to guest context. 1075 */ 1076 } 1077 } else { 1078 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 1079 "with hostcpu %d", vcpu->state, hostcpu)); 1080 if (vcpu->state == VCPU_SLEEPING) 1081 wakeup_one(vcpu); 1082 } 1083 } 1084 1085 void 1086 vcpu_notify_event(struct vcpu *vcpu) 1087 { 1088 vcpu_lock(vcpu); 1089 vcpu_notify_event_locked(vcpu); 1090 vcpu_unlock(vcpu); 1091 } 1092 1093 struct vmspace * 1094 vm_vmspace(struct vm *vm) 1095 { 1096 return (vm->vmspace); 1097 } 1098 1099 struct vm_mem * 1100 vm_mem(struct vm *vm) 1101 { 1102 return (&vm->mem); 1103 } 1104 1105 static void 1106 restore_guest_fpustate(struct vcpu *vcpu) 1107 { 1108 1109 /* flush host state to the pcb */ 1110 vfp_save_state(curthread, curthread->td_pcb); 1111 /* Ensure the VFP state will be re-loaded when exiting the guest */ 1112 PCPU_SET(fpcurthread, NULL); 1113 1114 /* restore guest FPU state */ 1115 vfp_enable(); 1116 vfp_restore(vcpu->guestfpu); 1117 1118 /* 1119 * The FPU is now "dirty" with the guest's state so turn on emulation 1120 * to trap any access to the FPU by the host. 1121 */ 1122 vfp_disable(); 1123 } 1124 1125 static void 1126 save_guest_fpustate(struct vcpu *vcpu) 1127 { 1128 if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) != 1129 CPACR_FPEN_TRAP_ALL1) 1130 panic("VFP not enabled in host!"); 1131 1132 /* save guest FPU state */ 1133 vfp_enable(); 1134 vfp_store(vcpu->guestfpu); 1135 vfp_disable(); 1136 1137 KASSERT(PCPU_GET(fpcurthread) == NULL, 1138 ("%s: fpcurthread set with guest registers", __func__)); 1139 } 1140 static int 1141 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 1142 bool from_idle) 1143 { 1144 int error; 1145 1146 vcpu_assert_locked(vcpu); 1147 1148 /* 1149 * State transitions from the vmmdev_ioctl() must always begin from 1150 * the VCPU_IDLE state. This guarantees that there is only a single 1151 * ioctl() operating on a vcpu at any point. 1152 */ 1153 if (from_idle) { 1154 while (vcpu->state != VCPU_IDLE) { 1155 vcpu_notify_event_locked(vcpu); 1156 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 1157 } 1158 } else { 1159 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 1160 "vcpu idle state")); 1161 } 1162 1163 if (vcpu->state == VCPU_RUNNING) { 1164 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 1165 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 1166 } else { 1167 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 1168 "vcpu that is not running", vcpu->hostcpu)); 1169 } 1170 1171 /* 1172 * The following state transitions are allowed: 1173 * IDLE -> FROZEN -> IDLE 1174 * FROZEN -> RUNNING -> FROZEN 1175 * FROZEN -> SLEEPING -> FROZEN 1176 */ 1177 switch (vcpu->state) { 1178 case VCPU_IDLE: 1179 case VCPU_RUNNING: 1180 case VCPU_SLEEPING: 1181 error = (newstate != VCPU_FROZEN); 1182 break; 1183 case VCPU_FROZEN: 1184 error = (newstate == VCPU_FROZEN); 1185 break; 1186 default: 1187 error = 1; 1188 break; 1189 } 1190 1191 if (error) 1192 return (EBUSY); 1193 1194 vcpu->state = newstate; 1195 if (newstate == VCPU_RUNNING) 1196 vcpu->hostcpu = curcpu; 1197 else 1198 vcpu->hostcpu = NOCPU; 1199 1200 if (newstate == VCPU_IDLE) 1201 wakeup(&vcpu->state); 1202 1203 return (0); 1204 } 1205 1206 static void 1207 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 1208 { 1209 int error; 1210 1211 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 1212 panic("Error %d setting state to %d\n", error, newstate); 1213 } 1214 1215 static void 1216 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1217 { 1218 int error; 1219 1220 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1221 panic("Error %d setting state to %d", error, newstate); 1222 } 1223 1224 int 1225 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 1226 { 1227 if (type < 0 || type >= VM_CAP_MAX) 1228 return (EINVAL); 1229 1230 return (vmmops_getcap(vcpu->cookie, type, retval)); 1231 } 1232 1233 int 1234 vm_set_capability(struct vcpu *vcpu, int type, int val) 1235 { 1236 if (type < 0 || type >= VM_CAP_MAX) 1237 return (EINVAL); 1238 1239 return (vmmops_setcap(vcpu->cookie, type, val)); 1240 } 1241 1242 struct vm * 1243 vcpu_vm(struct vcpu *vcpu) 1244 { 1245 return (vcpu->vm); 1246 } 1247 1248 int 1249 vcpu_vcpuid(struct vcpu *vcpu) 1250 { 1251 return (vcpu->vcpuid); 1252 } 1253 1254 void * 1255 vcpu_get_cookie(struct vcpu *vcpu) 1256 { 1257 return (vcpu->cookie); 1258 } 1259 1260 struct vcpu * 1261 vm_vcpu(struct vm *vm, int vcpuid) 1262 { 1263 return (vm->vcpu[vcpuid]); 1264 } 1265 1266 int 1267 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 1268 { 1269 int error; 1270 1271 vcpu_lock(vcpu); 1272 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1273 vcpu_unlock(vcpu); 1274 1275 return (error); 1276 } 1277 1278 enum vcpu_state 1279 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 1280 { 1281 enum vcpu_state state; 1282 1283 vcpu_lock(vcpu); 1284 state = vcpu->state; 1285 if (hostcpu != NULL) 1286 *hostcpu = vcpu->hostcpu; 1287 vcpu_unlock(vcpu); 1288 1289 return (state); 1290 } 1291 1292 int 1293 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 1294 { 1295 1296 if (reg >= VM_REG_LAST) 1297 return (EINVAL); 1298 1299 return (vmmops_getreg(vcpu->cookie, reg, retval)); 1300 } 1301 1302 int 1303 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 1304 { 1305 int error; 1306 1307 if (reg >= VM_REG_LAST) 1308 return (EINVAL); 1309 error = vmmops_setreg(vcpu->cookie, reg, val); 1310 if (error || reg != VM_REG_GUEST_PC) 1311 return (error); 1312 1313 vcpu->nextpc = val; 1314 1315 return (0); 1316 } 1317 1318 void * 1319 vm_get_cookie(struct vm *vm) 1320 { 1321 return (vm->cookie); 1322 } 1323 1324 int 1325 vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far) 1326 { 1327 return (vmmops_exception(vcpu->cookie, esr, far)); 1328 } 1329 1330 int 1331 vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr) 1332 { 1333 return (vgic_attach_to_vm(vm->cookie, descr)); 1334 } 1335 1336 int 1337 vm_assert_irq(struct vm *vm, uint32_t irq) 1338 { 1339 return (vgic_inject_irq(vm->cookie, -1, irq, true)); 1340 } 1341 1342 int 1343 vm_deassert_irq(struct vm *vm, uint32_t irq) 1344 { 1345 return (vgic_inject_irq(vm->cookie, -1, irq, false)); 1346 } 1347 1348 int 1349 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 1350 int func) 1351 { 1352 /* TODO: Should we raise an SError? */ 1353 return (vgic_inject_msi(vm->cookie, msg, addr)); 1354 } 1355 1356 static int 1357 vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1358 { 1359 struct hypctx *hypctx; 1360 int i; 1361 1362 hypctx = vcpu_get_cookie(vcpu); 1363 1364 if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0) 1365 return (1); 1366 1367 vme->exitcode = VM_EXITCODE_SMCCC; 1368 vme->u.smccc_call.func_id = hypctx->tf.tf_x[0]; 1369 for (i = 0; i < nitems(vme->u.smccc_call.args); i++) 1370 vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1]; 1371 1372 *retu = true; 1373 return (0); 1374 } 1375 1376 static int 1377 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1378 { 1379 struct vm *vm; 1380 1381 vm = vcpu->vm; 1382 vcpu_lock(vcpu); 1383 while (1) { 1384 if (vm->suspend) 1385 break; 1386 1387 if (vgic_has_pending_irq(vcpu->cookie)) 1388 break; 1389 1390 if (vcpu_should_yield(vcpu)) 1391 break; 1392 1393 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1394 /* 1395 * XXX msleep_spin() cannot be interrupted by signals so 1396 * wake up periodically to check pending signals. 1397 */ 1398 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); 1399 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1400 } 1401 vcpu_unlock(vcpu); 1402 1403 *retu = false; 1404 return (0); 1405 } 1406 1407 static int 1408 vm_handle_paging(struct vcpu *vcpu, bool *retu) 1409 { 1410 struct vm *vm = vcpu->vm; 1411 struct vm_exit *vme; 1412 struct vm_map *map; 1413 uint64_t addr, esr; 1414 pmap_t pmap; 1415 int ftype, rv; 1416 1417 vme = &vcpu->exitinfo; 1418 1419 pmap = vmspace_pmap(vcpu->vm->vmspace); 1420 addr = vme->u.paging.gpa; 1421 esr = vme->u.paging.esr; 1422 1423 /* The page exists, but the page table needs to be updated. */ 1424 if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS) 1425 return (0); 1426 1427 switch (ESR_ELx_EXCEPTION(esr)) { 1428 case EXCP_INSN_ABORT_L: 1429 case EXCP_DATA_ABORT_L: 1430 ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE; 1431 break; 1432 default: 1433 panic("%s: Invalid exception (esr = %lx)", __func__, esr); 1434 } 1435 1436 map = &vm->vmspace->vm_map; 1437 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); 1438 if (rv != KERN_SUCCESS) 1439 return (EFAULT); 1440 1441 return (0); 1442 } 1443 1444 static int 1445 vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1446 { 1447 struct vm *vm = vcpu->vm; 1448 int error, i; 1449 struct thread *td; 1450 1451 error = 0; 1452 td = curthread; 1453 1454 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1455 1456 /* 1457 * Wait until all 'active_cpus' have suspended themselves. 1458 * 1459 * Since a VM may be suspended at any time including when one or 1460 * more vcpus are doing a rendezvous we need to call the rendezvous 1461 * handler while we are waiting to prevent a deadlock. 1462 */ 1463 vcpu_lock(vcpu); 1464 while (error == 0) { 1465 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) 1466 break; 1467 1468 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1469 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1470 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1471 if (td_ast_pending(td, TDA_SUSPEND)) { 1472 vcpu_unlock(vcpu); 1473 error = thread_check_susp(td, false); 1474 vcpu_lock(vcpu); 1475 } 1476 } 1477 vcpu_unlock(vcpu); 1478 1479 /* 1480 * Wakeup the other sleeping vcpus and return to userspace. 1481 */ 1482 for (i = 0; i < vm->maxcpus; i++) { 1483 if (CPU_ISSET(i, &vm->suspended_cpus)) { 1484 vcpu_notify_event(vm_vcpu(vm, i)); 1485 } 1486 } 1487 1488 *retu = true; 1489 return (error); 1490 } 1491 1492 int 1493 vm_run(struct vcpu *vcpu) 1494 { 1495 struct vm *vm = vcpu->vm; 1496 struct vm_eventinfo evinfo; 1497 int error, vcpuid; 1498 struct vm_exit *vme; 1499 bool retu; 1500 pmap_t pmap; 1501 1502 vcpuid = vcpu->vcpuid; 1503 1504 if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1505 return (EINVAL); 1506 1507 if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1508 return (EINVAL); 1509 1510 pmap = vmspace_pmap(vm->vmspace); 1511 vme = &vcpu->exitinfo; 1512 evinfo.rptr = NULL; 1513 evinfo.sptr = &vm->suspend; 1514 evinfo.iptr = NULL; 1515 restart: 1516 critical_enter(); 1517 1518 restore_guest_fpustate(vcpu); 1519 1520 vcpu_require_state(vcpu, VCPU_RUNNING); 1521 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); 1522 vcpu_require_state(vcpu, VCPU_FROZEN); 1523 1524 save_guest_fpustate(vcpu); 1525 1526 critical_exit(); 1527 1528 if (error == 0) { 1529 retu = false; 1530 switch (vme->exitcode) { 1531 case VM_EXITCODE_INST_EMUL: 1532 vcpu->nextpc = vme->pc + vme->inst_length; 1533 error = vm_handle_inst_emul(vcpu, &retu); 1534 break; 1535 1536 case VM_EXITCODE_REG_EMUL: 1537 vcpu->nextpc = vme->pc + vme->inst_length; 1538 error = vm_handle_reg_emul(vcpu, &retu); 1539 break; 1540 1541 case VM_EXITCODE_HVC: 1542 /* 1543 * The HVC instruction saves the address for the 1544 * next instruction as the return address. 1545 */ 1546 vcpu->nextpc = vme->pc; 1547 /* 1548 * The PSCI call can change the exit information in the 1549 * case of suspend/reset/poweroff/cpu off/cpu on. 1550 */ 1551 error = vm_handle_smccc_call(vcpu, vme, &retu); 1552 break; 1553 1554 case VM_EXITCODE_WFI: 1555 vcpu->nextpc = vme->pc + vme->inst_length; 1556 error = vm_handle_wfi(vcpu, vme, &retu); 1557 break; 1558 1559 case VM_EXITCODE_PAGING: 1560 vcpu->nextpc = vme->pc; 1561 error = vm_handle_paging(vcpu, &retu); 1562 break; 1563 1564 case VM_EXITCODE_SUSPENDED: 1565 vcpu->nextpc = vme->pc; 1566 error = vm_handle_suspend(vcpu, &retu); 1567 break; 1568 1569 default: 1570 /* Handle in userland */ 1571 vcpu->nextpc = vme->pc; 1572 retu = true; 1573 break; 1574 } 1575 } 1576 1577 if (error == 0 && retu == false) 1578 goto restart; 1579 1580 return (error); 1581 } 1582