/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct vfpstate	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

struct vmm_special_reg {
	uint32_t	esr_iss;
	uint32_t	esr_mask;
	reg_read_t	reg_read;
	reg_write_t	reg_write;
	void		*arg;
};
#define	VM_MAX_SPECIAL_REGS	16
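
/*
 * Trapped MSR/MRS accesses are matched against vmm_special_reg entries in
 * vm_handle_reg_emul(): an entry matches when the ISS of the trapping
 * instruction, masked with esr_mask, equals esr_iss, i.e.
 * (vre->inst_syndrome & esr_mask) == esr_iss, and the reg_read/reg_write
 * callbacks then emulate the access.
 */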

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vm_mem	mem;			/* (i) guest memory */
	char		name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static int vm_handle_wfi(struct vcpu *vcpu,
    struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};
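
/*
 * ID register fields that may be exposed to a guest.  Fields that are not
 * selected here are hidden: vmm_regs_init() masks the host values with these
 * masks and zeroes any register it cannot read.
 */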
static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an fiq");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define	_FETCH_KERN_REG(reg, field) do {				\
	regs->field = vmm_arch_regs_masks.field;			\
	if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,	\
	    masks->field))						\
		regs->field = 0;					\
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
		free(vcpu, M_VMM);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}
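
/*
 * Check for hardware on which vmm is known not to work; vmm_modinit()
 * refuses to load the module on such CPUs.
 */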
static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}

int
vmm_modinit(void)
{
	int error;

	error = vmm_unsupported_quirk();
	if (error != 0)
		return (error);

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

int
vmm_modcleanup(void)
{
	return (vmmops_modcleanup());
}

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		/* Some interrupt controllers may have a CPU limit */
		if (vcpuid >= vgic_max_cpu_count(vm->cookie)) {
			sx_xunlock(&vm->vcpus_init_lock);
			return (NULL);
		}

		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	int error;

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
	if (error != 0) {
		free(vm, M_VMM);
		return (error);
	}
	strcpy(vm->name, name);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}
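
/*
 * The requested topology must fit within the maxcpus value fixed at VM
 * creation time; the maxcpus argument itself cannot be changed here.
 */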
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm_vmspace(vm));
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static int
vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	struct hypctx *hypctx;

	hypctx = vcpu_get_cookie(vcpu);
	/* All other fields are RES0 & we don't do anything with this */
	/* TODO: Disable access to other debug state when locked */
	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
	return (0);
}

static int
vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	struct hypctx *hypctx;
	uint64_t val;

	hypctx = vcpu_get_cookie(vcpu);
	val = OSLSR_OSLM_1;
	if (hypctx->dbg_oslock)
		val |= OSLSR_OSLK;
	*rval = val;

	return (0);
}
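
/*
 * Built-in special register handlers.  The ID registers listed below return
 * the masked host values from vmm_arch_regs, any other register in the ID
 * space is RAZ/WI, and the physical counter/timer and OS lock/debug
 * registers are forwarded to dedicated handlers.
 */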
static const struct vmm_special_reg vmm_special_regs[] = {
#define	SPECIAL_REG(_reg, _read, _write)				\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = (_read),					\
		.reg_write = (_write),					\
		.arg = NULL,						\
	}
#define	ID_SPECIAL_REG(_reg, _name)					\
	{								\
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
		.esr_mask = ISS_MSR_REG_MASK,				\
		.reg_read = vmm_reg_read_arg,				\
		.reg_write = vmm_reg_wi,				\
		.arg = &(vmm_arch_regs._name),				\
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),

	/* Debug registers */
	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
	/* TODO: Exceptions on invalid access */
	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}
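
/*
 * Emulate a trapped MSR/MRS access.  Handlers registered on the VM with
 * vm_register_reg_handler() are consulted before the built-in
 * vmm_special_regs table; if no entry matches, the access is forwarded to
 * userspace by setting *retu.
 */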
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}
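
/*
 * Request an orderly suspend.  Only the first request is recorded in
 * vm->suspend; every active vcpu is then notified so that it exits to the
 * suspend handler and eventually reports VM_EXITCODE_SUSPENDED.
 */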
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}
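
/*
 * All vcpu state transitions are funneled through here with the vcpu spin
 * lock held; the allowed transitions are documented in the body below.
 */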
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}
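
/*
 * Setting VM_REG_GUEST_PC also updates vcpu->nextpc so that the next
 * vmmops_run() resumes the guest at the new address.
 */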
int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}
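
/*
 * Handle a stage-2 fault on guest memory.  pmap_fault() is tried first to
 * fix up faults where the page is already mapped; otherwise the page is
 * faulted in through vm_fault() on the guest's vmspace.
 */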
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm_vmspace(vm)->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}
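
/*
 * Main vcpu run loop.  The guest FP/SIMD state is loaded and the vcpu is
 * entered via vmmops_run() inside a critical section; on exit, exits that
 * can be completed in the kernel are handled here and the loop restarts,
 * while anything else sets retu and is returned to userspace.
 */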
int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm_vmspace(vm));
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}