/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
 *
 * This software was developed by the University of Cambridge Computer
 * Laboratory (Department of Computer Science and Technology) under Innovate
 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
 * Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/riscvreg.h>
#include <machine/cpu.h>
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct fpreg	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
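
/*
 * A mem_seg describes a chunk of guest memory backed by a VM object, while a
 * mem_map places (part of) a segment at a guest physical address.  Segments
 * are created with vm_alloc_memseg() and instantiated in the guest address
 * space with vm_mmap_memseg() below.
 */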
struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu.  We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
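
/*
 * vCPUs are created lazily: vm_alloc_vcpu() allocates and initializes a vcpu
 * the first time it is requested, under vcpus_init_lock.  vcpu_cleanup()
 * releases the backend cookie; a full destroy additionally frees the
 * statistics buffer, the guest FPU save area and the vcpu lock.
 */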
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{

	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: check if has_hyp here? */
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: check if has_hyp here? */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
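
/*
 * vm_init() is called with create == true from vm_create() and with
 * create == false from vm_reinit().  On reinit the per-vcpu backend state is
 * re-initialized in place for every vcpu that already exists.
 */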
static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}
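
/*
 * The topology reported to the guest is constrained by the vcpu count fixed
 * at vm_create() time: sockets * cores * threads may not exceed vm->maxcpus.
 * The maxcpus argument itself is ignored.
 */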
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	int i;

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{

	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}
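
/*
 * mem_segs_lock protects the memory segment table.  vm_alloc_memseg()
 * asserts the exclusive lock, while vm_get_memseg() only requires it shared.
 * A minimal read-side sketch (names illustrative):
 *
 *	vm_slock_memsegs(vm);
 *	error = vm_get_memseg(vm, ident, &len, &sysmem, &obj);
 *	vm_unlock_memsegs(vm);
 */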
void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);	/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}
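
/*
 * A typical consumer first creates a segment and then maps it wired into the
 * guest, roughly (sketch, illustrative values, error handling omitted):
 *
 *	vm_xlock_memsegs(vm);
 *	vm_alloc_memseg(vm, 0, len, true);
 *	vm_unlock_memsegs(vm);
 *	vm_mmap_memseg(vm, gpa, 0, 0, len, VM_PROT_ALL, VM_MEMMAP_F_WIRED);
 *
 * Mappings are torn down with vm_munmap_memseg() or, for device memory,
 * on VM reset (see vm_cleanup()).
 */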
int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{

	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	int error;

	error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);

	return (error);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}
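
/*
 * Handle a trapped guest load/store: find the in-kernel MMIO handler, if
 * any, registered for the faulting guest physical address and let the
 * instruction emulator invoke its read/write callbacks.  Faults that land
 * outside every registered region (or arrive before the APLIC is attached)
 * are bounced to userspace by setting *retu.
 */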
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->aplic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}
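
/*
 * vm_suspend_cpu() and vm_resume_cpu() maintain debug_cpus, the set of vcpus
 * held for the debugger.  Passing a NULL vcpu operates on all active vcpus
 * at once.
 */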
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}
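
/*
 * The guest floating-point context is switched around every entry into the
 * guest: restore_guest_fpustate() saves the host FPE state and loads the
 * guest's, and save_guest_fpustate() does the reverse after the exit.  Both
 * run inside the critical section in vm_run().
 */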
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* Flush host state to the pcb. */
	fpe_state_save(curthread);

	/* Ensure the VFP state will be re-loaded when exiting the guest. */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	fpe_enable();
	fpe_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpe_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	/* Save guest FPE state. */
	fpe_enable();
	fpe_store(vcpu->guestfpu);
	fpe_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
			    hz / 1000);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{

	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{

	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{

	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}
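
/*
 * _vm_gpa_hold() wires the single page backing 'gpa' in the sysmem mapping
 * that contains it and returns a host virtual address for it via the direct
 * map, offset to 'gpa'.  The wired page is released with vm_gpa_release().
 */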
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_SEPC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{

	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
{

	return (vmmops_exception(vcpu->cookie, scause));
}

int
vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
{

	return (aplic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{

	return (aplic_inject_msi(vm->cookie, msg, addr));
}
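
/*
 * A guest WFI parks the vcpu until there is something to do: the loop below
 * sleeps in short intervals (the vcpu is marked SLEEPING so notifications
 * can wake it) and resumes as soon as the APLIC has a pending interrupt, an
 * IPI is pending, or the vcpu is asked to yield.
 */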
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{

	vcpu_lock(vcpu);

	while (1) {
		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}
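
/*
 * Resolve a guest-physical page fault.  The faulting address is recovered
 * from htval (shifted left by two, as specified for guest page faults) and
 * the fault type from scause.  The fast path lets pmap_fault() update the
 * page tables when the page is already resident; otherwise the fault is
 * serviced through vm_fault() on the guest's vmspace map.
 */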
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}