/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
 *
 * This software was developed by the University of Cambridge Computer
 * Laboratory (Department of Computer Science and Technology) under Innovate
 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
 * Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/riscvreg.h>
#include <machine/cpu.h>
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct fpreg	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
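
/*
 * A memory segment ('mem_seg') describes a backing VM object of guest
 * memory; a memory map ('mem_map') maps part of a segment into the guest
 * physical address space.  Segments are created with vm_alloc_memseg()
 * and mapped with vm_mmap_memseg().
 */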
struct mem_seg {
	uint64_t	gpa;
	size_t		len;
	bool		wired;
	bool		sysmem;
	vm_object_t	object;
};
#define	VM_MAX_MEMSEGS	3

struct mem_map {
	vm_paddr_t	gpa;
	size_t		len;
	vm_ooffset_t	segoff;
	int		segid;
	int		prot;
	int		flags;
};
#define	VM_MAX_MEMMAPS	4

struct vmm_mmio_region {
	uint64_t	start;
	uint64_t	end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	mem_segs_lock;		/* (o) */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu);

/*
 * Upper limit on vm_maxcpu.  We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
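
/*
 * vcpus are created lazily, on first use, by vm_alloc_vcpu(); they are
 * torn down here when the VM is reinitialized or destroyed.
 */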
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		/* TODO: check if has_hyp here? */
		vmmdev_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		break;
	case MOD_UNLOAD:
		/* TODO: check if has_hyp here? */
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error)
				vmm_initialized = false;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	sx_init(&vm->mem_segs_lock, "vm mem_segs");
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}
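
/*
 * A virtual machine is typically driven through the vmm device ioctls:
 * vm_create(), then vm_alloc_vcpu() and vm_activate_cpu() for each vcpu,
 * vm_run() in a loop, and finally vm_destroy().
 */
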
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	struct mem_map *mm;
	int i;

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	/*
	 * System memory is removed from the guest address space only when
	 * the VM is destroyed. This is because the mapping remains the same
	 * across VM reset.
	 *
	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
	 * so those mappings are removed on a VM reset.
	 */
	if (!destroy) {
		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
			mm = &vm->mem_maps[i];
			if (destroy || !sysmem_mapping(vm, mm))
				vm_free_memmap(vm, i);
		}
	}

	if (destroy) {
		for (i = 0; i < VM_MAX_MEMSEGS; i++)
			vm_free_memseg(vm, i);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
		sx_destroy(&vm->mem_segs_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

void
vm_slock_memsegs(struct vm *vm)
{
	sx_slock(&vm->mem_segs_lock);
}

void
vm_xlock_memsegs(struct vm *vm)
{
	sx_xlock(&vm->mem_segs_lock);
}

void
vm_unlock_memsegs(struct vm *vm)
{
	sx_unlock(&vm->mem_segs_lock);
}

/*
 * Return 'true' if 'gpa' is allocated in the guest address space.
 *
 * This function is called in the context of a running vcpu which acts as
 * an implicit lock on 'vm->mem_maps[]'.
 */
bool
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
{
	struct vm *vm = vcpu->vm;
	struct mem_map *mm;
	int i;

#ifdef INVARIANTS
	int hostcpu, state;
	state = vcpu_get_state(vcpu, &hostcpu);
	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
			return (true);		/* 'gpa' is sysmem or devmem */
	}

	return (false);
}

int
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
	struct mem_seg *seg;
	vm_object_t obj;

	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	if (len == 0 || (len & PAGE_MASK))
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		if (seg->len == len && seg->sysmem == sysmem)
			return (EEXIST);
		else
			return (EINVAL);
	}

	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
	if (obj == NULL)
		return (ENOMEM);

	seg->len = len;
	seg->object = obj;
	seg->sysmem = sysmem;
	return (0);
}

int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    vm_object_t *objptr)
{
	struct mem_seg *seg;

	sx_assert(&vm->mem_segs_lock, SX_LOCKED);

	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[ident];
	if (len)
		*len = seg->len;
	if (sysmem)
		*sysmem = seg->sysmem;
	if (objptr)
		*objptr = seg->object;
	return (0);
}

void
vm_free_memseg(struct vm *vm, int ident)
{
	struct mem_seg *seg;

	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
	    ("%s: invalid memseg ident %d", __func__, ident));

	seg = &vm->mem_segs[ident];
	if (seg->object != NULL) {
		vm_object_deallocate(seg->object);
		bzero(seg, sizeof(struct mem_seg));
	}
}

int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
    size_t len, int prot, int flags)
{
	struct mem_seg *seg;
	struct mem_map *m, *map;
	vm_ooffset_t last;
	int i, error;

	dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);

	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
		return (EINVAL);

	if (flags & ~VM_MEMMAP_F_WIRED)
		return (EINVAL);

	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
		return (EINVAL);

	seg = &vm->mem_segs[segid];
	if (seg->object == NULL)
		return (EINVAL);

	last = first + len;
	if (first < 0 || first >= last || last > seg->len)
		return (EINVAL);

	if ((gpa | first | last) & PAGE_MASK)
		return (EINVAL);

	map = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->len == 0) {
			map = m;
			break;
		}
	}

	if (map == NULL)
		return (ENOSPC);

	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
	if (error != KERN_SUCCESS)
		return (EFAULT);

	vm_object_reference(seg->object);

	if (flags & VM_MEMMAP_F_WIRED) {
		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (error != KERN_SUCCESS) {
			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		}
	}

	map->gpa = gpa;
	map->len = len;
	map->segoff = first;
	map->segid = segid;
	map->prot = prot;
	map->flags = flags;
	return (0);
}

int
vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	struct mem_map *m;
	int i;

	dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);

	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		m = &vm->mem_maps[i];
		if (m->gpa == gpa && m->len == len) {
			vm_free_memmap(vm, i);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct mem_map *mm, *mmnext;
	int i;

	mmnext = NULL;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (mm->len == 0 || mm->gpa < *gpa)
			continue;
		if (mmnext == NULL || mm->gpa < mmnext->gpa)
			mmnext = mm;
	}

	if (mmnext != NULL) {
		*gpa = mmnext->gpa;
		if (segid)
			*segid = mmnext->segid;
		if (segoff)
			*segoff = mmnext->segoff;
		if (len)
			*len = mmnext->len;
		if (prot)
			*prot = mmnext->prot;
		if (flags)
			*flags = mmnext->flags;
		return (0);
	} else {
		return (ENOENT);
	}
}

static void
vm_free_memmap(struct vm *vm, int ident)
{
	struct mem_map *mm;
	int error __diagused;

	mm = &vm->mem_maps[ident];
	if (mm->len) {
		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
		    mm->gpa + mm->len);
		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
		    __func__, error));
		bzero(mm, sizeof(struct mem_map));
	}
}

static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{
	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
		return (true);
	else
		return (false);
}

vm_paddr_t
vmm_sysmem_maxaddr(struct vm *vm)
{
	struct mem_map *mm;
	vm_paddr_t maxaddr;
	int i;

	maxaddr = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm)) {
			if (maxaddr < mm->gpa + mm->len)
				maxaddr = mm->gpa + mm->len;
		}
	}
	return (maxaddr);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	int error;

	error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);

	return (error);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}
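
/*
 * Handle an instruction emulation exit: look up the registered MMIO region
 * (e.g. the emulated APLIC) covering the faulting guest physical address
 * and emulate the access with the region's read/write handlers.  Accesses
 * that fall outside every registered region are returned to userspace.
 */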
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->aplic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{
	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{
	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{
	/* Flush host state to the pcb. */
	fpe_state_save(curthread);

	/* Ensure the FP state will be re-loaded when exiting the guest. */
	PCPU_SET(fpcurthread, NULL);

	/* Restore guest FPU state. */
	fpe_enable();
	fpe_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpe_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	/* Save guest FPE state. */
	fpe_enable();
	fpe_store(vcpu->guestfpu);
	fpe_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
			    hz / 1000);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}
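
/*
 * Hold the guest page backing 'gpa' (wiring it in) and return a pointer to
 * it through the host's direct map.  The page is released again with
 * vm_gpa_release().
 */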
static void *
_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int i, count, pageoff;
	struct mem_map *mm;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = 0;
	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
		mm = &vm->mem_maps[i];
		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
		    gpa < mm->gpa + mm->len) {
			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
			break;
		}
	}

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void *
vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
#ifdef INVARIANTS
	/*
	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
	 * stability.
	 */
	int state = vcpu_get_state(vcpu, NULL);
	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
	    __func__, state));
#endif
	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
}

void *
vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_unwire(m, PQ_ACTIVE);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_SEPC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
{
	return (vmmops_exception(vcpu->cookie, scause));
}

int
vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
{
	return (aplic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (aplic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (aplic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	return (aplic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	vcpu_lock(vcpu);

	while (1) {
		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}
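
/*
 * Handle a guest ('stage 2') page fault.  htval holds the faulting guest
 * physical address shifted right by two bits, hence the '<< 2' below.  The
 * fault is first retried against the existing pmap and, failing that,
 * resolved with vm_fault() on the guest's vmspace.
 */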
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}
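
/*
 * Run the guest on this vcpu until it exits.  Exits that can be handled in
 * the kernel (MMIO instruction emulation, WFI, stage 2 page faults, bogus
 * exits and suspend) are processed here and the guest is resumed; all other
 * exit codes are returned to userspace with 'vm_exit' filled in.
 */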
int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}