1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com> 5 * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com> 6 * 7 * This software was developed by the University of Cambridge Computer 8 * Laboratory (Department of Computer Science and Technology) under Innovate 9 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform 10 * Prototype". 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/cpuset.h> 37 #include <sys/kernel.h> 38 #include <sys/linker.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/pcpu.h> 43 #include <sys/proc.h> 44 #include <sys/queue.h> 45 #include <sys/rwlock.h> 46 #include <sys/sched.h> 47 #include <sys/smp.h> 48 #include <sys/sysctl.h> 49 50 #include <vm/vm.h> 51 #include <vm/vm_object.h> 52 #include <vm/vm_page.h> 53 #include <vm/pmap.h> 54 #include <vm/vm_map.h> 55 #include <vm/vm_extern.h> 56 #include <vm/vm_param.h> 57 58 #include <machine/riscvreg.h> 59 #include <machine/cpu.h> 60 #include <machine/fpe.h> 61 #include <machine/machdep.h> 62 #include <machine/pcb.h> 63 #include <machine/smp.h> 64 #include <machine/vm.h> 65 #include <machine/vmparam.h> 66 #include <machine/vmm.h> 67 #include <machine/vmm_instruction_emul.h> 68 69 #include <dev/pci/pcireg.h> 70 71 #include <dev/vmm/vmm_dev.h> 72 #include <dev/vmm/vmm_ktr.h> 73 #include <dev/vmm/vmm_mem.h> 74 75 #include "vmm_stat.h" 76 #include "riscv.h" 77 78 #include "vmm_aplic.h" 79 80 struct vcpu { 81 int flags; 82 enum vcpu_state state; 83 struct mtx mtx; 84 int hostcpu; /* host cpuid this vcpu last ran on */ 85 int vcpuid; 86 void *stats; 87 struct vm_exit exitinfo; 88 uint64_t nextpc; /* (x) next instruction to execute */ 89 struct vm *vm; /* (o) */ 90 void *cookie; /* (i) cpu-specific data */ 91 struct fpreg *guestfpu; /* (a,i) guest fpu state */ 92 }; 93 94 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 95 #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) 96 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 97 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 98 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 99 100 struct vmm_mmio_region { 101 uint64_t start; 102 uint64_t end; 103 mem_region_read_t read; 104 mem_region_write_t write; 105 }; 106 #define VM_MAX_MMIO_REGIONS 4 107 108 /* 109 * Initialization: 110 * (o) initialized the first time the VM is created 111 * (i) initialized when VM is created and when it is reinitialized 112 * (x) initialized before use 113 */ 114 struct vm { 115 void *cookie; /* (i) cpu-specific data */ 116 volatile cpuset_t active_cpus; /* (i) active vcpus */ 117 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ 118 int suspend; /* (i) stop VM execution */ 119 bool dying; /* (o) is dying */ 120 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 121 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 122 struct vm_mem mem; /* (i) [m+v] guest memory */ 123 char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ 124 struct vcpu **vcpu; /* (i) guest vcpus */ 125 struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; 126 /* (o) guest MMIO regions */ 127 /* The following describe the vm cpu topology */ 128 uint16_t sockets; /* (o) num of sockets */ 129 uint16_t cores; /* (o) num of cores/socket */ 130 uint16_t threads; /* (o) num of threads/core */ 131 uint16_t maxcpus; /* (o) max pluggable cpus */ 132 struct sx vcpus_init_lock; /* (o) */ 133 }; 134 135 static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); 136 137 /* statistics */ 138 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 139 140 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 141 142 static int vmm_ipinum; 143 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 144 "IPI vector used for vcpu notifications"); 145 146 static void vcpu_notify_event_locked(struct vcpu *vcpu); 147 148 /* global statistics */ 149 VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); 150 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); 151 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); 152 153 static void 154 vcpu_cleanup(struct vcpu *vcpu, bool destroy) 155 { 156 vmmops_vcpu_cleanup(vcpu->cookie); 157 vcpu->cookie = NULL; 158 if (destroy) { 159 vmm_stat_free(vcpu->stats); 160 fpu_save_area_free(vcpu->guestfpu); 161 vcpu_lock_destroy(vcpu); 162 free(vcpu, M_VMM); 163 } 164 } 165 166 static struct vcpu * 167 vcpu_alloc(struct vm *vm, int vcpu_id) 168 { 169 struct vcpu *vcpu; 170 171 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, 172 ("vcpu_alloc: invalid vcpu %d", vcpu_id)); 173 174 vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); 175 vcpu_lock_init(vcpu); 176 vcpu->state = VCPU_IDLE; 177 vcpu->hostcpu = NOCPU; 178 vcpu->vcpuid = vcpu_id; 179 vcpu->vm = vm; 180 vcpu->guestfpu = fpu_save_area_alloc(); 181 vcpu->stats = vmm_stat_alloc(); 182 return (vcpu); 183 } 184 185 static void 186 vcpu_init(struct vcpu *vcpu) 187 { 188 vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); 189 MPASS(vcpu->cookie != NULL); 190 fpu_save_area_reset(vcpu->guestfpu); 191 vmm_stat_init(vcpu->stats); 192 } 193 194 struct vm_exit * 195 vm_exitinfo(struct vcpu *vcpu) 196 { 197 return (&vcpu->exitinfo); 198 } 199 200 int 201 vmm_modinit(void) 202 { 203 return (vmmops_modinit()); 204 } 205 206 int 207 vmm_modcleanup(void) 208 { 209 return (vmmops_modcleanup()); 210 } 211 212 static void 213 vm_init(struct vm *vm, bool create) 214 { 215 int i; 216 217 vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); 218 MPASS(vm->cookie != NULL); 219 220 CPU_ZERO(&vm->active_cpus); 221 CPU_ZERO(&vm->debug_cpus); 222 223 vm->suspend = 0; 224 CPU_ZERO(&vm->suspended_cpus); 225 226 memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); 227 228 if (!create) { 229 for (i = 0; i < vm->maxcpus; i++) { 230 if (vm->vcpu[i] != NULL) 231 vcpu_init(vm->vcpu[i]); 232 } 233 } 234 } 235 236 void 237 vm_disable_vcpu_creation(struct vm *vm) 238 { 239 sx_xlock(&vm->vcpus_init_lock); 240 vm->dying = true; 241 sx_xunlock(&vm->vcpus_init_lock); 242 } 243 244 struct vcpu * 245 vm_alloc_vcpu(struct vm *vm, int vcpuid) 246 { 247 struct vcpu *vcpu; 248 249 if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) 250 return (NULL); 251 252 vcpu = (struct vcpu *) 253 atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); 254 if (__predict_true(vcpu != NULL)) 255 return (vcpu); 256 257 sx_xlock(&vm->vcpus_init_lock); 258 vcpu = vm->vcpu[vcpuid]; 259 if (vcpu == NULL && !vm->dying) { 260 vcpu = vcpu_alloc(vm, vcpuid); 261 vcpu_init(vcpu); 262 263 /* 264 * Ensure vCPU is fully created before updating pointer 265 * to permit unlocked reads above. 266 */ 267 atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], 268 (uintptr_t)vcpu); 269 } 270 sx_xunlock(&vm->vcpus_init_lock); 271 return (vcpu); 272 } 273 274 void 275 vm_lock_vcpus(struct vm *vm) 276 { 277 sx_xlock(&vm->vcpus_init_lock); 278 } 279 280 void 281 vm_unlock_vcpus(struct vm *vm) 282 { 283 sx_unlock(&vm->vcpus_init_lock); 284 } 285 286 int 287 vm_create(const char *name, struct vm **retvm) 288 { 289 struct vm *vm; 290 int error; 291 292 vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); 293 error = vm_mem_init(&vm->mem, 0, 1ul << 39); 294 if (error != 0) { 295 free(vm, M_VMM); 296 return (error); 297 } 298 strcpy(vm->name, name); 299 sx_init(&vm->vcpus_init_lock, "vm vcpus"); 300 301 vm->sockets = 1; 302 vm->cores = 1; /* XXX backwards compatibility */ 303 vm->threads = 1; /* XXX backwards compatibility */ 304 vm->maxcpus = vm_maxcpu; 305 306 vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, 307 M_WAITOK | M_ZERO); 308 309 vm_init(vm, true); 310 311 *retvm = vm; 312 return (0); 313 } 314 315 void 316 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, 317 uint16_t *threads, uint16_t *maxcpus) 318 { 319 *sockets = vm->sockets; 320 *cores = vm->cores; 321 *threads = vm->threads; 322 *maxcpus = vm->maxcpus; 323 } 324 325 uint16_t 326 vm_get_maxcpus(struct vm *vm) 327 { 328 return (vm->maxcpus); 329 } 330 331 int 332 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, 333 uint16_t threads, uint16_t maxcpus) 334 { 335 /* Ignore maxcpus. */ 336 if ((sockets * cores * threads) > vm->maxcpus) 337 return (EINVAL); 338 vm->sockets = sockets; 339 vm->cores = cores; 340 vm->threads = threads; 341 return(0); 342 } 343 344 static void 345 vm_cleanup(struct vm *vm, bool destroy) 346 { 347 int i; 348 349 if (destroy) 350 vm_xlock_memsegs(vm); 351 else 352 vm_assert_memseg_xlocked(vm); 353 354 aplic_detach_from_vm(vm->cookie); 355 356 for (i = 0; i < vm->maxcpus; i++) { 357 if (vm->vcpu[i] != NULL) 358 vcpu_cleanup(vm->vcpu[i], destroy); 359 } 360 361 vmmops_cleanup(vm->cookie); 362 363 vm_mem_cleanup(vm); 364 if (destroy) { 365 vm_mem_destroy(vm); 366 367 free(vm->vcpu, M_VMM); 368 sx_destroy(&vm->vcpus_init_lock); 369 } 370 } 371 372 void 373 vm_destroy(struct vm *vm) 374 { 375 376 vm_cleanup(vm, true); 377 378 free(vm, M_VMM); 379 } 380 381 int 382 vm_reinit(struct vm *vm) 383 { 384 int error; 385 386 /* 387 * A virtual machine can be reset only if all vcpus are suspended. 388 */ 389 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 390 vm_cleanup(vm, false); 391 vm_init(vm, false); 392 error = 0; 393 } else { 394 error = EBUSY; 395 } 396 397 return (error); 398 } 399 400 const char * 401 vm_name(struct vm *vm) 402 { 403 return (vm->name); 404 } 405 406 int 407 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, 408 uint64_t gla, int prot, uint64_t *gpa, int *is_fault) 409 { 410 return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault)); 411 } 412 413 void 414 vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, 415 mem_region_read_t mmio_read, mem_region_write_t mmio_write) 416 { 417 int i; 418 419 for (i = 0; i < nitems(vm->mmio_region); i++) { 420 if (vm->mmio_region[i].start == 0 && 421 vm->mmio_region[i].end == 0) { 422 vm->mmio_region[i].start = start; 423 vm->mmio_region[i].end = start + size; 424 vm->mmio_region[i].read = mmio_read; 425 vm->mmio_region[i].write = mmio_write; 426 return; 427 } 428 } 429 430 panic("%s: No free MMIO region", __func__); 431 } 432 433 void 434 vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) 435 { 436 int i; 437 438 for (i = 0; i < nitems(vm->mmio_region); i++) { 439 if (vm->mmio_region[i].start == start && 440 vm->mmio_region[i].end == start + size) { 441 memset(&vm->mmio_region[i], 0, 442 sizeof(vm->mmio_region[i])); 443 return; 444 } 445 } 446 447 panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, 448 start + size); 449 } 450 451 static int 452 vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) 453 { 454 struct vm *vm; 455 struct vm_exit *vme; 456 struct vie *vie; 457 struct hyp *hyp; 458 uint64_t fault_ipa; 459 struct vm_guest_paging *paging; 460 struct vmm_mmio_region *vmr; 461 int error, i; 462 463 vm = vcpu->vm; 464 hyp = vm->cookie; 465 if (!hyp->aplic_attached) 466 goto out_user; 467 468 vme = &vcpu->exitinfo; 469 vie = &vme->u.inst_emul.vie; 470 paging = &vme->u.inst_emul.paging; 471 472 fault_ipa = vme->u.inst_emul.gpa; 473 474 vmr = NULL; 475 for (i = 0; i < nitems(vm->mmio_region); i++) { 476 if (vm->mmio_region[i].start <= fault_ipa && 477 vm->mmio_region[i].end > fault_ipa) { 478 vmr = &vm->mmio_region[i]; 479 break; 480 } 481 } 482 if (vmr == NULL) 483 goto out_user; 484 485 error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, 486 vmr->read, vmr->write, retu); 487 return (error); 488 489 out_user: 490 *retu = true; 491 return (0); 492 } 493 494 int 495 vm_suspend(struct vm *vm, enum vm_suspend_how how) 496 { 497 int i; 498 499 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 500 return (EINVAL); 501 502 if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 503 VM_CTR2(vm, "virtual machine already suspended %d/%d", 504 vm->suspend, how); 505 return (EALREADY); 506 } 507 508 VM_CTR1(vm, "virtual machine successfully suspended %d", how); 509 510 /* 511 * Notify all active vcpus that they are now suspended. 512 */ 513 for (i = 0; i < vm->maxcpus; i++) { 514 if (CPU_ISSET(i, &vm->active_cpus)) 515 vcpu_notify_event(vm_vcpu(vm, i)); 516 } 517 518 return (0); 519 } 520 521 void 522 vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) 523 { 524 struct vm *vm = vcpu->vm; 525 struct vm_exit *vmexit; 526 527 KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 528 ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 529 530 vmexit = vm_exitinfo(vcpu); 531 vmexit->pc = pc; 532 vmexit->inst_length = 4; 533 vmexit->exitcode = VM_EXITCODE_SUSPENDED; 534 vmexit->u.suspended.how = vm->suspend; 535 } 536 537 void 538 vm_exit_debug(struct vcpu *vcpu, uint64_t pc) 539 { 540 struct vm_exit *vmexit; 541 542 vmexit = vm_exitinfo(vcpu); 543 vmexit->pc = pc; 544 vmexit->inst_length = 4; 545 vmexit->exitcode = VM_EXITCODE_DEBUG; 546 } 547 548 int 549 vm_activate_cpu(struct vcpu *vcpu) 550 { 551 struct vm *vm = vcpu->vm; 552 553 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 554 return (EBUSY); 555 556 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 557 return (0); 558 559 } 560 561 int 562 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 563 { 564 if (vcpu == NULL) { 565 vm->debug_cpus = vm->active_cpus; 566 for (int i = 0; i < vm->maxcpus; i++) { 567 if (CPU_ISSET(i, &vm->active_cpus)) 568 vcpu_notify_event(vm_vcpu(vm, i)); 569 } 570 } else { 571 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 572 return (EINVAL); 573 574 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 575 vcpu_notify_event(vcpu); 576 } 577 return (0); 578 } 579 580 int 581 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 582 { 583 584 if (vcpu == NULL) { 585 CPU_ZERO(&vm->debug_cpus); 586 } else { 587 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 588 return (EINVAL); 589 590 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 591 } 592 return (0); 593 } 594 595 int 596 vcpu_debugged(struct vcpu *vcpu) 597 { 598 599 return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); 600 } 601 602 cpuset_t 603 vm_active_cpus(struct vm *vm) 604 { 605 606 return (vm->active_cpus); 607 } 608 609 cpuset_t 610 vm_debug_cpus(struct vm *vm) 611 { 612 613 return (vm->debug_cpus); 614 } 615 616 cpuset_t 617 vm_suspended_cpus(struct vm *vm) 618 { 619 620 return (vm->suspended_cpus); 621 } 622 623 624 void * 625 vcpu_stats(struct vcpu *vcpu) 626 { 627 628 return (vcpu->stats); 629 } 630 631 /* 632 * This function is called to ensure that a vcpu "sees" a pending event 633 * as soon as possible: 634 * - If the vcpu thread is sleeping then it is woken up. 635 * - If the vcpu is running on a different host_cpu then an IPI will be directed 636 * to the host_cpu to cause the vcpu to trap into the hypervisor. 637 */ 638 static void 639 vcpu_notify_event_locked(struct vcpu *vcpu) 640 { 641 int hostcpu; 642 643 hostcpu = vcpu->hostcpu; 644 if (vcpu->state == VCPU_RUNNING) { 645 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 646 if (hostcpu != curcpu) { 647 ipi_cpu(hostcpu, vmm_ipinum); 648 } else { 649 /* 650 * If the 'vcpu' is running on 'curcpu' then it must 651 * be sending a notification to itself (e.g. SELF_IPI). 652 * The pending event will be picked up when the vcpu 653 * transitions back to guest context. 654 */ 655 } 656 } else { 657 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 658 "with hostcpu %d", vcpu->state, hostcpu)); 659 if (vcpu->state == VCPU_SLEEPING) 660 wakeup_one(vcpu); 661 } 662 } 663 664 void 665 vcpu_notify_event(struct vcpu *vcpu) 666 { 667 vcpu_lock(vcpu); 668 vcpu_notify_event_locked(vcpu); 669 vcpu_unlock(vcpu); 670 } 671 672 struct vm_mem * 673 vm_mem(struct vm *vm) 674 { 675 return (&vm->mem); 676 } 677 678 static void 679 restore_guest_fpustate(struct vcpu *vcpu) 680 { 681 682 /* Flush host state to the pcb. */ 683 fpe_state_save(curthread); 684 685 /* Ensure the VFP state will be re-loaded when exiting the guest. */ 686 PCPU_SET(fpcurthread, NULL); 687 688 /* restore guest FPU state */ 689 fpe_enable(); 690 fpe_restore(vcpu->guestfpu); 691 692 /* 693 * The FPU is now "dirty" with the guest's state so turn on emulation 694 * to trap any access to the FPU by the host. 695 */ 696 fpe_disable(); 697 } 698 699 static void 700 save_guest_fpustate(struct vcpu *vcpu) 701 { 702 703 /* Save guest FPE state. */ 704 fpe_enable(); 705 fpe_store(vcpu->guestfpu); 706 fpe_disable(); 707 708 KASSERT(PCPU_GET(fpcurthread) == NULL, 709 ("%s: fpcurthread set with guest registers", __func__)); 710 } 711 712 static int 713 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 714 bool from_idle) 715 { 716 int error; 717 718 vcpu_assert_locked(vcpu); 719 720 /* 721 * State transitions from the vmmdev_ioctl() must always begin from 722 * the VCPU_IDLE state. This guarantees that there is only a single 723 * ioctl() operating on a vcpu at any point. 724 */ 725 if (from_idle) { 726 while (vcpu->state != VCPU_IDLE) { 727 vcpu_notify_event_locked(vcpu); 728 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 729 } 730 } else { 731 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 732 "vcpu idle state")); 733 } 734 735 if (vcpu->state == VCPU_RUNNING) { 736 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 737 "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 738 } else { 739 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 740 "vcpu that is not running", vcpu->hostcpu)); 741 } 742 743 /* 744 * The following state transitions are allowed: 745 * IDLE -> FROZEN -> IDLE 746 * FROZEN -> RUNNING -> FROZEN 747 * FROZEN -> SLEEPING -> FROZEN 748 */ 749 switch (vcpu->state) { 750 case VCPU_IDLE: 751 case VCPU_RUNNING: 752 case VCPU_SLEEPING: 753 error = (newstate != VCPU_FROZEN); 754 break; 755 case VCPU_FROZEN: 756 error = (newstate == VCPU_FROZEN); 757 break; 758 default: 759 error = 1; 760 break; 761 } 762 763 if (error) 764 return (EBUSY); 765 766 vcpu->state = newstate; 767 if (newstate == VCPU_RUNNING) 768 vcpu->hostcpu = curcpu; 769 else 770 vcpu->hostcpu = NOCPU; 771 772 if (newstate == VCPU_IDLE) 773 wakeup(&vcpu->state); 774 775 return (0); 776 } 777 778 static void 779 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) 780 { 781 int error; 782 783 if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) 784 panic("Error %d setting state to %d\n", error, newstate); 785 } 786 787 static void 788 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 789 { 790 int error; 791 792 if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 793 panic("Error %d setting state to %d", error, newstate); 794 } 795 796 int 797 vm_get_capability(struct vcpu *vcpu, int type, int *retval) 798 { 799 800 if (type < 0 || type >= VM_CAP_MAX) 801 return (EINVAL); 802 803 return (vmmops_getcap(vcpu->cookie, type, retval)); 804 } 805 806 int 807 vm_set_capability(struct vcpu *vcpu, int type, int val) 808 { 809 810 if (type < 0 || type >= VM_CAP_MAX) 811 return (EINVAL); 812 813 return (vmmops_setcap(vcpu->cookie, type, val)); 814 } 815 816 struct vm * 817 vcpu_vm(struct vcpu *vcpu) 818 { 819 820 return (vcpu->vm); 821 } 822 823 int 824 vcpu_vcpuid(struct vcpu *vcpu) 825 { 826 827 return (vcpu->vcpuid); 828 } 829 830 void * 831 vcpu_get_cookie(struct vcpu *vcpu) 832 { 833 834 return (vcpu->cookie); 835 } 836 837 struct vcpu * 838 vm_vcpu(struct vm *vm, int vcpuid) 839 { 840 841 return (vm->vcpu[vcpuid]); 842 } 843 844 int 845 vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) 846 { 847 int error; 848 849 vcpu_lock(vcpu); 850 error = vcpu_set_state_locked(vcpu, newstate, from_idle); 851 vcpu_unlock(vcpu); 852 853 return (error); 854 } 855 856 enum vcpu_state 857 vcpu_get_state(struct vcpu *vcpu, int *hostcpu) 858 { 859 enum vcpu_state state; 860 861 vcpu_lock(vcpu); 862 state = vcpu->state; 863 if (hostcpu != NULL) 864 *hostcpu = vcpu->hostcpu; 865 vcpu_unlock(vcpu); 866 867 return (state); 868 } 869 870 int 871 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) 872 { 873 if (reg < 0 || reg >= VM_REG_LAST) 874 return (EINVAL); 875 876 return (vmmops_getreg(vcpu->cookie, reg, retval)); 877 } 878 879 int 880 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 881 { 882 int error; 883 884 if (reg < 0 || reg >= VM_REG_LAST) 885 return (EINVAL); 886 error = vmmops_setreg(vcpu->cookie, reg, val); 887 if (error || reg != VM_REG_GUEST_SEPC) 888 return (error); 889 890 vcpu->nextpc = val; 891 892 return (0); 893 } 894 895 void * 896 vm_get_cookie(struct vm *vm) 897 { 898 899 return (vm->cookie); 900 } 901 902 int 903 vm_inject_exception(struct vcpu *vcpu, uint64_t scause) 904 { 905 906 return (vmmops_exception(vcpu->cookie, scause)); 907 } 908 909 int 910 vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) 911 { 912 913 return (aplic_attach_to_vm(vm->cookie, descr)); 914 } 915 916 int 917 vm_assert_irq(struct vm *vm, uint32_t irq) 918 { 919 920 return (aplic_inject_irq(vm->cookie, -1, irq, true)); 921 } 922 923 int 924 vm_deassert_irq(struct vm *vm, uint32_t irq) 925 { 926 927 return (aplic_inject_irq(vm->cookie, -1, irq, false)); 928 } 929 930 int 931 vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, 932 int func) 933 { 934 935 return (aplic_inject_msi(vm->cookie, msg, addr)); 936 } 937 938 static int 939 vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 940 { 941 struct vm *vm; 942 943 vm = vcpu->vm; 944 vcpu_lock(vcpu); 945 while (1) { 946 if (vm->suspend) 947 break; 948 949 if (aplic_check_pending(vcpu->cookie)) 950 break; 951 952 if (riscv_check_ipi(vcpu->cookie, false)) 953 break; 954 955 if (riscv_check_interrupts_pending(vcpu->cookie)) 956 break; 957 958 if (vcpu_should_yield(vcpu)) 959 break; 960 961 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 962 /* 963 * XXX msleep_spin() cannot be interrupted by signals so 964 * wake up periodically to check pending signals. 965 */ 966 msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); 967 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 968 } 969 vcpu_unlock(vcpu); 970 971 *retu = false; 972 973 return (0); 974 } 975 976 static int 977 vm_handle_paging(struct vcpu *vcpu, bool *retu) 978 { 979 struct vm *vm; 980 struct vm_exit *vme; 981 struct vm_map *map; 982 uint64_t addr; 983 pmap_t pmap; 984 int ftype, rv; 985 986 vm = vcpu->vm; 987 vme = &vcpu->exitinfo; 988 989 pmap = vmspace_pmap(vm_vmspace(vm)); 990 addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); 991 992 dprintf("%s: %lx\n", __func__, addr); 993 994 switch (vme->scause) { 995 case SCAUSE_STORE_GUEST_PAGE_FAULT: 996 ftype = VM_PROT_WRITE; 997 break; 998 case SCAUSE_FETCH_GUEST_PAGE_FAULT: 999 ftype = VM_PROT_EXECUTE; 1000 break; 1001 case SCAUSE_LOAD_GUEST_PAGE_FAULT: 1002 ftype = VM_PROT_READ; 1003 break; 1004 default: 1005 panic("unknown page trap: %lu", vme->scause); 1006 } 1007 1008 /* The page exists, but the page table needs to be updated. */ 1009 if (pmap_fault(pmap, addr, ftype)) 1010 return (0); 1011 1012 map = &vm_vmspace(vm)->vm_map; 1013 rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); 1014 if (rv != KERN_SUCCESS) { 1015 printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", 1016 __func__, addr, ftype, rv); 1017 return (EFAULT); 1018 } 1019 1020 return (0); 1021 } 1022 1023 static int 1024 vm_handle_suspend(struct vcpu *vcpu, bool *retu) 1025 { 1026 struct vm *vm = vcpu->vm; 1027 int error, i; 1028 struct thread *td; 1029 1030 error = 0; 1031 td = curthread; 1032 1033 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); 1034 1035 /* 1036 * Wait until all 'active_cpus' have suspended themselves. 1037 * 1038 * Since a VM may be suspended at any time including when one or 1039 * more vcpus are doing a rendezvous we need to call the rendezvous 1040 * handler while we are waiting to prevent a deadlock. 1041 */ 1042 vcpu_lock(vcpu); 1043 while (error == 0) { 1044 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) 1045 break; 1046 1047 vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1048 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1049 vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1050 if (td_ast_pending(td, TDA_SUSPEND)) { 1051 vcpu_unlock(vcpu); 1052 error = thread_check_susp(td, false); 1053 vcpu_lock(vcpu); 1054 } 1055 } 1056 vcpu_unlock(vcpu); 1057 1058 /* 1059 * Wakeup the other sleeping vcpus and return to userspace. 1060 */ 1061 for (i = 0; i < vm->maxcpus; i++) { 1062 if (CPU_ISSET(i, &vm->suspended_cpus)) { 1063 vcpu_notify_event(vm_vcpu(vm, i)); 1064 } 1065 } 1066 1067 *retu = true; 1068 return (error); 1069 } 1070 1071 int 1072 vm_run(struct vcpu *vcpu) 1073 { 1074 struct vm_eventinfo evinfo; 1075 struct vm_exit *vme; 1076 struct vm *vm; 1077 pmap_t pmap; 1078 int error; 1079 int vcpuid; 1080 bool retu; 1081 1082 vm = vcpu->vm; 1083 1084 dprintf("%s\n", __func__); 1085 1086 vcpuid = vcpu->vcpuid; 1087 1088 if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1089 return (EINVAL); 1090 1091 if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1092 return (EINVAL); 1093 1094 pmap = vmspace_pmap(vm_vmspace(vm)); 1095 vme = &vcpu->exitinfo; 1096 evinfo.rptr = NULL; 1097 evinfo.sptr = &vm->suspend; 1098 evinfo.iptr = NULL; 1099 restart: 1100 critical_enter(); 1101 1102 restore_guest_fpustate(vcpu); 1103 1104 vcpu_require_state(vcpu, VCPU_RUNNING); 1105 error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); 1106 vcpu_require_state(vcpu, VCPU_FROZEN); 1107 1108 save_guest_fpustate(vcpu); 1109 1110 critical_exit(); 1111 1112 if (error == 0) { 1113 retu = false; 1114 switch (vme->exitcode) { 1115 case VM_EXITCODE_INST_EMUL: 1116 vcpu->nextpc = vme->pc + vme->inst_length; 1117 error = vm_handle_inst_emul(vcpu, &retu); 1118 break; 1119 case VM_EXITCODE_WFI: 1120 vcpu->nextpc = vme->pc + vme->inst_length; 1121 error = vm_handle_wfi(vcpu, vme, &retu); 1122 break; 1123 case VM_EXITCODE_ECALL: 1124 /* Handle in userland. */ 1125 vcpu->nextpc = vme->pc + vme->inst_length; 1126 retu = true; 1127 break; 1128 case VM_EXITCODE_PAGING: 1129 vcpu->nextpc = vme->pc; 1130 error = vm_handle_paging(vcpu, &retu); 1131 break; 1132 case VM_EXITCODE_BOGUS: 1133 vcpu->nextpc = vme->pc; 1134 retu = false; 1135 error = 0; 1136 break; 1137 case VM_EXITCODE_SUSPENDED: 1138 vcpu->nextpc = vme->pc; 1139 error = vm_handle_suspend(vcpu, &retu); 1140 break; 1141 default: 1142 /* Handle in userland. */ 1143 vcpu->nextpc = vme->pc; 1144 retu = true; 1145 break; 1146 } 1147 } 1148 1149 if (error == 0 && retu == false) 1150 goto restart; 1151 1152 return (error); 1153 } 1154